From 66b5366b7b377f9e595ee9b97261b65a0064aa5b Mon Sep 17 00:00:00 2001
From: Kévin Le Gouguec
Date: Wed, 23 Feb 2022 10:26:02 +0100
Subject: Add script to update feeds

---
 admin/feeds/build-feed.py          | 221 +++++++++++++++++++++++++++++++++++++
 admin/feeds/build-feeds.sh         |  19 ++++
 admin/feeds/concerts-pubdates.json |  15 +++
 3 files changed, 255 insertions(+)
 create mode 100755 admin/feeds/build-feed.py
 create mode 100755 admin/feeds/build-feeds.sh
 create mode 100644 admin/feeds/concerts-pubdates.json

(limited to 'admin')

diff --git a/admin/feeds/build-feed.py b/admin/feeds/build-feed.py
new file mode 100755
index 0000000..5fe5c04
--- /dev/null
+++ b/admin/feeds/build-feed.py
@@ -0,0 +1,221 @@
+#!/usr/bin/env python3
+
+"""Build an RSS 2.0 feed from a concerts.in file.
+
+Usage: build-feed.py CONCERTS_SRC FEED_DST CONCERTS_PUBDATES DOMAIN
+"""
+
+from datetime import datetime
+import json
+import re
+from sys import argv
+from urllib.parse import urljoin
+
+from lxml.builder import E
+from lxml.etree import CDATA, XML, indent, tostring
+
+from helpers import (
+    DATE_FORMATTERS,
+    guess_language,
+    read_concerts,
+    tmplocale,
+    touchup_plaintext,
+)
+
+
+# TODO: handle timezones correctly.
+# Places to disambiguate:
+#
+# - concerts.in:
+#   either add the zone explicitly, or deduce it from the place,
+#   assuming all times in concerts.in are local times.
+#
+# - concerts-pubdates.json:
+#   just add the zone explicitly.
+#
+# Until then, assume all these "naive times" describe the same timezone
+# (CET/CEST).
+
+
+TIMEZONE = datetime.now().astimezone().tzinfo
+NOW = datetime.now(tz=TIMEZONE)
+# NOTE: '%-d' is a platform-specific strftime extension.
+DATE_FORMAT = '%-d %b %Y %H:%M %z'
+
+# TODO: add item pubDate
+
+
+LOCALIZED_TEXT = {
+    'en': {
+        'title': 'Bellefeuille Quartet',
+        'indexpath': 'en/',
+        'description': 'News from the Bellefeuille quartet',
+    },
+    'fr': {
+        'title': 'Quatuor Bellefeuille',
+        'indexpath': '/',
+        'description': 'Des nouvelles du quatuor Bellefeuille',
+    },
+}
+
+LOCALIZED_FORMATS = {
+    'en': {
+        'title': lambda c: f'{c.time.strftime("%B %-d %Y")} in {c.place}',
+    },
+    'fr': {
+        'title': lambda c: f'{c.time.strftime("%-d %B %Y")} à {c.place}',
+    },
+}
+
+
+def join(sequence, joiner_factory):
+    """Return the items of sequence with a fresh joiner between them.
+
+    joiner_factory is called once per gap, so every separator is a
+    distinct object.  An empty sequence yields an empty list.
+    """
+    result = []
+
+    for item in sequence:
+        if result:
+            result.append(joiner_factory())
+
+        result.append(item)
+
+    return result
+
+
+CDATA_INDENT = 8*' '
+
+
+def cdata_concert(concert, lang):
+    """Return a CDATA section holding the HTML description of concert.
+
+    The date and time paragraphs are produced by DATE_FORMATTERS[lang],
+    called within tmplocale(lang).
+    """
+    formatters = DATE_FORMATTERS[lang]
+
+    blocks = []
+
+    if concert.warning is not None:
+        blocks.append(E.p(concert.warning))
+
+    with tmplocale(lang):
+        blocks.extend((
+            E.p(formatters['date'](concert.time)),
+            E.p(formatters['time'](concert.time)),
+        ))
+
+    blocks.extend((
+        E.p(*join(concert.address.splitlines(), E.br)),
+        E.ol(
+            # NOTE(review): the touched-up text is parsed as XML, so it
+            # presumably comes back as well-formed markup; confirm.
+            *(XML(f'<li>{touchup_plaintext(p)}</li>')
+              for p in concert.pieces.splitlines())
+        ),
+        *(E.p(line) for line in concert.instructions.splitlines()),
+    ))
+
+    for b in blocks:
+        indent(b)
+
+    html_blocks = (tostring(b, encoding='utf-8').decode() for b in blocks)
+
+    cdata = '\n' + '\n'.join(html_blocks) + '\n'
+    # Prefix every line of the markup with CDATA_INDENT.
+    cdata = re.sub('^', CDATA_INDENT, cdata, flags=re.MULTILINE)
+
+    return CDATA(cdata)
+
+
+def generate_concert(concert, concerts_url, pubdates, lang):
+    """Return the RSS <item> element describing concert.
+
+    pubdates maps concert times (ISO 8601, minute precision) to ISO 8601
+    publication times or None.  A concert without a recorded publication
+    time gets no <pubDate> child.
+    """
+    formatters = LOCALIZED_FORMATS[lang]
+
+    with tmplocale(lang):
+        title = formatters['title'](concert)
+
+    # Same output as "%F", which not every platform supports.
+    anchor = f'concert-{concert.time.strftime("%Y-%m-%d")}'
+
+    item = E.item(
+        E.title(title),
+        E.link(f'{concerts_url}#{anchor}'),
+        E.description(cdata_concert(concert, lang)),
+    )
+
+    # A concert missing from the file is treated like a null entry.
+    pubdate_str = pubdates.get(concert.time.isoformat(timespec='minutes'))
+
+    if pubdate_str is not None:
+        # astimezone() reads a naive time as local time and attaches the
+        # offset in force on that date; stamping TIMEZONE (today's offset)
+        # on it can be one hour off across a DST change.
+        pubdate = datetime.fromisoformat(pubdate_str).astimezone()
+        item.append(E.pubDate(pubdate.strftime(DATE_FORMAT)))
+
+    return item
+
+
+def generate_concerts(concerts_src, concerts_url, concerts_pubdates, lang):
+    """Return a tuple of RSS items, one per concert in concerts_src.
+
+    concerts_pubdates is the path of the JSON file mapping concert times
+    to publication times.
+    """
+    with open(concerts_pubdates, encoding='utf-8') as pubdates_file:
+        pubdates = json.load(pubdates_file)
+
+    return tuple(
+        generate_concert(c, concerts_url, pubdates, lang)
+        for c in read_concerts(concerts_src)
+    )
+
+
+def main(concerts_src, feed_dst, concerts_pubdates, domain):
+    """Write to feed_dst the RSS feed for the concerts in concerts_src."""
+    lang = guess_language(concerts_src)
+    text = LOCALIZED_TEXT[lang]
+
+    url = f'https://{domain}'
+    index_url = urljoin(url, text['indexpath'])
+    concerts_url = urljoin(index_url, 'concerts.html')
+
+    now_formatted = NOW.strftime(DATE_FORMAT)
+
+    concerts = generate_concerts(
+        concerts_src, concerts_url, concerts_pubdates, lang
+    )
+
+    rss = E.rss(
+        E.channel(
+            E.title(text['title']),
+            E.link(index_url),
+            E.description(text['description']),
+            E.image(
+                E.url(urljoin(url, 'images/logo.svg')),
+                E.link(concerts_url),
+            ),
+            E.lastBuildDate(now_formatted),
+            E.pubDate(now_formatted),
+            E.language(lang),
+            *concerts,
+        ),
+        version='2.0',
+    )
+
+    indent(rss)
+
+    with open(feed_dst, 'wb') as feed:
+        feed.write(tostring(rss, encoding='utf-8', xml_declaration=True))
+
+
+if __name__ == '__main__':
+    main(*argv[1:])
diff --git a/admin/feeds/build-feeds.sh b/admin/feeds/build-feeds.sh
new file mode 100755
index 0000000..99b4a6e
--- /dev/null
+++ b/admin/feeds/build-feeds.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+set -eu
+
+HERE=$(dirname "$0")
+ROOT=${HERE}/../..
+
+. "${ROOT}"/settings.sh
+
+FEEDS=("$@")
+
+for feed in "${FEEDS[@]}"
+do
+    concert=$(dirname "${feed}")/concerts.in
+
+    PYTHONPATH="${ROOT}" \
+        "${HERE}"/build-feed.py "${concert}" "${feed}" \
+        "${HERE}"/concerts-pubdates.json "${domain}"
+done
diff --git a/admin/feeds/concerts-pubdates.json b/admin/feeds/concerts-pubdates.json
new file mode 100644
index 0000000..4232ed7
--- /dev/null
+++ b/admin/feeds/concerts-pubdates.json
@@ -0,0 +1,15 @@
+{
+    "2019-10-05T17:00": null,
+    "2019-10-06T16:00": null,
+    "2020-03-08T16:00": null,
+    "2020-08-24T20:00": null,
+    "2021-04-03T20:00": "2021-03-19T16:00",
+    "2021-06-13T15:00": "2021-03-19T16:00",
+    "2021-08-17T20:30": null,
+    "2021-08-19T20:00": null,
+    "2021-10-25T18:00": null,
+    "2021-10-28T18:00": null,
+    "2021-12-12T16:00": "2021-11-23T23:28",
+    "2021-12-31T20:00": null,
+    "2022-05-07T17:00": null
+}
-- 
cgit v1.2.3