build-feed.py - quatuorbellefeuille.com - Content, build scripts and admin scripts for the Bellefeuille Quartet website.

build-feed.py (4760B)
      1 #!/usr/bin/env python3
      2 
      3 from datetime import datetime
      4 import json
      5 import re
      6 from sys import argv
      7 from urllib.parse import urljoin
      8 
      9 from lxml.builder import E
     10 from lxml.etree import CDATA, XML, indent, tostring
     11 
     12 from helpers import (
     13     DATE_FORMATTERS,
     14     guess_language,
     15     read_concerts,
     16     tmplocale,
     17     touchup_plaintext,
     18 )
     19 
     20 
     21 def zoned_datetime(dt):
     22     # Assume that whoever wrote the naive timestamp that this datetime
     23     # was generated from had the same local time we do.
     24 
     25     # Note: even if we assume that all timestamps in concerts.in and
     26     # concerts-pubdates.json come from the same "zone" (Europe/Paris),
     27     # the *time offset* can differ (CET vs CEST), so we can't just
     28     # define a global TIMEZONE = datetime.now().tzinfo and slap that
     29     # on every datetime.
     30 
     31     return dt.replace(tzinfo=dt.astimezone().tzinfo)
     32 
     33 
     34 NOW = zoned_datetime(datetime.now())
     35 DATE_FORMAT = '%-d %b %Y %H:%M %z'
     36 
     37 
     38 LOCALIZED_TEXT = {
     39     'en': {
     40         'title': 'Bellefeuille Quartet',
     41         'indexpath': 'en/',
     42         'description': 'News from the Bellefeuille quartet',
     43     },
     44     'fr': {
     45         'title': 'Quatuor Bellefeuille',
     46         'indexpath': '/',
     47         'description': 'Des nouvelles du quatuor Bellefeuille',
     48     },
     49 }
     50 
     51 LOCALIZED_FORMATS = {
     52     'en': {
     53         'title': lambda c: f'{c.time.strftime("%B %-d %Y")} in {c.place}',
     54     },
     55     'fr': {
     56         'title': lambda c: f'{c.time.strftime("%-d %B %Y")} à {c.place}',
     57     },
     58 }
     59 
     60 
     61 def join(sequence, joiner_factory):
     62     # There's got to be a standard itertools/functools thingy to do that…
     63     result = []
     64 
     65     for i, item in enumerate(sequence, start=1):
     66         result.append(item)
     67 
     68         if i == len(sequence):
     69             break
     70 
     71         result.append(joiner_factory())
     72 
     73     return result
     74 
     75 
     76 def cdata_concert(concert, lang):
     77     formatters = DATE_FORMATTERS[lang]
     78 
     79     blocks = []
     80 
     81     if concert.warning is not None:
     82         blocks.append(E.p(concert.warning))
     83 
     84     with tmplocale(lang):
     85         blocks.extend((
     86             E.p(formatters['date'](concert.time)),
     87             E.p(formatters['time'](concert.time)),
     88         ))
     89 
     90     pieces = touchup_plaintext(concert.pieces)
     91     instructions = touchup_plaintext(concert.instructions)
     92 
     93     blocks.extend((
     94         E.p(*join(concert.address.splitlines(), E.br)),
     95         E.ol(
     96             *(XML(f'<li>{line}</li>') for line in pieces.splitlines())
     97         ),
     98         *(XML(f'<p>{line}</p>') for line in instructions.splitlines() if line),
     99     ))
    100 
    101     # Do a silly dance to indent CDATA correctly.
    102 
    103     for b in blocks:
    104         indent(b)
    105 
    106     html_blocks = (tostring(b, encoding='utf-8').decode() for b in blocks)
    107 
    108     cdata = '\n'.join(html_blocks) + '\n'
    109     cdata = re.sub('^', 8*' ', cdata, flags=re.MULTILINE)
    110 
    111     return CDATA('\n' + cdata)
    112 
    113 
    114 def generate_concert(concert, concerts_url, pubdates, lang):
    115     formatters = LOCALIZED_FORMATS[lang]
    116 
    117     with tmplocale(lang):
    118         title = formatters['title'](concert)
    119 
    120     anchor = f'concert-{concert.time.strftime("%F")}'
    121 
    122     item = E.item(
    123         E.title(title),
    124         E.link(f'{concerts_url}#{anchor}'),
    125         E.description(cdata_concert(concert, lang)),
    126     )
    127 
    128     pubdate_str = pubdates[concert.time.isoformat(timespec='minutes')]
    129 
    130     if pubdate_str is not None:
    131         pubdate = zoned_datetime(datetime.fromisoformat(pubdate_str))
    132         item.append(E.pubDate(pubdate.strftime(DATE_FORMAT)))
    133 
    134     return item
    135 
    136 
    137 def generate_concerts(concerts_src, concerts_url, concerts_pubdates, lang):
    138     with open(concerts_pubdates) as pubdates_file:
    139         pubdates = json.load(pubdates_file)
    140 
    141     return tuple(
    142         generate_concert(c, concerts_url, pubdates, lang)
    143         for c in read_concerts(concerts_src)
    144     )
    145 
    146 
    147 def main(concerts_src, feed_dst, concerts_pubdates, domain):
    148     lang = guess_language(concerts_src)
    149     text = LOCALIZED_TEXT[lang]
    150 
    151     url = f'https://{domain}'
    152     index_url = urljoin(url, text['indexpath'])
    153     concerts_url = urljoin(index_url, 'concerts.html')
    154 
    155     now_formatted = NOW.strftime(DATE_FORMAT)
    156 
    157     concerts = generate_concerts(
    158         concerts_src, concerts_url, concerts_pubdates, lang
    159     )
    160 
    161     rss = E.rss(
    162         E.channel(
    163             E.title(text['title']),
    164             E.link(index_url),
    165             E.description(text['description']),
    166             E.image(
    167                 E.url(urljoin(url, 'images/logo.svg')),
    168                 E.link(concerts_url),
    169             ),
    170             E.lastBuildDate(now_formatted),
    171             E.pubDate(now_formatted),
    172             E.language(lang),
    173             *concerts,
    174         ),
    175         version='2.0',
    176     )
    177 
    178     indent(rss)
    179 
    180     with open(feed_dst, 'wb') as feed:
    181         feed.write(tostring(rss, encoding='utf-8', xml_declaration=True))
    182 
    183 
    184 if __name__ == '__main__':
    185     main(*argv[1:])
	quatuorbellefeuille.com Content, build scripts and admin scripts for the Bellefeuille Quartet website.
	git clone https://git.kevinlegouguec.net/quatuorbellefeuille.com
	Log \| Files \| Refs