build-feed.py (4760B)
#!/usr/bin/env python3
"""Generate an RSS 2.0 feed listing concerts.

Usage: build-feed.py CONCERTS_SRC FEED_DST CONCERTS_PUBDATES DOMAIN
"""

from datetime import datetime
import json
import re
from sys import argv
from urllib.parse import urljoin

from lxml.builder import E
from lxml.etree import CDATA, XML, indent, tostring

from helpers import (
    DATE_FORMATTERS,
    guess_language,
    read_concerts,
    tmplocale,
    touchup_plaintext,
)


def zoned_datetime(dt):
    """Return *dt* as an aware datetime carrying the local UTC offset.

    A naive *dt* is interpreted as local wall-clock time and gets the
    offset that was in effect locally at that moment.  A *dt* that is
    already aware is returned unchanged: replacing its tzinfo with the
    local zone would silently move it to a different instant.
    """
    # Assume that whoever wrote the naive timestamp that this datetime
    # was generated from had the same local time we do.

    # Note: even if we assume that all timestamps in concerts.in and
    # concerts-pubdates.json come from the same "zone" (Europe/Paris),
    # the *time offset* can differ (CET vs CEST), so we can't just
    # define a global TIMEZONE = datetime.now().tzinfo and slap that
    # on every datetime.

    if dt.utcoffset() is not None:
        return dt

    return dt.replace(tzinfo=dt.astimezone().tzinfo)


# Build time, shared by <lastBuildDate> and the channel's <pubDate>.
NOW = zoned_datetime(datetime.now())

# Format of every date written to the feed.
# NOTE: %-d (day of month without zero padding) is a platform-specific
# strftime extension, so this script assumes a libc that supports it.
DATE_FORMAT = '%-d %b %Y %H:%M %z'


# Channel-level strings, keyed by language code.  'indexpath' is joined
# onto the site root to get the URL of that language's index page.
LOCALIZED_TEXT = {
    'en': {
        'title': 'Bellefeuille Quartet',
        'indexpath': 'en/',
        'description': 'News from the Bellefeuille quartet',
    },
    'fr': {
        'title': 'Quatuor Bellefeuille',
        'indexpath': '/',
        'description': 'Des nouvelles du quatuor Bellefeuille',
    },
}

# Per-language callables building an item title from a concert (its
# .time and .place attributes).
LOCALIZED_FORMATS = {
    'en': {
        'title': lambda c: f'{c.time.strftime("%B %-d %Y")} in {c.place}',
    },
    'fr': {
        'title': lambda c: f'{c.time.strftime("%-d %B %Y")} à {c.place}',
    },
}


def join(sequence, joiner_factory):
    """Return the items of *sequence* with a joiner between neighbours.

    *joiner_factory* is called once per gap, so every joiner is a
    distinct object.  Any iterable is accepted; an empty one yields an
    empty list.
    """
    result = []

    for item in sequence:
        # A joiner goes before every item except the first one.
        if result:
            result.append(joiner_factory())

        result.append(item)

    return result


def cdata_concert(concert, lang):
    """Return the CDATA payload describing *concert* as HTML.

    The payload is made of, in order: the optional warning, the date,
    the time, the address (one line per <br/>-separated row), the
    ordered list of pieces and one paragraph per non-empty line of
    instructions.

    *concert* must provide the attributes read below: warning, time,
    pieces, instructions and address.  *lang* selects the entry of
    DATE_FORMATTERS and the locale set up by tmplocale.
    """
    formatters = DATE_FORMATTERS[lang]

    blocks = []

    if concert.warning is not None:
        blocks.append(E.p(concert.warning))

    with tmplocale(lang):
        blocks.extend((
            E.p(formatters['date'](concert.time)),
            E.p(formatters['time'](concert.time)),
        ))

    # NOTE(review): the results are interpolated into XML source below,
    # so touchup_plaintext presumably returns well-formed markup with
    # special characters escaped -- confirm in helpers.
    pieces = touchup_plaintext(concert.pieces)
    instructions = touchup_plaintext(concert.instructions)

    blocks.extend((
        # E.br is passed as a factory: each gap needs its own element.
        E.p(*join(concert.address.splitlines(), E.br)),
        E.ol(
            *(XML(f'<li>{line}</li>') for line in pieces.splitlines())
        ),
        *(XML(f'<p>{line}</p>') for line in instructions.splitlines() if line),
    ))

    # Do a silly dance to indent CDATA correctly.

    for b in blocks:
        indent(b)

    html_blocks = (tostring(b, encoding='utf-8').decode() for b in blocks)

    # Prefix every line with eight spaces.  With re.MULTILINE, '^' also
    # matches right after the trailing newline, so the payload ends
    # with the indentation that precedes the CDATA terminator.
    cdata = '\n'.join(html_blocks) + '\n'
    cdata = re.sub('^', 8*' ', cdata, flags=re.MULTILINE)

    return CDATA('\n' + cdata)


def generate_concert(concert, concerts_url, pubdates, lang):
    """Build the RSS <item> element for one concert.

    *concerts_url* is the page the item links to; the link points at
    the anchor 'concert-<date>' on that page.  *pubdates* maps
    concert.time.isoformat(timespec='minutes') to an ISO 8601
    timestamp, or to None when the publication date is not known, in
    which case no <pubDate> is emitted.

    Raises KeyError if *pubdates* has no entry for the concert.
    """
    formatters = LOCALIZED_FORMATS[lang]

    with tmplocale(lang):
        title = formatters['title'](concert)

    # %F is the C99 shorthand for %Y-%m-%d.
    anchor = f'concert-{concert.time.strftime("%F")}'

    item = E.item(
        E.title(title),
        E.link(f'{concerts_url}#{anchor}'),
        E.description(cdata_concert(concert, lang)),
    )

    pubdate_str = pubdates[concert.time.isoformat(timespec='minutes')]

    if pubdate_str is not None:
        pubdate = zoned_datetime(datetime.fromisoformat(pubdate_str))
        item.append(E.pubDate(pubdate.strftime(DATE_FORMAT)))

    return item


def generate_concerts(concerts_src, concerts_url, concerts_pubdates, lang):
    """Return a tuple with one RSS <item> per concert in *concerts_src*.

    *concerts_pubdates* is the path of a JSON file holding the mapping
    expected by generate_concert.
    """
    # JSON is UTF-8; do not depend on the locale's default encoding.
    with open(concerts_pubdates, encoding='utf-8') as pubdates_file:
        pubdates = json.load(pubdates_file)

    return tuple(
        generate_concert(c, concerts_url, pubdates, lang)
        for c in read_concerts(concerts_src)
    )


def main(concerts_src, feed_dst, concerts_pubdates, domain):
    """Write the RSS feed for *concerts_src* to the file *feed_dst*.

    The feed language is whatever guess_language returns for
    *concerts_src*; it must be a key of LOCALIZED_TEXT.  *domain* is
    the host name from which every absolute URL is derived, and
    *concerts_pubdates* is the path of the JSON publication dates.
    """
    lang = guess_language(concerts_src)
    text = LOCALIZED_TEXT[lang]

    url = f'https://{domain}'
    index_url = urljoin(url, text['indexpath'])
    concerts_url = urljoin(index_url, 'concerts.html')

    now_formatted = NOW.strftime(DATE_FORMAT)

    concerts = generate_concerts(
        concerts_src, concerts_url, concerts_pubdates, lang
    )

    rss = E.rss(
        E.channel(
            E.title(text['title']),
            E.link(index_url),
            E.description(text['description']),
            E.image(
                E.url(urljoin(url, 'images/logo.svg')),
                E.link(concerts_url),
            ),
            E.lastBuildDate(now_formatted),
            E.pubDate(now_formatted),
            E.language(lang),
            *concerts,
        ),
        version='2.0',
    )

    indent(rss)

    # tostring returns bytes when an encoding is given, hence 'wb'.
    with open(feed_dst, 'wb') as feed:
        feed.write(tostring(rss, encoding='utf-8', xml_declaration=True))


if __name__ == '__main__':
    main(*argv[1:])