generate-index.py (3857B)
#!/usr/bin/env python3

"""Generate the HTML index page for one folder of the site.

The page is rendered by pandoc from the folder's README (when it has
one) followed by a list of links to the folder's subfolders and pages.
"""

from argparse import ArgumentParser
from html import escape
from itertools import chain
import json
from os import path
from pathlib import Path
from subprocess import run
from tempfile import NamedTemporaryFile

from git import Repo

from helpers import deserialize_directories, generate_crumbs, PandocRunner


def parse_arguments():
    """Parse the command line and return the resulting namespace.

    The repeatable options (--lua-filter, --stylesheet) are stored as
    lists under 'filters' and 'css'; argparse leaves them set to None
    when the option is never given.
    """
    parser = ArgumentParser()
    parser.add_argument(
        '--template', help='Pandoc template for HTML output.'
    )
    parser.add_argument(
        '--site-title', help='Prefix to add to <title>.'
    )
    parser.add_argument(
        '--lua-filter', dest='filters', action='append',
        help='Lua filter to run the page through.'
    )
    parser.add_argument(
        '--stylesheet', dest='css', action='append',
        help='CSS stylesheet to link to.'
    )
    parser.add_argument(
        'site_tree', help='JSON file describing the page tree.'
    )
    parser.add_argument(
        'target', help='Subfolder to generate an index for.'
    )
    parser.add_argument(
        'output', help='Path to the output file.'
    )
    return parser.parse_args()


def list_files(tree_file, folder):
    """Return the (subfolders, files) recorded for folder in tree_file.

    tree_file is a JSON file decoded with deserialize_directories; the
    result is indexed by folder, so an unknown folder raises whatever
    that lookup raises (presumably KeyError - confirm against helpers).
    """
    # JSON is UTF-8 by specification; do not depend on the locale.
    with open(tree_file, encoding='utf-8') as tree:
        directories = deserialize_directories(json.load(tree))
    return directories[folder].subfolders, directories[folder].files


def has_title(document_path):
    """Tell whether pandoc finds a 'title' in the document's metadata.

    Runs 'pandoc -t json' on document_path and inspects the 'meta'
    object of the resulting AST.  Raises CalledProcessError if pandoc
    exits with a non-zero status.
    """
    pandoc = run(
        ('pandoc', '-t', 'json', document_path),
        # pandoc always emits UTF-8, whatever the locale says.
        check=True, text=True, encoding='utf-8', capture_output=True
    )
    ast = json.loads(pandoc.stdout)
    return 'title' in ast['meta']


def list_pages(files):
    """Split files into page names and the folder's README.

    Returns (pages, readme): pages holds the stem of every file whose
    stem is not 'README'; readme is the last file whose stem is
    'README', or None when there is none.
    """
    readme = None
    pages = []

    for f in files:
        page = Path(f).stem

        if page == 'README':
            readme = f
        else:
            pages.append(page)

    return pages, readme


def format_toc(directories, pages):
    """Return an HTML <ul> linking to each subfolder, then each page.

    Subfolders link to '<name>/index.html' and pages to '<name>.html'.
    Names are HTML-escaped, since the list is handed to pandoc as a
    ready-made HTML fragment.
    """
    dir_template = '<li><a href="{d}/index.html">{d}/</a></li>'
    page_template = '<li><a href="{p}.html">{p}</a></li>'

    # str() mirrors what str.format did implicitly before escaping was
    # added, in case the names are not plain strings.
    dir_list = (
        dir_template.format(d=escape(str(d))) for d in directories
    )
    page_list = (
        page_template.format(p=escape(str(p))) for p in pages
    )

    return '\n'.join((
        '<ul>', *chain(dir_list, page_list), '</ul>'
    ))


def main(arguments):
    """Render the index page for arguments.target to arguments.output.

    Without a README, an empty document is rendered with the list of
    links appended and 'Index' as its title.  With a README, the README
    itself is rendered, followed by a heading and the list of links.
    """
    target = arguments.target
    folders, files = list_files(arguments.site_tree, target)
    pages, readme = list_pages(files)

    toc_title = f'Index for {target}' if target else 'Index'
    html_toc = format_toc(folders, pages)

    path_to_top = path.relpath('.', start=target)
    # arguments.css is None when no --stylesheet was given.
    stylesheets = (
        path.join(path_to_top, s) for s in (arguments.css or ())
    )

    variables = {'crumbs': generate_crumbs(Path(target)/'index')}
    metadata = {}
    if arguments.site_title is not None:
        metadata['sitetitle'] = arguments.site_title

    # NOTE(review): arguments.filters is likewise None when no
    # --lua-filter was given; presumably PandocRunner copes - confirm.
    pandoc = PandocRunner(
        arguments.output, arguments.template, arguments.filters,
        stylesheets, variables
    )

    if readme is None:
        # pandoc needs an input document: feed it an empty one.
        with NamedTemporaryFile(suffix='.md') as dummy_readme, \
             NamedTemporaryFile(mode='w+', encoding='utf-8') as toc:
            toc.write(html_toc)
            # Make sure pandoc sees the content when it opens the file.
            toc.flush()

            metadata['pagetitle'] = toc_title
            metadata['title'] = 'Index'

            pandoc.run(
                dummy_readme.name, include_after=(toc.name,),
                metadata=metadata
            )
        return

    repo_top = Repo(search_parent_directories=True).working_dir
    readme_path = Path(repo_top, target, readme)

    # If the README doesn't have a title, give a default to pandoc
    # out-of-band.
    if not has_title(readme_path):
        metadata['pagetitle'] = target or 'README'

    with NamedTemporaryFile(mode='w+', encoding='utf-8') as toc:
        # The heading is raw HTML, so the target name must be escaped.
        toc.write(f'<h1>{escape(toc_title)}</h1>\n')
        toc.write(html_toc)
        toc.flush()

        pandoc.run(
            readme_path, include_after=(toc.name,), metadata=metadata
        )


if __name__ == '__main__':
    main(parse_arguments())