diff options
Diffstat (limited to 'repo/www')
| -rw-r--r-- | repo/www/TODO | 14 | ||||
| -rwxr-xr-x | repo/www/generate-index.py | 53 | ||||
| -rwxr-xr-x | repo/www/generate-page.py | 9 | ||||
| -rw-r--r-- | repo/www/helpers.py | 88 | ||||
| -rw-r--r-- | repo/www/preprocess-org.el | 83 |
5 files changed, 191 insertions, 56 deletions
diff --git a/repo/www/TODO b/repo/www/TODO index 7eac4fe..1799529 100644 --- a/repo/www/TODO +++ b/repo/www/TODO @@ -1,15 +1,13 @@ -- preprocess Org files - Org's HTML backend adds a lot of stuff I don't like (intermediate - divs, unstable section IDs); I'll use the markdown backend, then - feed that to pandoc - - change description of custom +LINKs - - convert properties - - convert tags +- org preprocessing: + - dump properties + - fontify TODO keywords - compute "leak count" on toplevel index - get stylin' - pandoc template - tufte css? at least sidenotes rather than footnotes -- use tags somehow (eg to fill in the "keywords" metadata in pandoc template) +- use tags somehow, eg + - fill in the "keywords" metadata in pandoc template + - index files/sections by tags - add author - add creation & last update dates - link to history diff --git a/repo/www/generate-index.py b/repo/www/generate-index.py index 16d1874..ab5b2a4 100755 --- a/repo/www/generate-index.py +++ b/repo/www/generate-index.py @@ -10,7 +10,7 @@ from tempfile import NamedTemporaryFile from git import Repo -from helpers import deserialize_directories, generate_crumbs, pandoc +from helpers import deserialize_directories, generate_crumbs, PandocRunner def parse_arguments(): @@ -103,41 +103,40 @@ def main(arguments): if arguments.site_title is not None: metadata['sitetitle'] = arguments.site_title - if readme is not None: - repo_top = Repo(search_parent_directories=True).working_dir - readme_path = Path(repo_top, target, readme) - - # If the README doesn't have a title, give a default to pandoc - # out-of-band. - if not has_title(readme_path): - metadata['pagetitle'] = target or 'README' + pandoc = PandocRunner( + arguments.output, arguments.template, arguments.filters, + stylesheets, variables + ) - with NamedTemporaryFile(mode='w+') as toc: - toc.write(f'<h1>{toc_title}</h1>\n') + if readme is None: + with NamedTemporaryFile(suffix='.md') as dummy_readme, \ + NamedTemporaryFile(mode='w+') as toc: toc.write(html_toc) toc.flush() - pandoc( - readme_path, arguments.output, - arguments.template, arguments.filters, stylesheets, - include_after=(toc.name,), - variables=variables, metadata=metadata + metadata['pagetitle'] = toc_title + metadata['title'] = 'Index' + + pandoc.run( + dummy_readme.name, include_after=(toc.name,), metadata=metadata ) - return + return - with NamedTemporaryFile(suffix='.md') as dummy_readme, \ - NamedTemporaryFile(mode='w+') as toc: + repo_top = Repo(search_parent_directories=True).working_dir + readme_path = Path(repo_top, target, readme) + + # If the README doesn't have a title, give a default to pandoc + # out-of-band. + if not has_title(readme_path): + metadata['pagetitle'] = target or 'README' + + with NamedTemporaryFile(mode='w+') as toc: + toc.write(f'<h1>{toc_title}</h1>\n') toc.write(html_toc) toc.flush() - metadata['pagetitle'] = toc_title - metadata['title'] = 'Index' - - pandoc( - dummy_readme.name, arguments.output, - arguments.template, arguments.filters, stylesheets, - include_after=(toc.name,), - variables=variables, metadata=metadata + pandoc.run( + readme_path, include_after=(toc.name,), metadata=metadata ) diff --git a/repo/www/generate-page.py b/repo/www/generate-page.py index cb2317b..bbe1288 100755 --- a/repo/www/generate-page.py +++ b/repo/www/generate-page.py @@ -6,7 +6,7 @@ from pathlib import Path from git import Repo -from helpers import generate_crumbs, pandoc +from helpers import generate_crumbs, PandocRunner def parse_arguments(): @@ -44,13 +44,16 @@ def main(arguments): page_path = Path(arguments.page).resolve().relative_to(repo_top) - pandoc( - arguments.page, + pandoc = PandocRunner( arguments.output, arguments.template, arguments.filters, stylesheets, variables={'crumbs': generate_crumbs(page_path)}, + ) + + pandoc.run( + arguments.page, metadata={'pagetitle': arguments.title, 'sitetitle': arguments.site_title} ) diff --git a/repo/www/helpers.py b/repo/www/helpers.py index 48ebccf..12d9a41 100644 --- a/repo/www/helpers.py +++ b/repo/www/helpers.py @@ -2,8 +2,10 @@ from collections import defaultdict from dataclasses import dataclass, field from itertools import chain from os import environ, path -from subprocess import run -from typing import Iterator +from pathlib import Path +from subprocess import CalledProcessError, run +from tempfile import NamedTemporaryFile +from typing import Dict, Iterator, Union @dataclass @@ -56,26 +58,76 @@ def deserialize_directories(directories): } -def pandoc(page, output, template, filters, stylesheets, include_after=(), - variables=None, metadata=None): - cmd = ( - 'pandoc', '-s', page, '-o', output, '--template', template, - *chain(*(('--lua-filter', f) for f in filters)), - *chain(*(('--css', s) for s in stylesheets)), - *chain(*(('--include-after-body', f) for f in include_after)) - ) +class _NullPreprocessor: + def __init__(self, source_path): + self._source_path = source_path + + def __enter__(self): + self.output = self._source_path + return self + + def __exit__(self, *args): + pass + +class _OrgPreprocessor: + def __init__(self, source_path): + self._source_path = source_path + + def __enter__(self): + self._output = NamedTemporaryFile(mode='w+', suffix='.org') + try: + run(( + 'emacs', '-Q', '--batch', '--load', 'preprocess-org.el', + '--eval', f'(preprocess-org "{self._source_path}")' + ), check=True, stdout=self._output) + except CalledProcessError: + self._output.close() + raise + + self.output = self._output.name + return self + + def __exit__(self, *args): + self._output.close() + +_PREPROCESSORS = defaultdict(lambda: _NullPreprocessor, + (('org', _OrgPreprocessor),)) + + +_PathArg = Union[Path, str, bytes] + +@dataclass +class PandocRunner: + output: _PathArg + template: _PathArg + filters: Iterator[_PathArg] + stylesheets: Iterator[_PathArg] + variables: Dict[str, str] = field(default_factory=dict) + + def run(self, page, include_after=(), metadata=None): + cmd = ( + 'pandoc', '-s', '-o', self.output, '--template', self.template, + *chain(*(('--lua-filter', f) for f in self.filters)), + *chain(*(('--css', s) for s in self.stylesheets)), + *chain(*(('--include-after-body', f) for f in include_after)) + ) - if variables is not None: - cmd += tuple(chain( - *(('-V', f'{k}={v}') for k, v in variables.items()) - )) - if metadata is not None: cmd += tuple(chain( - *(('-M', f'{k}={v}') for k, v in metadata.items()) + *(('-V', f'{k}={v}') for k, v in self.variables.items()) )) + if metadata is not None: + cmd += tuple(chain( + *(('-M', f'{k}={v}') for k, v in metadata.items()) + )) + + environ['LUA_PATH'] = '.cache/?.lua;;' + + _, ext = path.splitext(page) + preprocessor = _PREPROCESSORS[ext[1:]] - environ['LUA_PATH'] = '.cache/?.lua;;' - run(cmd, check=True) + with preprocessor(page) as preproc: + cmd = cmd + (preproc.output,) + run(cmd, check=True) def generate_crumbs(target): diff --git a/repo/www/preprocess-org.el b/repo/www/preprocess-org.el new file mode 100644 index 0000000..fe63962 --- /dev/null +++ b/repo/www/preprocess-org.el @@ -0,0 +1,83 @@ +;; -*- lexical-binding: t -*- + +;; How I Convert Org Files To HTML. +;; ================================ +;; +;; Or: Why We Can't Have Nice Things: Exhibit #42. +;; ------------------------------------------- +;; +;; Or: I Got Way Too Much Time On My Hands, Apparently. +;; ------------------------------------------------ +;; +;; I see two straightforward ways to export Org files to HTML: +;; +;; 1. ox-html.el, Org's HTML backend: even with all the settings and +;; filters available, there are still a few things that annoy me: +;; lots of extra <div>s, unstable section IDs… +;; +;; Also, I want to squeeze pandoc somewhere in the pipeline, to run +;; my Lua filters. +;; +;; 2. pandoc: does not cover all of Org's features. Org is so crammed +;; with constructs that don't exist in other markup formats +;; (agendas, logbooks, spreadsheets, properties…) and so many knobs +;; can be tweaked on a per-file basis (link abbreviations, tags, +;; TODO cycles) that Elisp remains the least painful way to process +;; these files, IMO. +;; +;; A less-straightforward, but still reasonably simple way to go would +;; be to use Org's markdown backend, then run pandoc on the result. +;; Unfortunately, AFAICT ox-md.el does not implement definition lists, +;; nor syntax-highlighting in fenced code blocks. +;; +;; So here's where I'm at: using Elisp, I'll preprocess Org files to +;; add a bunch of #+OPTIONS pandoc recognizes, "dumb down" the stuff +;; pandoc does not recognize, format some other stuff arbitrarily, +;; *then* I'll run pandoc on the result. + +(defun pp-org/list-tags () + (goto-char (point-min)) + (while (re-search-forward org-heading-regexp nil t) + (save-excursion + (save-match-data + (when-let ((tags (org-get-tags (point)))) + (insert "\n#+begin_tags\n") + (dolist (tag tags) + (insert "- " tag "\n")) + (insert "#+end_tags\n")))))) + +(defun pp-org/expand-links () + ;; Expand #+LINK abbreviations, since pandoc does not grok them. + ;; Also, use the abbreviation as default description for links that + ;; lack one. + (pcase-dolist (`(,key . ,expansion) org-link-abbrev-alist-local) + (goto-char (point-min)) + (let ((link-re (rx "[[" (group (literal key) ":" + (group (+ (not "]")))) + "]" (? (group "[" + (group (+ (not "]"))) + "]")) + "]")) + (expand-link (if (string-match-p "%s" expansion) + (lambda (tag) (format expansion tag)) + (lambda (tag) (concat expansion tag))))) + (while (re-search-forward link-re nil t) + (let ((link-beg (match-beginning 0)) + (link-abbrev (match-string 1)) + (link-tag (match-string 2)) + (description (match-string 4))) + (replace-match (funcall expand-link link-tag) t t nil 1) + (unless description + (save-excursion + (goto-char (1+ link-beg)) + (forward-sexp) + (insert (format "[%s]" link-abbrev))))))))) + +(defun preprocess-org (input) + (with-temp-buffer + (insert "#+OPTIONS: ^:{} tags:nil H:6\n") + (insert-file-contents input) + (org-mode) + (pp-org/list-tags) + (pp-org/expand-links) + (princ (buffer-string)))) |
