summaryrefslogtreecommitdiff
path: root/repo/www/preprocess-org.el
diff options
context:
space:
mode:
authorKévin Le Gouguec <kevin.legouguec@gmail.com>2020-11-25 19:33:59 +0100
committerKévin Le Gouguec <kevin.legouguec@gmail.com>2020-11-25 19:33:59 +0100
commite1a80a5596dddc9582969e0a95fa8c09882085a9 (patch)
treecd60ce5f2ea2192a89d205e0f5950edb8d6b6486 /repo/www/preprocess-org.el
parente72881f68cc3f2ddfbbd3f51449e0251042473ef (diff)
parent5fbd9a9e13332a8867eef3d2f408df24b19a34ef (diff)
downloadmemory-leaks-e1a80a5596dddc9582969e0a95fa8c09882085a9.tar.xz
Merge branch 'preprocess-org'
Diffstat (limited to 'repo/www/preprocess-org.el')
-rw-r--r--repo/www/preprocess-org.el83
1 files changed, 83 insertions, 0 deletions
diff --git a/repo/www/preprocess-org.el b/repo/www/preprocess-org.el
new file mode 100644
index 0000000..fe63962
--- /dev/null
+++ b/repo/www/preprocess-org.el
@@ -0,0 +1,83 @@
+;; -*- lexical-binding: t -*-
+
+;; How I Convert Org Files To HTML.
+;; ================================
+;;
+;; Or: Why We Can't Have Nice Things: Exhibit #42.
+;; -------------------------------------------
+;;
+;; Or: I Got Way Too Much Time On My Hands, Apparently.
+;; ------------------------------------------------
+;;
+;; I see two straightforward ways to export Org files to HTML:
+;;
+;; 1. ox-html.el, Org's HTML backend: even with all the settings and
+;; filters available, there are still a few things that annoy me:
+;; lots of extra <div>s, unstable section IDs…
+;;
+;; Also, I want to squeeze pandoc somewhere in the pipeline, to run
+;; my Lua filters.
+;;
+;; 2. pandoc: does not cover all of Org's features. Org is so crammed
+;; with constructs that don't exist in other markup formats
+;; (agendas, logbooks, spreadsheets, properties…) and so many knobs
+;; can be tweaked on a per-file basis (link abbreviations, tags,
+;; TODO cycles) that Elisp remains the least painful way to process
+;; these files, IMO.
+;;
+;; A less-straightforward, but still reasonably simple way to go would
+;; be to use Org's markdown backend, then run pandoc on the result.
+;; Unfortunately, AFAICT ox-md.el does not implement definition lists,
+;; nor syntax-highlighting in fenced code blocks.
+;;
+;; So here's where I'm at: using Elisp, I'll preprocess Org files to
+;; add a bunch of #+OPTIONS pandoc recognizes, "dumb down" the stuff
+;; pandoc does not recognize, format some other stuff arbitrarily,
+;; *then* I'll run pandoc on the result.
+
+(defun pp-org/list-tags ()
+  "Insert a \"tags\" special block under each tagged heading.
+
+Scan the current buffer from its beginning for Org headings.  For
+every heading for which `org-get-tags' returns a non-empty list,
+insert
+
+    #+begin_tags
+    - TAG
+    ...
+    #+end_tags
+
+with one list item per tag.  `org-get-tags' is called without the
+LOCAL argument, so whether inherited tags are listed depends on the
+Org configuration in effect.
+
+The block goes after the heading's planning line and property
+drawer, if any: Org only recognizes those when they directly follow
+the heading, so a block inserted right under the heading would
+detach them from it."
+  (goto-char (point-min))
+  (while (re-search-forward org-heading-regexp nil t)
+    ;; Point is at the end of the heading line.  `save-excursion'
+    ;; brings it back there after the insertion, so the search resumes
+    ;; right after this heading and walks over the inserted block
+    ;; (which contains no heading).
+    (save-excursion
+      (save-match-data
+        (when-let* ((tags (org-get-tags (point))))
+          ;; Skip the planning line and the property drawer.
+          (org-end-of-meta-data)
+          ;; `org-end-of-meta-data' normally stops at the beginning of
+          ;; the next line; step back to the end of the previous one
+          ;; so the text inserted is the same as when there is no
+          ;; metadata.  Point is not at BOL when the buffer ends
+          ;; without a final newline; it is then already where we
+          ;; want it.
+          (when (bolp)
+            (backward-char))
+          (insert "\n#+begin_tags\n")
+          (dolist (tag tags)
+            (insert "- " tag "\n"))
+          (insert "#+end_tags\n"))))))
+
+(defun pp-org/expand-links ()
+  "Expand abbreviated bracket links in the current buffer.
+
+For each abbreviation KEY in `org-link-abbrev-alist-local' (the
+#+LINK keywords of the buffer), rewrite every [[KEY:TAG]] and
+[[KEY:TAG][DESCRIPTION]] link so that its target is the expanded
+link, since pandoc does not grok abbreviations.  Links that lack a
+description get the original abbreviated target, KEY:TAG, as their
+description.
+
+The expansion is computed by `org-link-expand-abbrev', so it follows
+Org's own rules; in particular, the replacement text is not used as
+a `format' control string, so percent-escapes in URLs are left
+alone."
+  (pcase-dolist (`(,key . ,_) org-link-abbrev-alist-local)
+    (goto-char (point-min))
+    ;; Group 1: abbreviated target (KEY:TAG); group 2: TAG;
+    ;; group 3: bracketed description; group 4: description text.
+    (let ((link-re (rx "[[" (group (literal key) ":"
+                                   (group (+ (not "]"))))
+                       "]" (? (group "["
+                                     (group (+ (not "]")))
+                                     "]"))
+                       "]")))
+      (while (re-search-forward link-re nil t)
+        (let* ((link-abbrev (match-string 1))
+               (description (match-string 4))
+               ;; `org-link-expand-abbrev' runs `string-match', which
+               ;; would clobber the match data `replace-match' needs.
+               (expanded (save-match-data
+                           (org-link-expand-abbrev link-abbrev))))
+          ;; Leaves point at the end of the expanded target, right
+          ;; before the \"]\" that closes it.
+          (replace-match expanded t t nil 1)
+          (unless description
+            ;; Insert the default description after that \"]\".  Going
+            ;; by position rather than by sexp motion keeps this
+            ;; independent of the characters found in the target.
+            (save-excursion
+              (forward-char)
+              (insert "[" link-abbrev "]"))))))))
+
+(defun preprocess-org (input)
+  "Print the preprocessed contents of the Org file INPUT.
+
+In a temporary buffer, put an #+OPTIONS line in front of the
+contents of INPUT, turn on `org-mode', run the preprocessing passes
+\(`pp-org/list-tags', then `pp-org/expand-links'), and send the
+resulting text to `princ'."
+  (with-temp-buffer
+    ;; The OPTIONS line goes in first; the file contents are then
+    ;; inserted at point, i.e. right after it.
+    (insert "#+OPTIONS: ^:{} tags:nil H:6\n")
+    (insert-file-contents input)
+    ;; The passes rely on Org being set up for this buffer, e.g. on
+    ;; `org-link-abbrev-alist-local'.
+    (org-mode)
+    (dolist (pass '(pp-org/list-tags pp-org/expand-links))
+      (funcall pass))
+    (princ (buffer-substring (point-min) (point-max)))))