summaryrefslogtreecommitdiff
path: root/repo/www/preprocess-org.el
blob: fe63962aa9ec9414c4e885e41680aab9ab7f291b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
;; -*- lexical-binding: t -*-

;; How I Convert Org Files To HTML.
;; ================================
;;
;; Or: Why We Can't Have Nice Things: Exhibit #42.
;;     -------------------------------------------
;;
;; Or: I Got Way Too Much Time On My Hands, Apparently.
;;     ------------------------------------------------
;;
;; I see two straightforward ways to export Org files to HTML:
;;
;; 1. ox-html.el, Org's HTML backend: even with all the settings and
;;    filters available, there are still a few things that annoy me:
;;    lots of extra <div>s, unstable section IDs…
;;
;;    Also, I want to squeeze pandoc somewhere in the pipeline, to run
;;    my Lua filters.
;;
;; 2. pandoc: does not cover all of Org's features.  Org is so crammed
;;    with constructs that don't exist in other markup formats
;;    (agendas, logbooks, spreadsheets, properties…) and so many knobs
;;    can be tweaked on a per-file basis (link abbreviations, tags,
;;    TODO cycles) that Elisp remains the least painful way to process
;;    these files, IMO.
;;
;; A less-straightforward, but still reasonably simple way to go would
;; be to use Org's markdown backend, then run pandoc on the result.
;; Unfortunately, AFAICT ox-md.el does not implement definition lists,
;; nor syntax-highlighting in fenced code blocks.
;;
;; So here's where I'm at: using Elisp, I'll preprocess Org files to
;; add a bunch of #+OPTIONS pandoc recognizes, "dumb down" the stuff
;; pandoc does not recognize, format some other stuff arbitrarily,
;; *then* I'll run pandoc on the result.

(defun pp-org/list-tags ()
  "Insert a \"tags\" block under every tagged heading of the buffer.

For each heading for which `org-get-tags' returns a non-empty
list, insert

    #+begin_tags
    - TAG
    ...
    #+end_tags

below the heading, one list item per tag.

The block goes after the heading's planning line and property
drawer, if any: Org only recognizes those when they directly
follow the headline, so inserting between them and the headline
would detach them from the heading."
  (goto-char (point-min))
  (while (re-search-forward org-heading-regexp nil t)
    ;; Point is at the end of the heading line.  `save-excursion'
    ;; brings it back there after the insertion, so the search resumes
    ;; right after this heading; none of the inserted lines can match
    ;; `org-heading-regexp'.
    (save-excursion
      (save-match-data
        (when-let* ((tags (org-get-tags (point))))
          ;; Skip the planning line and property drawer.  This leaves
          ;; point at the beginning of the following line (or at the
          ;; end of the buffer).
          (org-end-of-meta-data)
          ;; Step back to the end of the previous line, so that the
          ;; text inserted below is exactly what it would be for a
          ;; heading without metadata.
          (when (and (bolp) (not (bobp)))
            (backward-char))
          (insert "\n#+begin_tags\n")
          (dolist (tag tags)
            (insert "- " tag "\n"))
          (insert "#+end_tags\n"))))))

(defun pp-org/expand-links ()
  "Expand the buffer's #+LINK abbreviations in place.

Pandoc does not grok link abbreviations, so for every entry
\(KEY . EXPANSION) of `org-link-abbrev-alist-local', rewrite each
bracket link whose path is \"KEY:TAG\" so that its path is the
expanded target.  Links that lack a description get the original
abbreviated path (\"KEY:TAG\") as their description.

The first \"%s\" found in EXPANSION is replaced literally with
TAG; when there is none, TAG is appended to EXPANSION.  Any other
\"%\" in EXPANSION (e.g. URL percent-escapes) is left untouched."
  (pcase-dolist (`(,key . ,expansion) org-link-abbrev-alist-local)
    (goto-char (point-min))
    ;; Groups: 1 = abbreviated path "KEY:TAG", 2 = TAG,
    ;; 3 = "[DESCRIPTION]" (optional), 4 = DESCRIPTION.
    (let ((link-re (rx "[[" (group (literal key) ":"
                                   (group (+ (not "]"))))
                       "]" (? (group "["
                                     (group (+ (not "]")))
                                     "]"))
                       "]")))
      (while (re-search-forward link-re nil t)
        (let* ((link-abbrev (match-string 1))
               (link-tag (match-string 2))
               (description (match-string 4))
               ;; Substitute literally rather than through `format',
               ;; which would interpret every "%" of the expansion as
               ;; a directive.  `string-match' clobbers the match data
               ;; the buffer replacement below relies on, hence
               ;; `save-match-data'.
               (target (save-match-data
                         (if (string-match "%s" expansion)
                             (replace-match link-tag t t expansion)
                           (concat expansion link-tag)))))
          ;; This leaves point at the end of the new path, i.e. right
          ;; before the "]" that closes it.
          (replace-match target t t nil 1)
          (unless description
            ;; Step over that "]" instead of calling `forward-sexp',
            ;; whose result depends on the characters in the path
            ;; (unbalanced parentheses, quotes…).
            (save-excursion
              (forward-char)
              (insert (format "[%s]" link-abbrev)))))))))

(defun preprocess-org (input)
  "Preprocess the Org file INPUT and print the result.

The contents of INPUT are loaded into a temporary buffer, below
an #+OPTIONS line, and the buffer is put in Org mode.  The
preprocessing passes `pp-org/list-tags' and `pp-org/expand-links'
are then run, in that order, and the resulting buffer text is
printed with `princ'.  INPUT itself is not modified."
  (let ((options-line "#+OPTIONS: ^:{} tags:nil H:6\n")
        (passes '(pp-org/list-tags pp-org/expand-links)))
    (with-temp-buffer
      (insert options-line)
      ;; The file contents land after the options line.
      (insert-file-contents input)
      ;; Org mode must be enabled before running the passes: they rely
      ;; on Org functions and on the buffer-local link abbreviations.
      (org-mode)
      (dolist (pass passes)
        (funcall pass))
      (princ (buffer-string)))))