memory-leaks

Still reachable: lots of words in many pages.
git clone https://git.kevinlegouguec.net/memory-leaks
Log | Files | Refs | README | LICENSE

preprocess-org.el (3358B)


      1 ;; -*- lexical-binding: t -*-
      2 
      3 ;; How I Convert Org Files To HTML.
      4 ;; ================================
      5 ;;
      6 ;; Or: Why We Can't Have Nice Things: Exhibit #42.
      7 ;;     -------------------------------------------
      8 ;;
      9 ;; Or: I Got Way Too Much Time On My Hands, Apparently.
     10 ;;     ------------------------------------------------
     11 ;;
     12 ;; I see two straightforward ways to export Org files to HTML:
     13 ;;
     14 ;; 1. ox-html.el, Org's HTML backend: even with all the settings and
     15 ;;    filters available, there are still a few things that annoy me:
     16 ;;    lots of extra <div>s, unstable section IDs…
     17 ;;
     18 ;;    Also, I want to squeeze pandoc somewhere in the pipeline, to run
     19 ;;    my Lua filters.
     20 ;;
     21 ;; 2. pandoc: does not cover all of Org's features.  Org is so crammed
     22 ;;    with constructs that don't exist in other markup formats
     23 ;;    (agendas, logbooks, spreadsheets, properties…) and so many knobs
     24 ;;    can be tweaked on a per-file basis (link abbreviations, tags,
     25 ;;    TODO cycles) that Elisp remains the least painful way to process
     26 ;;    these files, IMO.
     27 ;;
     28 ;; A less-straightforward, but still reasonably simple way to go would
     29 ;; be to use Org's markdown backend, then run pandoc on the result.
     30 ;; Unfortunately, AFAICT ox-md.el does not implement definition lists,
     31 ;; nor syntax-highlighting in fenced code blocks.
     32 ;;
     33 ;; So here's where I'm at: using Elisp, I'll preprocess Org files to
     34 ;; add a bunch of #+OPTIONS pandoc recognizes, "dumb down" the stuff
     35 ;; pandoc does not recognize, format some other stuff arbitrarily,
     36 ;; *then* I'll run pandoc on the result.
     37 
     (defun pp-org/list-tags ()
       "Write each tagged heading's tags as a list inside a \"tags\" block."
       ;; Walks the buffer from its beginning; for every match of
       ;; `org-heading-regexp' whose heading has tags, a
       ;; #+begin_tags/#+end_tags block holding one "- TAG" item per tag
       ;; is inserted at the position where the search stopped.
       (goto-char (point-min))
       (while (re-search-forward org-heading-regexp nil t)
         ;; Work inside `save-excursion' so that point stays where the
         ;; search stopped, in front of whatever gets inserted.
         (save-excursion
           (let ((heading-tags (save-match-data (org-get-tags (point)))))
             (when heading-tags
               (insert "\n#+begin_tags\n"
                       (mapconcat (lambda (tag) (concat "- " tag "\n"))
                                  heading-tags
                                  "")
                       "#+end_tags\n"))))))
     48 
     (defun pp-org/expand-links ()
       "Expand #+LINK abbreviations in the current buffer's bracket links."
       ;; pandoc does not grok #+LINK abbreviations, so every [[KEY:TAG]]
       ;; or [[KEY:TAG][DESCRIPTION]] link whose KEY is listed in
       ;; `org-link-abbrev-alist-local' gets its target replaced with the
       ;; expansion.  Links that lack a description receive the abbreviated
       ;; form (KEY:TAG) as their description.
       (pcase-dolist (`(,key . ,expansion) org-link-abbrev-alist-local)
         (goto-char (point-min))
         (let ((link-re (rx "[[" (group (literal key) ":"
                                        (group (+ (not "]"))))
                            "]" (? (group "["
                                          (group (+ (not "]")))
                                          "]"))
                            "]"))
               (expand-link
                ;; Substitute the placeholder literally instead of going
                ;; through `format', which would choke on any other "%"
                ;; in the expansion (e.g. percent-escapes such as "%20").
                (cond
                 ((string-match-p "%s" expansion)
                  (lambda (tag)
                    (replace-regexp-in-string "%s" tag expansion t t)))
                 ;; Per the Org manual, "%h" stands for the percent-encoded
                 ;; tag.
                 ((string-match-p "%h" expansion)
                  (lambda (tag)
                    (replace-regexp-in-string
                     "%h" (url-hexify-string tag) expansion t t)))
                 ;; No placeholder: the tag is appended to the expansion.
                 (t
                  (lambda (tag) (concat expansion tag))))))
           (while (re-search-forward link-re nil t)
             (let* ((link-abbrev (match-string 1))
                    (link-tag (match-string 2))
                    (description (match-string 4))
                    ;; Guard the match data: `replace-match' below still
                    ;; needs the groups found by the search.
                    (target (save-match-data
                              (funcall expand-link link-tag))))
               ;; Only group 1 (KEY:TAG) is replaced; brackets and any
               ;; existing description are left untouched.
               (replace-match target t t nil 1)
               (unless description
                 ;; `replace-match' leaves point at the end of the new
                 ;; target, right before the "]" that closes it; step over
                 ;; that bracket and add the default description.  This
                 ;; does not depend on the delimiters inside the expanded
                 ;; target being balanced, unlike sexp-based motion.
                 (save-excursion
                   (forward-char)
                   (insert "[" link-abbrev "]"))))))))
     75 
     (defun preprocess-org (input)
       "Print the preprocessed contents of the Org file INPUT with `princ'."
       ;; INPUT is loaded into a temporary buffer underneath an extra
       ;; #+OPTIONS line, switched to `org-mode', and run through the tag
       ;; and link passes; the resulting text is then printed.
       (let ((options-line "#+OPTIONS: ^:{} tags:nil H:6\n"))
         (princ
          (with-temp-buffer
            (insert options-line)
            ;; Point sits after the options line, so the file's contents
            ;; are inserted below it.
            (insert-file-contents input)
            (org-mode)
            (pp-org/list-tags)
            (pp-org/expand-links)
            (buffer-string)))))