memory-leaks

Still reachable: lots of words in many pages.
git clone https://git.kevinlegouguec.net/memory-leaks
Log | Files | Refs | README | LICENSE

helpers.py (4236B)


from collections import defaultdict
from dataclasses import dataclass, field
from itertools import chain
from os import environ, path
from pathlib import Path
from subprocess import CalledProcessError, run
from tempfile import NamedTemporaryFile
from typing import Dict, Iterable, Iterator, List, Set, Union
      9 
     10 
     11 @dataclass
     12 class Directory:
     13     files: Iterator[str] = field(default_factory=list)
     14     subfolders: Iterator[str] = field(default_factory=set)
     15 
     16     def serialize(self):
     17         return {
     18             'files': sorted(self.files),
     19             'subfolders': sorted(self.subfolders)
     20         }
     21 
     22     @classmethod
     23     def deserialize(cls, d):
     24         return cls(**d)
     25 
     26 
     27 def _find_files(extensions, repository):
     28     patterns = (f'**.{ext}' for ext in extensions)
     29     zero = '\x00'
     30     return repository.git.ls_files('-z', *patterns).strip(zero).split(zero)
     31 
     32 
     33 def _fill_directories(files, top_dir):
     34     directories = defaultdict(Directory)
     35 
     36     for f in files:
     37         fdir, fname = path.split(f)
     38 
     39         directories[fdir].files.append(fname)
     40 
     41         while fdir:
     42             parent, child = path.split(fdir)
     43             directories[parent].subfolders.add(child)
     44             fdir = parent
     45 
     46     return directories
     47 
     48 
     49 def compute_directories(extensions, repository):
     50     files = _find_files(extensions, repository)
     51     top_dir = path.relpath(repository.working_dir, path.curdir)
     52     return _fill_directories(files, top_dir)
     53 
     54 
     55 def deserialize_directories(directories):
     56     return {
     57         k: Directory.deserialize(v) for k, v in directories.items()
     58     }
     59 
     60 
     61 class _NullPreprocessor:
     62     def __init__(self, source_path):
     63         self._source_path = source_path
     64 
     65     def __enter__(self):
     66         self.output = self._source_path
     67         return self
     68 
     69     def __exit__(self, *args):
     70         pass
     71 
     72 class _OrgPreprocessor:
     73     def __init__(self, source_path):
     74         self._source_path = source_path
     75 
     76     def __enter__(self):
     77         self._output = NamedTemporaryFile(mode='w+', suffix='.org')
     78         try:
     79             run((
     80                 'emacs', '-Q', '--batch', '--load', 'preprocess-org.el',
     81                 '--eval', f'(preprocess-org "{self._source_path}")'
     82             ), check=True, stdout=self._output)
     83         except CalledProcessError:
     84             self._output.close()
     85             raise
     86 
     87         self.output = self._output.name
     88         return self
     89 
     90     def __exit__(self, *args):
     91         self._output.close()
     92 
     93 _PREPROCESSORS = defaultdict(lambda: _NullPreprocessor,
     94                              (('org', _OrgPreprocessor),))
     95 
     96 
     97 _PathArg = Union[Path, str, bytes]
     98 
     99 @dataclass
    100 class PandocRunner:
    101     output: _PathArg
    102     template: _PathArg
    103     filters: Iterator[_PathArg]
    104     stylesheets: Iterator[_PathArg]
    105     variables: Dict[str, str] = field(default_factory=dict)
    106 
    107     def run(self, page, include_after=(), metadata=None):
    108         cmd = (
    109             'pandoc', '-s', '-o', self.output, '--template', self.template,
    110             *chain(*(('--lua-filter', f) for f in self.filters)),
    111             *chain(*(('--css', s) for s in self.stylesheets)),
    112             *chain(*(('--include-after-body', f) for f in include_after))
    113         )
    114 
    115         cmd += tuple(chain(
    116             *(('-V', f'{k}={v}') for k, v in self.variables.items())
    117         ))
    118         if metadata is not None:
    119             cmd += tuple(chain(
    120                 *(('-M', f'{k}={v}') for k, v in metadata.items())
    121             ))
    122 
    123         environ['LUA_PATH'] = '.cache/?.lua;;'
    124 
    125         _, ext = path.splitext(page)
    126         preprocessor = _PREPROCESSORS[ext[1:]]
    127 
    128         with preprocessor(page) as preproc:
    129             cmd = cmd + (preproc.output,)
    130             run(cmd, check=True)
    131 
    132 
    133 def generate_crumbs(target):
    134     parts = ('(top)', *target.parts)
    135 
    136     if parts[-1] == 'index':
    137         *crumbs, current = parts[:-1]
    138     else:
    139         crumbs = parts[:-1]
    140         current, _ = path.splitext(parts[-1])
    141 
    142     crumbs_li = (
    143         '<li><a href="{link}">{crumb}</a></li>'.format(
    144             link=(path.relpath(path.join(*crumbs[1:i], 'index.html'),
    145                                start=target.parent)),
    146             crumb=crumb
    147         )
    148         for i, crumb in enumerate(crumbs, start=1)
    149     )
    150 
    151     current_li = f'<li aria-current="page">{current}</li>'
    152 
    153     return '\n'.join((*crumbs_li, current_li))