diff options
| author | Kévin Le Gouguec <kevin.legouguec@gmail.com> | 2021-03-20 20:21:31 +0100 |
|---|---|---|
| committer | Kévin Le Gouguec <kevin.legouguec@gmail.com> | 2021-03-20 20:21:31 +0100 |
| commit | de768184fc9365cdf1480fb7c72f6192aeac5eeb (patch) | |
| tree | bd8646b1c8563dc31914e6b3fb3866ceb3a71e41 | |
| parent | 194f41f2f3826e1f37ecaa5a39ae41708808ad6f (diff) | |
| download | quatuorbellefeuille.com-de768184fc9365cdf1480fb7c72f6192aeac5eeb.tar.xz | |
Draw graphs
Ugly, absolutely unreadable graphs, but still.
| -rw-r--r-- | admin/stats.py | 34 |
1 file changed, 32 insertions, 2 deletions
```diff
diff --git a/admin/stats.py b/admin/stats.py
index e79784c..6dc234c 100644
--- a/admin/stats.py
+++ b/admin/stats.py
@@ -2,6 +2,7 @@ from collections import Counter, defaultdict
 from dataclasses import dataclass
 from datetime import datetime, timedelta
 import re
+from subprocess import run
 from sys import argv
 
 import user_agents
@@ -33,10 +34,14 @@ class Access:
 
     @classmethod
     def from_log(cls, info):
+        resource = info['resource']
+        if resource == '/':
+            resource = '/index.html'
+
         return cls(
             info['address'], user_agents.parse(info['useragent']),
             info['referer'], datetime.strptime(info['date'], DATE_FMT),
-            info['resource']
+            resource
         )
 
 def interesting(resource):
@@ -52,7 +57,9 @@ def parse(logs_path):
         matches = (ACCESS_RE.match(l) for l in logs)
         return tuple(
             Access.from_log(m) for m in matches
-            if m is not None and interesting(m['resource'])
+            if (m is not None
+                and interesting(m['resource'])
+                and 'klg.uber.space' not in m['referer'])
         )
 
 def key(access):
@@ -89,6 +96,14 @@ def order(grouped_visits):
 
     return visits
 
+def normalizeref(referer):
+    if referer == '-':
+        return 'inconnu'
+    return re.sub(
+        r'https://(?:www\.)?quatuorbellefeuille\.(?:fr|com)(/[^?]*)(?:\?.+)?',
+        r'\1', referer
+    )
+
 def analyze(logs_path):
     accesses = parse(logs_path)
     visits_by_visitor = visits(accesses)
@@ -101,6 +116,21 @@ def analyze(logs_path):
 
     for page, hits in pagehits.most_common():
         print(hits, page)
+    edges = Counter(
+        (a.resource, normalizeref(a.referer)) for a in accesses
+    )
+    edge_statements = '\n'.join(
+        f' "{ref}" -- "{res}";' for (res, ref) in edges.keys()
+    )
+    graph = f'''\
+graph trip {{
+{edge_statements}
+}}
+'''
+    dot = run(('dot', '-Tpdf'), text=False, input=graph.encode(),
+              capture_output=True, check=True)
+    with open('graph.pdf', 'wb') as graph_file:
+        graph_file.write(dot.stdout)
 
 if __name__ == '__main__':
     analyze(argv[1])
```
