summary | refs | log | tree | commit | diff
path: root/admin
diff options
context:
space:
mode:
author: Kévin Le Gouguec <kevin.legouguec@gmail.com> 2021-03-21 22:20:21 +0100
committer: Kévin Le Gouguec <kevin.legouguec@gmail.com> 2021-03-21 22:20:21 +0100
commit: 05980e4351847e9b9a605d44e8c758d17ab2919f (patch)
tree: 0778c0a4cd71dd346cffea552f4fe01698d3eba4 /admin
parent: 1b06fb56aa47384c4fb19c78958aafd6923fd71e (diff)
download: quatuorbellefeuille.com-05980e4351847e9b9a605d44e8c758d17ab2919f.tar.xz
Tweak stats script
- move referer normalization earlier
- remove temporary dot files
- draw graphs with arrows
Diffstat (limited to 'admin')
-rwxr-xr-x [-rw-r--r--] admin/stats.py 70
1 files changed, 40 insertions, 30 deletions
diff --git a/admin/stats.py b/admin/stats.py
index f69390b..e3d46cc 100644..100755
--- a/admin/stats.py
+++ b/admin/stats.py
@@ -1,7 +1,10 @@
+#!/usr/bin/env python3
+
from collections import Counter, defaultdict
from dataclasses import dataclass
from datetime import datetime, timedelta
import re
+from os import remove
from subprocess import run
from sys import argv
@@ -38,9 +41,17 @@ class Access:
if resource == '/':
resource = '/index.html'
+ referer = re.sub(
+ r'https://(?:www\.)?quatuorbellefeuille\.(?:fr|com)(/[^?]*)(?:\?.+)?',
+ r'\1',
+ info['referer']
+ )
+ if referer == '/':
+ referer = '/index.html'
+
return cls(
info['address'], user_agents.parse(info['useragent']),
- info['referer'], datetime.strptime(info['date'], DATE_FMT),
+ referer, datetime.strptime(info['date'], DATE_FMT),
resource
)
@@ -96,16 +107,33 @@ def order(grouped_visits):
return visits
-def normalizeref(referer):
- if referer == '-':
- return 'inconnu'
- referer = re.sub(
- r'https://(?:www\.)?quatuorbellefeuille\.(?:fr|com)(/[^?]*)(?:\?.+)?',
- r'\1', referer
- )
- if referer == '/':
- return '/index.html'
- return referer
+def visit_graph(accesses):
+ edges = (f' "{a.referer}" -> "{a.resource}";'
+ for a in accesses)
+ return '\n'.join((f'digraph visit {{', *edges, '}'))
+
+def graph(visits):
+ date = visits[0][0].time.strftime('%F')
+
+ tempfiles = {
+ f'{date}-{i}.pdf': visit for i, visit in enumerate(visits)
+ }
+
+ for tempfile, visit in tempfiles.items():
+ vgraph = visit_graph(visit)
+
+ with open(tempfile, 'wb') as vfile:
+ vfile.write(
+ run(('dot', '-Tpdf'), text=False, check=True,
+ capture_output=True, input=vgraph.encode())
+ .stdout
+ )
+
+ run(('qpdf', '--empty', '--pages', *tempfiles, '--', f'{date}.pdf'),
+ check=True)
+
+ for f in tempfiles:
+ remove(f)
def analyze(logs_path):
accesses = parse(logs_path)
@@ -119,25 +147,7 @@ def analyze(logs_path):
for page, hits in pagehits.most_common():
print(hits, page)
- date = accesses[0].time.strftime('%F')
-
- for i, visit in enumerate(visits_by_time.values()):
- edges = (f' "{normalizeref(a.referer)}" -- "{a.resource}";'
- for a in visit)
- graph = '\n'.join(('graph trip {',
- *edges,
- '}'))
- dot = run(
- ('dot', '-Tpdf'), text=False, capture_output=True, check=True,
- input=graph.encode()
- )
- with open(f'{date}-{i}.pdf', 'wb') as visitgraph:
- visitgraph.write(dot.stdout)
-
- pages = (f'{date}-{i}.pdf' for i in range(len(visits_by_time.values())))
- run(('qpdf', '--empty', '--pages', *pages, '--', f'{date}.pdf'),
- text=False, check=True)
-
+ graph(tuple(visits_by_time.values()))
if __name__ == '__main__':
analyze(argv[1])