summary | refs | log | tree | commit | diff
path: root/admin/stats/dump.py
diff options
context:
space:
mode:
author: Kévin Le Gouguec <kevin.legouguec@gmail.com> 2021-03-28 17:40:42 +0200
committer: Kévin Le Gouguec <kevin.legouguec@gmail.com> 2021-03-28 17:40:42 +0200
commit: 2ac0e2bf6bc2abebba611147ec994da5b6c06e10 (patch)
tree: 85b6ae7359aeef5d4a555e7ee0ca2af81574ba27 /admin/stats/dump.py
parent: f5b7f14573c7c83e00690042ab75088cbe313bd8 (diff)
download: quatuorbellefeuille.com-2ac0e2bf6bc2abebba611147ec994da5b6c06e10.tar.xz
Add daily page views
Diffstat (limited to 'admin/stats/dump.py')
-rwxr-xr-x admin/stats/dump.py 46
1 files changed, 43 insertions, 3 deletions
diff --git a/admin/stats/dump.py b/admin/stats/dump.py
index d0bdda3..ac412c9 100755
--- a/admin/stats/dump.py
+++ b/admin/stats/dump.py
@@ -10,7 +10,7 @@ from pathlib import Path
import re
from statistics import mean, median, stdev
from sys import argv
-from urllib.parse import urlparse
+from urllib.parse import unquote, urlparse
from typing import Dict, List, Tuple
import user_agents
@@ -41,7 +41,7 @@ DOMAINS = {
def normalize_path(p):
if p == '/':
return '/index.html'
- return p
+ return unquote(p)
@dataclass
@@ -165,6 +165,16 @@ def find_days(visits):
}
+def find_pages(visitors):
+ return sorted({
+ access.resource
+ for v in visitors
+ for visit in v.visits
+ for access in visit
+ if v.useragent.is_human()
+ })
+
+
def daily_visitors(visitors, output_path):
days: Dict[datetime, Counter] = defaultdict(Counter)
columns = ('mobile', 'tablet', 'pc', 'bot', 'n/a')
@@ -248,12 +258,42 @@ def daily_pages_per_visit(visitors, output_path):
print(*values[:4], *(f'{v:.2f}' for v in values[4:]), sep='\t')
+def daily_page_hits(visitors, output_path):
+ days: Dict[datetime, Counter] = defaultdict(Counter)
+ columns = find_pages(visitors.values())
+
+ print('Page hits:')
+
+ for v in visitors.values():
+ if not v.useragent.is_human():
+ continue
+
+ for visit in v.visits:
+ day = datetime_day(visit[0].time)
+ for access in visit:
+ days[day][access.resource] += 1
+
+ with open(output_path, 'w') as f:
+ out = csv.writer(f)
+ out.writerow(('day', *columns))
+
+ for day in sorted(days):
+ page_hits = days[day]
+ values = (day.strftime('%F'),
+ *(page_hits[page] for page in columns))
+ out.writerow(values)
+
+ print(day.strftime('%F'))
+ for page, hits in page_hits.most_common(5):
+ print(hits, page, sep='\t')
+
+
def daily_stats(visitors, output_dir):
output_dir = Path(output_dir)
daily_visitors(visitors, output_dir.joinpath('dailyvisitors.csv'))
daily_visits(visitors, output_dir.joinpath('dailyvisits.csv'))
daily_pages_per_visit(visitors, output_dir.joinpath('dailypagespervisit.csv'))
- # daily_page_hits(visitors, output_dir.joinpath('dailypagehits.csv'))
+ daily_page_hits(visitors, output_dir.joinpath('dailypagehits.csv'))
# daily_referrers(visitors, output_dir.joinpath('dailyreferrers.csv'))