summary | refs | log | tree | commit | diff
path: root/admin/stats/dump.py
diff options
context:
space:
mode:
author: Kévin Le Gouguec <kevin.legouguec@gmail.com> 2021-03-28 17:18:04 +0200
committer: Kévin Le Gouguec <kevin.legouguec@gmail.com> 2021-03-28 17:18:04 +0200
commit: f5b7f14573c7c83e00690042ab75088cbe313bd8 (patch)
tree: 74a776b3bc66c40886ff19430b3b4f6c62336251 /admin/stats/dump.py
parent: ed7bdb8a06658313bb6d94686921b6fb5f4b5a89 (diff)
download: quatuorbellefeuille.com-f5b7f14573c7c83e00690042ab75088cbe313bd8.tar.xz
Dump number of pages per visit
Diffstat (limited to 'admin/stats/dump.py')
-rwxr-xr-x admin/stats/dump.py 92
1 files changed, 72 insertions, 20 deletions
diff --git a/admin/stats/dump.py b/admin/stats/dump.py
index 20f7c93..d0bdda3 100755
--- a/admin/stats/dump.py
+++ b/admin/stats/dump.py
@@ -4,12 +4,14 @@ from collections import Counter, defaultdict
import csv
from dataclasses import dataclass
from datetime import datetime, timedelta
+from enum import Enum
+from logging import warning
from pathlib import Path
import re
+from statistics import mean, median, stdev
from sys import argv
from urllib.parse import urlparse
from typing import Dict, List, Tuple
-from warnings import warn
import user_agents
@@ -86,28 +88,44 @@ def parse(logs_paths):
Visit = List[Access]
+class UserAgentKind(Enum):
+    """Kind of client a User-Agent string is classified as."""
+
+    PC = 'pc'
+    MOBILE = 'mobile'
+    TABLET = 'tablet'
+    BOT = 'bot'
+    NA = 'n/a'
+
+    @classmethod
+    def from_useragent(cls, ua_string):
+        """Classify a raw User-Agent string.
+
+        The string is parsed with user_agents.parse(); the first
+        predicate that holds, in the order bot, pc, mobile, tablet,
+        selects the member.  When none holds, a warning is logged and
+        NA is returned.
+        """
+        parsed = user_agents.parse(ua_string)
+        # Bots are tested first: is_bot is not mutually exclusive with
+        # the other is_* predicates.
+        if parsed.is_bot:
+            kind = cls.BOT
+        elif parsed.is_pc:
+            kind = cls.PC
+        elif parsed.is_mobile:
+            kind = cls.MOBILE
+        elif parsed.is_tablet:
+            kind = cls.TABLET
+        else:
+            warning(f'Unknown user agent kind: {ua_string}')
+            kind = cls.NA
+        return kind
+
+    def is_human(self):
+        """Return True for PC, MOBILE and TABLET; False for BOT and NA."""
+        human_kinds = (
+            UserAgentKind.PC,
+            UserAgentKind.MOBILE,
+            UserAgentKind.TABLET,
+        )
+        return self in human_kinds
+
+
@dataclass
class Visitor:
address: str
- useragent: str
+ useragent: UserAgentKind
referrers: List[str]
visits: List[Visit]
-def useragent_kind(ua_string):
- ua = user_agents.parse(ua_string)
- if ua.is_pc:
- return 'pc'
- if ua.is_mobile:
- return 'mobile'
- if ua.is_tablet:
- return 'tablet'
- if ua.is_bot:
- return 'bot'
- warn(f'Unknown user agent kind: {ua_string}')
- return 'n/a'
-
-
def sort_visits(accesses):
visitors: Dict[Tuple(str, str), Visitor] = {}
@@ -118,7 +136,7 @@ def sort_visits(accesses):
if visitor is None:
visitor = Visitor(
a.address,
- useragent_kind(a.useragent),
+ UserAgentKind.from_useragent(a.useragent),
a.referrer,
[[a]]
)
@@ -155,7 +173,7 @@ def daily_visitors(visitors, output_path):
for v in visitors.values():
for day in find_days(v.visits):
- days[day][v.useragent] += 1
+ days[day][v.useragent.value] += 1
with open(output_path, 'w') as f:
out = csv.writer(f)
@@ -180,7 +198,7 @@ def daily_visits(visitors, output_path):
for v in visitors.values():
for visit in v.visits:
day = datetime_day(visit[0].time)
- days[day][v.useragent] += 1
+ days[day][v.useragent.value] += 1
with open(output_path, 'w') as f:
out = csv.writer(f)
@@ -196,11 +214,45 @@ def daily_visits(visitors, output_path):
print(*values, sep='\t')
+def daily_pages_per_visit(visitors, output_path):
+    """Dump per-day statistics on the number of accesses per visit.
+
+    Only visitors whose user agent kind satisfies is_human() are
+    counted.  Each visit is attributed to the day of its first access,
+    and its size is the number of accesses it contains.  For every day,
+    one row (min, max, median, mean, standard deviation) is written to
+    the CSV file and echoed tab-separated on stdout.
+
+    visitors: mapping whose values expose .useragent and .visits.
+    output_path: path of the CSV file to (over)write.
+
+    The standard deviation is reported as 0.0 for days with a single
+    visit, since the sample standard deviation is undefined there.
+    """
+    days: Dict[datetime, list] = defaultdict(list)
+    columns = ('min', 'max', 'med', 'avg', 'dev')
+
+    print('Pages/visit:')
+
+    for v in visitors.values():
+        if not v.useragent.is_human():
+            continue
+
+        for visit in v.visits:
+            day = datetime_day(visit[0].time)
+            days[day].append(len(visit))
+
+    # newline='' lets the csv module control line endings itself.
+    with open(output_path, 'w', newline='') as f:
+        out = csv.writer(f)
+        out.writerow(('day', *columns))
+        print('day', *columns, sep='\t')
+
+        for day in sorted(days):
+            view_counts = days[day]
+            # stdev() raises StatisticsError with fewer than two
+            # samples; a day with one visit has no spread to report.
+            deviation = stdev(view_counts) if len(view_counts) > 1 else 0.0
+            values = (
+                day.strftime('%F'),
+                min(view_counts),
+                max(view_counts),
+                median(view_counts),
+                mean(view_counts),
+                deviation,
+            )
+
+            out.writerow(values)
+            # Day, min, max and median print as-is; mean and deviation
+            # are rounded to two decimals for the console only.
+            print(
+                *values[:4],
+                *(f'{number:.2f}' for number in values[4:]),
+                sep='\t',
+            )
+
+
def daily_stats(visitors, output_dir):
output_dir = Path(output_dir)
daily_visitors(visitors, output_dir.joinpath('dailyvisitors.csv'))
daily_visits(visitors, output_dir.joinpath('dailyvisits.csv'))
- # daily_pages_per_visit(visitors, output_dir.joinpath('dailypagespervisit.csv'))
+ daily_pages_per_visit(visitors, output_dir.joinpath('dailypagespervisit.csv'))
# daily_page_hits(visitors, output_dir.joinpath('dailypagehits.csv'))
# daily_referrers(visitors, output_dir.joinpath('dailyreferrers.csv'))