diff options
Diffstat (limited to 'admin/stats')
| -rwxr-xr-x | admin/stats/dump.py | 92 |
1 files changed, 72 insertions, 20 deletions
diff --git a/admin/stats/dump.py b/admin/stats/dump.py
index 20f7c93..d0bdda3 100755
--- a/admin/stats/dump.py
+++ b/admin/stats/dump.py
@@ -4,12 +4,14 @@
 from collections import Counter, defaultdict
 import csv
 from dataclasses import dataclass
 from datetime import datetime, timedelta
+from enum import Enum
+from logging import warning
 from pathlib import Path
 import re
+from statistics import mean, median, stdev
 from sys import argv
 from urllib.parse import urlparse
 from typing import Dict, List, Tuple
-from warnings import warn
 
 import user_agents
@@ -86,28 +88,44 @@ def parse(logs_paths):
 Visit = List[Access]
 
 
+class UserAgentKind(Enum):
+    PC = 'pc'
+    MOBILE = 'mobile'
+    TABLET = 'tablet'
+    BOT = 'bot'
+    NA = 'n/a'
+
+    @classmethod
+    def from_useragent(cls, ua_string):
+        ua = user_agents.parse(ua_string)
+        # is_bot is not mutually exclusive with other is_* predicates.
+        if ua.is_bot:
+            return cls.BOT
+        if ua.is_pc:
+            return cls.PC
+        if ua.is_mobile:
+            return cls.MOBILE
+        if ua.is_tablet:
+            return cls.TABLET
+        warning(f'Unknown user agent kind: {ua_string}')
+        return cls.NA
+
+    def is_human(self):
+        return self in {
+            UserAgentKind.PC,
+            UserAgentKind.MOBILE,
+            UserAgentKind.TABLET
+        }
+
+
 @dataclass
 class Visitor:
     address: str
-    useragent: str
+    useragent: UserAgentKind
     referrers: List[str]
     visits: List[Visit]
 
 
-def useragent_kind(ua_string):
-    ua = user_agents.parse(ua_string)
-    if ua.is_pc:
-        return 'pc'
-    if ua.is_mobile:
-        return 'mobile'
-    if ua.is_tablet:
-        return 'tablet'
-    if ua.is_bot:
-        return 'bot'
-    warn(f'Unknown user agent kind: {ua_string}')
-    return 'n/a'
-
-
 def sort_visits(accesses):
     visitors: Dict[Tuple(str, str), Visitor] = {}
 
@@ -118,7 +136,7 @@ def sort_visits(accesses):
         if visitor is None:
             visitor = Visitor(
                 a.address,
-                useragent_kind(a.useragent),
+                UserAgentKind.from_useragent(a.useragent),
                 a.referrer,
                 [[a]]
             )
@@ -155,7 +173,7 @@ def daily_visitors(visitors, output_path):
 
     for v in visitors.values():
         for day in find_days(v.visits):
-            days[day][v.useragent] += 1
+            days[day][v.useragent.value] += 1
 
     with open(output_path, 'w') as f:
         out = csv.writer(f)
@@ -180,7 +198,7 @@ def daily_visits(visitors, output_path):
     for v in visitors.values():
         for visit in v.visits:
             day = datetime_day(visit[0].time)
-            days[day][v.useragent] += 1
+            days[day][v.useragent.value] += 1
 
     with open(output_path, 'w') as f:
         out = csv.writer(f)
@@ -196,11 +214,45 @@ def daily_visits(visitors, output_path):
             print(*values, sep='\t')
 
 
+def daily_pages_per_visit(visitors, output_path):
+    days: Dict[datetime, list] = defaultdict(list)
+    columns = ('min', 'max', 'med', 'avg', 'dev')
+
+    print('Pages/visit:')
+
+    for v in visitors.values():
+        if not v.useragent.is_human():
+            continue
+
+        for visit in v.visits:
+            day = datetime_day(visit[0].time)
+            days[day].append(len(visit))
+
+    with open(output_path, 'w') as f:
+        out = csv.writer(f)
+        out.writerow(('day', *columns))
+        print('day', *columns, sep='\t')
+
+        for day in sorted(days):
+            view_counts = days[day]
+            values = (
+                day.strftime('%F'),
+                min(view_counts),
+                max(view_counts),
+                median(view_counts),
+                mean(view_counts),
+                stdev(view_counts)
+            )
+
+            out.writerow(values)
+            print(*values[:4], *(f'{v:.2f}' for v in values[4:]), sep='\t')
+
+
 def daily_stats(visitors, output_dir):
     output_dir = Path(output_dir)
     daily_visitors(visitors, output_dir.joinpath('dailyvisitors.csv'))
     daily_visits(visitors, output_dir.joinpath('dailyvisits.csv'))
-    # daily_pages_per_visit(visitors, output_dir.joinpath('dailypagespervisit.csv'))
+    daily_pages_per_visit(visitors, output_dir.joinpath('dailypagespervisit.csv'))
     # daily_page_hits(visitors, output_dir.joinpath('dailypagehits.csv'))
     # daily_referrers(visitors, output_dir.joinpath('dailyreferrers.csv'))
 
