summary | refs | log | tree | commit | diff
path: root/admin
diff options
context:
space:
mode:
author: Kévin Le Gouguec <kevin.legouguec@gmail.com> 2021-03-28 19:16:36 +0200
committer: Kévin Le Gouguec <kevin.legouguec@gmail.com> 2021-03-28 19:16:36 +0200
commit: a65d69ce7222ce65f66c1c20ac3febf8298fec33 (patch)
tree: 115de6169400e47112ff783e9891d6e61d23909d /admin
parent: 2ac0e2bf6bc2abebba611147ec994da5b6c06e10 (diff)
download: quatuorbellefeuille.com-a65d69ce7222ce65f66c1c20ac3febf8298fec33.tar.xz
Add daily referrer counts
Diffstat (limited to 'admin')
-rwxr-xr-x admin/stats/dump.py 66
1 files changed, 57 insertions, 9 deletions
diff --git a/admin/stats/dump.py b/admin/stats/dump.py
index ac412c9..ce7d5b2 100755
--- a/admin/stats/dump.py
+++ b/admin/stats/dump.py
@@ -11,7 +11,7 @@ import re
from statistics import mean, median, stdev
from sys import argv
from urllib.parse import unquote, urlparse
-from typing import Dict, List, Tuple
+from typing import Dict, List, Set, Tuple
import user_agents
@@ -31,15 +31,15 @@ DATE_FMT = '[%d/%b/%Y:%H:%M:%S %z]'
VISIT_MAX_DURATION = timedelta(hours=1)
-DOMAINS = {
+DOMAINS = (
'quatuorbellefeuille.com',
'quatuorbellefeuille.fr',
'klg.uber.space'
-}
+)
def normalize_path(p):
- if p == '/':
+ if p in ('', '/'):
return '/index.html'
return unquote(p)
@@ -57,10 +57,12 @@ class Access:
resource = normalize_path(info['resource'])
referrer = urlparse(info['referer'])
- if referrer.netloc in DOMAINS:
+ if referrer.netloc.endswith(DOMAINS):
ref = normalize_path(referrer.path)
- else:
+ elif referrer.netloc:
ref = referrer.netloc
+ else:
+ ref = 'n/a'
return cls(
info['address'], info['useragent'], ref,
@@ -122,7 +124,6 @@ class UserAgentKind(Enum):
class Visitor:
address: str
useragent: UserAgentKind
- referrers: List[str]
visits: List[Visit]
@@ -137,7 +138,6 @@ def sort_visits(accesses):
visitor = Visitor(
a.address,
UserAgentKind.from_useragent(a.useragent),
- a.referrer,
[[a]]
)
visitors[key] = visitor
@@ -175,6 +175,26 @@ def find_pages(visitors):
})
+def external_referrer(ref):
+ return ref != 'n/a' and not ref.startswith('/')
+
+
+def simplify_referrer(ref):
+ parts = ref.split('.')
+ # Remove leading parts (www., l., m.…) and extension (.com, .fr…).
+ return parts[-2]
+
+
+def find_referrers(visitors):
+ return sorted({
+ simplify_referrer(access.referrer)
+ for v in visitors
+ for visit in v.visits
+ for access in visit
+ if external_referrer(access.referrer)
+ })
+
+
def daily_visitors(visitors, output_path):
days: Dict[datetime, Counter] = defaultdict(Counter)
columns = ('mobile', 'tablet', 'pc', 'bot', 'n/a')
@@ -288,13 +308,41 @@ def daily_page_hits(visitors, output_path):
print(hits, page, sep='\t')
+def daily_referrers(visitors, output_path):
+ days: Dict[datetime, Counter] = defaultdict(Counter)
+ columns = find_referrers(visitors.values())
+
+ print('Referrers:')
+
+ for v in visitors.values():
+ for visit in v.visits:
+ day = datetime_day(visit[0].time)
+
+ for access in visit:
+ if not external_referrer(access.referrer):
+ continue
+ days[day][simplify_referrer(access.referrer)] += 1
+
+ with open(output_path, 'w') as f:
+ out = csv.writer(f)
+ out.writerow(('day', *columns))
+ print('day', *columns, sep='\t')
+
+ for day in sorted(days):
+ refcounts = days[day]
+ values = (day.strftime('%F'), *(refcounts[ref] for ref in columns))
+
+ out.writerow(values)
+ print(*values, sep='\t')
+
+
def daily_stats(visitors, output_dir):
output_dir = Path(output_dir)
daily_visitors(visitors, output_dir.joinpath('dailyvisitors.csv'))
daily_visits(visitors, output_dir.joinpath('dailyvisits.csv'))
daily_pages_per_visit(visitors, output_dir.joinpath('dailypagespervisit.csv'))
daily_page_hits(visitors, output_dir.joinpath('dailypagehits.csv'))
- # daily_referrers(visitors, output_dir.joinpath('dailyreferrers.csv'))
+ daily_referrers(visitors, output_dir.joinpath('dailyreferrers.csv'))
def global_stats(visitors, output_dir):