2 Commits

3 changed files with 86 additions and 17 deletions
+29 -14
View File
@@ -1,26 +1,41 @@
# mamutovo-stats-bot # mamutovo-stats-bot
Týdenní statistiky Mamutovo.cz — bot tootuje každé pondělí přehled aktivity instance z účtu [@novinky@mamutovo.cz](https://mamutovo.cz/@novinky). Statistický bot pro Mamutovo.cz. Tootuje týdenní a měsíční přehledy aktivity instance z účtu @novinky@mamutovo.cz.
## Co tootuje ## Co tootuje
- Počet nových a aktivních uživatelů
- Počet nových uživatelů za týden - Počet interakcí a tootů za období
- Počet aktivních uživatelů - Sdílená média (fotky, videa)
- Počet tootů za týden - Nejaktivnější hodina dne
- Top hashtagy - Populární hashtagy
- Top tooty období (podle boostů + oblíbených)
- Top odkazy období
- Nejdiskutovanější toot
- Top přispěvatel
- Nový účet
- Porovnání s předchozím obdobím
- Tip týdne (rotující seznam) - Tip týdne (rotující seznam)
## Jak funguje ## Soubory
- `daily_top.py` — denní sběr dat z timeline (cron denně)
- `weekly_report.py` — generuje a posílá report (cron týdně/měsíčně)
- `data/*.json` — denní data, mazána po 60 dnech
Python skript spouštěný cronem každé pondělí v 9:00. Používá Mastodon admin API. ## Jak funguje
Python 3, pouze standardní knihovny. daily_top.py běží denně a ukládá data do data/. weekly_report.py sestaví report z nasbíraných dat a pošle ho na @novinky.
## Režimy weekly_report.py
- výchozí — týdenní přehled
- `--monthly` — měsíční přehled
- `--dry-run` — jen vypíše, neodešle
## Požadavky ## Požadavky
- Python 3.9+ (kvůli zoneinfo)
- Python 3 - .env s proměnnými: NOVINKY_TOKEN, STATS_TOKEN, INSTANCE_URL
- Admin token pro mamutovo.cz
- Proměnná prostředí `NOVINKY_TOKEN` v souboru `.env`
## Nasazení ## Nasazení
Server: archos@VPS
Cesta: /opt/mamutovo-stats-bot/
Server: `archos@VPS server` ## Licence
Cesta: `/opt/mamutovo-stats-bot/` MIT
+14
View File
@@ -108,12 +108,14 @@ def main():
tag_counts[name] = tag_counts.get(name, 0) + 1 tag_counts[name] = tag_counts.get(name, 0) + 1
reblogs = toot.get("reblogs_count", 0) reblogs = toot.get("reblogs_count", 0)
favourites = toot.get("favourites_count", 0) favourites = toot.get("favourites_count", 0)
replies = toot.get("replies_count", 0)
candidates.append({ candidates.append({
"acct": toot["account"]["acct"], "acct": toot["account"]["acct"],
"text": text, "text": text,
"url": toot.get("url", ""), "url": toot.get("url", ""),
"reblogs": reblogs, "reblogs": reblogs,
"favourites": favourites, "favourites": favourites,
"replies": replies,
"score": reblogs + favourites, "score": reblogs + favourites,
}) })
@@ -121,6 +123,17 @@ def main():
top = candidates[:3] top = candidates[:3]
tags = [t for t, _ in sorted(tag_counts.items(), key=lambda x: x[1], reverse=True)[:5]] tags = [t for t, _ in sorted(tag_counts.items(), key=lambda x: x[1], reverse=True)[:5]]
most_discussed = None
if candidates:
md = max(candidates, key=lambda x: x["replies"])
if md["replies"] > 0:
most_discussed = {
"acct": md["acct"],
"text": md["text"],
"url": md["url"],
"replies": md["replies"],
}
authors_count = {} authors_count = {}
for c in candidates: for c in candidates:
authors_count[c["acct"]] = authors_count.get(c["acct"], 0) + 1 authors_count[c["acct"]] = authors_count.get(c["acct"], 0) + 1
@@ -162,6 +175,7 @@ def main():
"authors_count": authors_count, "authors_count": authors_count,
"newest_account": newest_account, "newest_account": newest_account,
"top": top, "top": top,
"most_discussed": most_discussed,
"tags": tags, "tags": tags,
"top_links": top_links, "top_links": top_links,
"media_count": media_count, "media_count": media_count,
+43 -3
View File
@@ -128,7 +128,7 @@ def format_month_cs(dt):
def build_monthly_toot(measures_data, tags, top_tooty, date_to, prev_stats, instance_info, def build_monthly_toot(measures_data, tags, top_tooty, date_to, prev_stats, instance_info,
total_count=0, top_author=None, newest_account=None, top_links=None, total_count=0, top_author=None, newest_account=None, top_links=None,
media_count=None, hourly_count=None): media_count=None, hourly_count=None, most_discussed=None):
stats = {m["key"]: int(m["total"]) for m in measures_data} stats = {m["key"]: int(m["total"]) for m in measures_data}
new_users = stats.get("new_users", 0) new_users = stats.get("new_users", 0)
active_users = stats.get("active_users", 0) active_users = stats.get("active_users", 0)
@@ -186,6 +186,16 @@ def build_monthly_toot(measures_data, tags, top_tooty, date_to, prev_stats, inst
else: else:
peak_line = "" peak_line = ""
if most_discussed:
discussed_sekce = (
f"\n💬 Nejdiskutovanější toot:\n"
f"@{most_discussed['acct']} ({most_discussed['replies']} odpovědí)\n"
f"\"{truncate(most_discussed['text'], 80).replace(chr(10), ' ')}\"\n"
f"🔗 {most_discussed.get('url', '')}"
)
else:
discussed_sekce = ""
return ( return (
f"🐘 Měsíční přehled Mamutovo.cz\n" f"🐘 Měsíční přehled Mamutovo.cz\n"
f"📅 {format_month_cs(date_to)}\n" f"📅 {format_month_cs(date_to)}\n"
@@ -206,11 +216,12 @@ def build_monthly_toot(measures_data, tags, top_tooty, date_to, prev_stats, inst
f"{extra}" f"{extra}"
f"{tooty_sekce}" f"{tooty_sekce}"
f"{links_sekce}" f"{links_sekce}"
f"{discussed_sekce}"
) )
def build_toot(measures_data, tags, top_tooty, date_from, date_to, week_number, def build_toot(measures_data, tags, top_tooty, date_from, date_to, week_number,
total_count=0, top_author=None, newest_account=None, user_count=0, top_links=None, total_count=0, top_author=None, newest_account=None, user_count=0, top_links=None,
prev_stats=None, media_count=None, hourly_count=None): prev_stats=None, media_count=None, hourly_count=None, most_discussed=None):
stats = {m["key"]: int(m["total"]) for m in measures_data} stats = {m["key"]: int(m["total"]) for m in measures_data}
new_users = stats.get("new_users", 0) new_users = stats.get("new_users", 0)
active_users = stats.get("active_users", 0) active_users = stats.get("active_users", 0)
@@ -262,6 +273,16 @@ def build_toot(measures_data, tags, top_tooty, date_from, date_to, week_number,
else: else:
peak_line = "" peak_line = ""
if most_discussed:
discussed_sekce = (
f"💬 Nejdiskutovanější toot:\n"
f"@{most_discussed['acct']} ({most_discussed['replies']} odpovědí)\n"
f"\"{truncate(most_discussed['text'], 80).replace(chr(10), ' ')}\"\n"
f"🔗 {most_discussed.get('url', '')}\n\n"
)
else:
discussed_sekce = ""
date_from_str = format_date_cs(date_from) date_from_str = format_date_cs(date_from)
date_to_str = format_date_cs(date_to) date_to_str = format_date_cs(date_to)
year = date_to.year year = date_to.year
@@ -282,6 +303,7 @@ def build_toot(measures_data, tags, top_tooty, date_from, date_to, week_number,
f"\n" f"\n"
f"{toot_tyden}" f"{toot_tyden}"
f"{links_sekce}" f"{links_sekce}"
f"{discussed_sekce}"
f"{extra}" f"{extra}"
f"💡 Tip týdne: {tip}" f"💡 Tip týdne: {tip}"
) )
@@ -405,6 +427,21 @@ def load_hourly_count_from_data(date_to, days):
pass pass
return totals return totals
def load_most_discussed_from_data(date_to, days):
    """Return the most-replied-to toot recorded in the daily data files.

    Scans ``data/<YYYY-MM-DD>.json`` for ``date_to`` and the ``days - 1`` days
    before it, reads each file's ``"most_discussed"`` entry and keeps the one
    with the highest ``"replies"`` value. Days are scanned newest-first and
    only a strictly larger count replaces the current best, so on a tie the
    most recent day wins.

    Args:
        date_to: Last day of the reporting window; must support
            ``- timedelta`` and ``strftime``.
        days: Number of days to scan, counting back from ``date_to``.

    Returns:
        The winning ``"most_discussed"`` entry exactly as loaded from JSON,
        or ``None`` when no readable file in the window has a truthy one.
    """
    best = None
    for offset in range(days):
        day = (date_to - timedelta(days=offset)).strftime("%Y-%m-%d")
        path = os.path.join("data", f"{day}.json")
        # Keep the try body limited to the I/O and parsing that can fail.
        try:
            with open(path, encoding="utf-8") as f:
                file_data = json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            # A missing day or a truncated/corrupt file is skipped so that a
            # single bad day does not abort the whole report.
            continue
        md = file_data.get("most_discussed")
        if md and (best is None or md.get("replies", 0) > best.get("replies", 0)):
            best = md
    return best
def load_newest_account_from_data(date_to, days): def load_newest_account_from_data(date_to, days):
for i in range(days): for i in range(days):
day = (date_to - timedelta(days=i)).strftime("%Y-%m-%d") day = (date_to - timedelta(days=i)).strftime("%Y-%m-%d")
@@ -477,9 +514,11 @@ def main():
top_links = load_top_links_from_data(date_to, 30) top_links = load_top_links_from_data(date_to, 30)
media_count = load_media_count_from_data(date_to, 30) media_count = load_media_count_from_data(date_to, 30)
hourly_count = load_hourly_count_from_data(date_to, 30) hourly_count = load_hourly_count_from_data(date_to, 30)
most_discussed = load_most_discussed_from_data(date_to, 30)
toot = build_monthly_toot( toot = build_monthly_toot(
measures_data, tags, top_tooty, date_to, prev_stats, instance_info, measures_data, tags, top_tooty, date_to, prev_stats, instance_info,
total_count, top_author, newest_account, top_links, media_count, hourly_count, total_count, top_author, newest_account, top_links, media_count, hourly_count,
most_discussed,
) )
if args.dry_run: if args.dry_run:
@@ -545,10 +584,11 @@ def main():
top_links = load_top_links_from_data(date_to, 7) top_links = load_top_links_from_data(date_to, 7)
media_count = load_media_count_from_data(date_to, 7) media_count = load_media_count_from_data(date_to, 7)
hourly_count = load_hourly_count_from_data(date_to, 7) hourly_count = load_hourly_count_from_data(date_to, 7)
most_discussed = load_most_discussed_from_data(date_to, 7)
toot = build_toot( toot = build_toot(
measures_data, tags, top_tooty, date_from, date_to, week_number, measures_data, tags, top_tooty, date_from, date_to, week_number,
total_count, top_author, newest_account, user_count, top_links, prev_stats, media_count, total_count, top_author, newest_account, user_count, top_links, prev_stats, media_count,
hourly_count, hourly_count, most_discussed,
) )
if args.dry_run: if args.dry_run: