feat: URL tootu a hashtagy v textu

This commit is contained in:
2026-04-20 19:00:45 +02:00
parent eb72db2482
commit 6e0df5231b
2 changed files with 45 additions and 15 deletions
+10 -4
View File
@@ -35,8 +35,7 @@ def api_get(url, token):
raise
def clean_content(content):
    """Convert a status HTML body into plain single-line text.

    Every HTML tag is replaced by a space (the text inside the tags,
    including the text of hashtag links, is kept), HTML entities are
    unescaped, and runs of whitespace are collapsed to a single space.

    Args:
        content: HTML string to clean.

    Returns:
        The cleaned text with leading/trailing whitespace removed.
    """
    # Replace tags with a space (not "") so adjacent words separated only
    # by markup such as <br> or </p><p> do not get glued together.
    text = re.sub(r"<[^>]+>", " ", content)
    text = html.unescape(text)
    return re.sub(r"\s+", " ", text).strip()
@@ -56,6 +55,12 @@ def main():
except Exception:
sys.exit(1)
try:
trend_tags = api_get(f"{base_url}/api/v1/trends/tags?limit=5", token)
tags = [t["name"] for t in trend_tags]
except Exception:
tags = []
candidates = []
for s in statuses:
if "@" in s.get("account", {}).get("acct", ""):
@@ -68,6 +73,7 @@ def main():
candidates.append({
"acct": s["account"]["acct"],
"text": text,
"url": s.get("url", ""),
"reblogs": reblogs,
"favourites": favourites,
"score": reblogs + favourites,
@@ -81,9 +87,9 @@ def main():
out_path = os.path.join("data", f"{today}.json")
with open(out_path, "w", encoding="utf-8") as f:
json.dump({"date": today, "top": top}, f, ensure_ascii=False, indent=2)
json.dump({"date": today, "top": top, "tags": tags}, f, ensure_ascii=False, indent=2)
print(f"Uloženo: {out_path} ({len(top)} tootů)")
print(f"Uloženo: {out_path} ({len(top)} tootů, {len(tags)} hashtagů)")
if __name__ == "__main__":
main()
+35 -11
View File
@@ -7,6 +7,7 @@ import sys
import argparse
import urllib.request
import urllib.error
from collections import Counter
from datetime import datetime, timezone, timedelta
TIPS = [
@@ -103,7 +104,6 @@ def get_measures(base_url, admin_token, date_from, date_to, keys=None):
raise
def truncate(text, max_chars=100):
text = re.sub(r'<a\b[^>]*class="[^"]*hashtag[^"]*"[^>]*>.*?</a>', "",text, flags=re.IGNORECASE)
text = re.sub(r"<[^>]+>", " ", text)
text = html.unescape(text)
text = re.sub(r"\s+", " ", text).strip()
@@ -147,7 +147,7 @@ def build_monthly_toot(measures_data, tags, top_tooty, date_to, prev_stats, inst
if top_tooty:
blocks = "\n\n".join(
f"👤 @{s['acct']}\n\"{truncate(s['text'], 80).replace(chr(10), ' ')}\"\n🔁 {s['reblogs']}{s['favourites']}"
f"👤 @{s['acct']}\n\"{truncate(s['text'], 80).replace(chr(10), ' ')}\"\n🔁 {s['reblogs']}{s['favourites']}\n🔗 {s.get('url', '')}"
for s in top_tooty
)
tooty_sekce = f"\n🌟 Tooty měsíce:\n\n{blocks}"
@@ -182,7 +182,7 @@ def build_toot(measures_data, tags, top_tooty, date_from, date_to, week_number):
if top_tooty:
blocks = "\n\n".join(
f"👤 @{s['acct']}\n\"{truncate(s['text'], 80).replace(chr(10), ' ')}\"\n🔁 {s['reblogs']}{s['favourites']}"
f"👤 @{s['acct']}\n\"{truncate(s['text'], 80).replace(chr(10), ' ')}\"\n🔁 {s['reblogs']}{s['favourites']}\n🔗 {s.get('url', '')}"
for s in top_tooty
)
toot_tyden = f"🌟 Tooty týdne:\n\n{blocks}\n\n"
@@ -208,6 +208,26 @@ def build_toot(measures_data, tags, top_tooty, date_from, date_to, week_number):
f"💡 Tip týdne: {tip}"
)
def load_tags_from_data(date_to, days, top_n):
    """Return the most frequent hashtags stored in recent daily data files.

    Reads ``data/<YYYY-MM-DD>.json`` for ``date_to`` and the ``days - 1``
    days before it and counts how often each entry of a file's ``"tags"``
    list occurs across those files.

    Args:
        date_to: Last day of the window. Must support ``strftime`` and
            subtraction of a ``timedelta`` (e.g. ``date`` or ``datetime``).
        days: Number of consecutive days to inspect, ending at ``date_to``.
        top_n: Maximum number of tags to return.

    Returns:
        A list of ``{"name": tag}`` dicts ordered from most to least
        frequent, or ``None`` when none of the inspected files yielded any
        tag, which lets the caller distinguish "no stored data" from a
        real result.
    """
    counts = Counter()
    for offset in range(days):
        day = (date_to - timedelta(days=offset)).strftime("%Y-%m-%d")
        path = os.path.join("data", f"{day}.json")
        try:
            with open(path, encoding="utf-8") as f:
                file_data = json.load(f)
        except (OSError, ValueError):
            # Missing or unreadable file (OSError), or invalid JSON / bad
            # encoding (both ValueError subclasses): skip this day instead
            # of aborting the whole aggregation.
            continue
        if not isinstance(file_data, dict):
            continue
        tags = file_data.get("tags")
        if isinstance(tags, list):
            # Only string tag names are counted; anything else cannot be
            # rendered as a hashtag and may not even be hashable.
            counts.update(tag for tag in tags if isinstance(tag, str))
    if not counts:
        return None
    return [{"name": tag} for tag, _ in counts.most_common(top_n)]
def load_tooty_from_data(date_to, days):
seen = set()
all_tooty = []
@@ -258,10 +278,12 @@ def main():
except Exception:
sys.exit(1)
try:
tags = api_get(f"{base_url}/api/v1/trends/tags?limit=5", admin_token)
except Exception:
tags = []
tags = load_tags_from_data(date_to, 30, 5)
if tags is None:
try:
tags = api_get(f"{base_url}/api/v1/trends/tags?limit=5", admin_token)
except Exception:
tags = []
try:
instance_info = api_get(f"{base_url}/api/v1/instance")
@@ -314,10 +336,12 @@ def main():
except Exception:
sys.exit(1)
try:
tags = api_get(f"{base_url}/api/v1/trends/tags?limit=3", admin_token)
except Exception:
tags = []
tags = load_tags_from_data(date_to, 7, 3)
if tags is None:
try:
tags = api_get(f"{base_url}/api/v1/trends/tags?limit=3", admin_token)
except Exception:
tags = []
top_tooty = load_tooty_from_data(date_to, 7)
toot = build_toot(measures_data, tags, top_tooty, date_from, date_to, week_number)