feat: URL tootu a hashtagy v textu

2026-04-20 19:00:45 +02:00
parent eb72db2482
commit 6e0df5231b
2 changed files with 45 additions and 15 deletions
@@ -35,8 +35,7 @@ def api_get(url, token):
        raise

 def clean_content(content):
-    text = re.sub(r'<a\b[^>]*class="[^"]*hashtag[^"]*"[^>]*>.*?</a>', "", content, flags=re.IGNORECASE)
-    text = re.sub(r"<[^>]+>", " ", text)
+    text = re.sub(r"<[^>]+>", " ", content)
    text = html.unescape(text)
    return re.sub(r"\s+", " ", text).strip()

@@ -56,6 +55,12 @@ def main():
    except Exception:
        sys.exit(1)

+    try:
+        trend_tags = api_get(f"{base_url}/api/v1/trends/tags?limit=5", token)
+        tags = [t["name"] for t in trend_tags]
+    except Exception:
+        tags = []
+
    candidates = []
    for s in statuses:
        if "@" in s.get("account", {}).get("acct", ""):
@@ -68,6 +73,7 @@ def main():
        candidates.append({
            "acct": s["account"]["acct"],
            "text": text,
+            "url": s.get("url", ""),
            "reblogs": reblogs,
            "favourites": favourites,
            "score": reblogs + favourites,
@@ -81,9 +87,9 @@ def main():
    out_path = os.path.join("data", f"{today}.json")

    with open(out_path, "w", encoding="utf-8") as f:
-        json.dump({"date": today, "top": top}, f, ensure_ascii=False, indent=2)
+        json.dump({"date": today, "top": top, "tags": tags}, f, ensure_ascii=False, indent=2)

-    print(f"Uloženo: {out_path} ({len(top)} tootů)")
+    print(f"Uloženo: {out_path} ({len(top)} tootů, {len(tags)} hashtagů)")

 if __name__ == "__main__":
    main()
@@ -7,6 +7,7 @@ import sys
 import argparse
 import urllib.request
 import urllib.error
+from collections import Counter
 from datetime import datetime, timezone, timedelta

 TIPS = [
@@ -103,7 +104,6 @@ def get_measures(base_url, admin_token, date_from, date_to, keys=None):
        raise

 def truncate(text, max_chars=100):
-    text = re.sub(r'<a\b[^>]*class="[^"]*hashtag[^"]*"[^>]*>.*?</a>', "",text, flags=re.IGNORECASE)
    text = re.sub(r"<[^>]+>", " ", text)
    text = html.unescape(text)
    text = re.sub(r"\s+", " ", text).strip()
@@ -147,7 +147,7 @@ def build_monthly_toot(measures_data, tags, top_tooty, date_to, prev_stats, inst

    if top_tooty:
        blocks = "\n\n".join(
-            f"👤 @{s['acct']}\n\"{truncate(s['text'], 80).replace(chr(10), ' ')}\"\n🔁 {s['reblogs']}  ⭐ {s['favourites']}"
+            f"👤 @{s['acct']}\n\"{truncate(s['text'], 80).replace(chr(10), ' ')}\"\n🔁 {s['reblogs']}  ⭐ {s['favourites']}\n🔗 {s.get('url', '')}"
            for s in top_tooty
        )
        tooty_sekce = f"\n🌟 Tooty měsíce:\n\n{blocks}"
@@ -182,7 +182,7 @@ def build_toot(measures_data, tags, top_tooty, date_from, date_to, week_number):

    if top_tooty:
        blocks = "\n\n".join(
-            f"👤 @{s['acct']}\n\"{truncate(s['text'], 80).replace(chr(10), ' ')}\"\n🔁 {s['reblogs']}  ⭐ {s['favourites']}"
+            f"👤 @{s['acct']}\n\"{truncate(s['text'], 80).replace(chr(10), ' ')}\"\n🔁 {s['reblogs']}  ⭐ {s['favourites']}\n🔗 {s.get('url', '')}"
            for s in top_tooty
        )
        toot_tyden = f"🌟 Tooty týdne:\n\n{blocks}\n\n"
@@ -208,6 +208,26 @@ def build_toot(measures_data, tags, top_tooty, date_from, date_to, week_number):
        f"💡 Tip týdne: {tip}"
    )

+def load_tags_from_data(date_to, days, top_n):
+    counts = Counter()
+    found_any = False
+    for i in range(days):
+        day = (date_to - timedelta(days=i)).strftime("%Y-%m-%d")
+        path = os.path.join("data", f"{day}.json")
+        try:
+            with open(path, encoding="utf-8") as f:
+                file_data = json.load(f)
+            tags = file_data.get("tags")
+            if tags:
+                found_any = True
+                for tag in tags:
+                    counts[tag] += 1
+        except FileNotFoundError:
+            pass
+    if not found_any:
+        return None
+    return [{"name": tag} for tag, _ in counts.most_common(top_n)]
+
 def load_tooty_from_data(date_to, days):
    seen = set()
    all_tooty = []
@@ -258,10 +278,12 @@ def main():
        except Exception:
            sys.exit(1)

-        try:
-            tags = api_get(f"{base_url}/api/v1/trends/tags?limit=5", admin_token)
-        except Exception:
-            tags = []
+        tags = load_tags_from_data(date_to, 30, 5)
+        if tags is None:
+            try:
+                tags = api_get(f"{base_url}/api/v1/trends/tags?limit=5", admin_token)
+            except Exception:
+                tags = []

        try:
            instance_info = api_get(f"{base_url}/api/v1/instance")
@@ -314,10 +336,12 @@ def main():
    except Exception:
        sys.exit(1)

-    try:
-        tags = api_get(f"{base_url}/api/v1/trends/tags?limit=3", admin_token)
-    except Exception:
-        tags = []
+    tags = load_tags_from_data(date_to, 7, 3)
+    if tags is None:
+        try:
+            tags = api_get(f"{base_url}/api/v1/trends/tags?limit=3", admin_token)
+        except Exception:
+            tags = []

    top_tooty = load_tooty_from_data(date_to, 7)
    toot = build_toot(measures_data, tags, top_tooty, date_from, date_to, week_number)