feat: stránkování denní timeline, načte celý den místo 40 tootů

2026-05-04 19:01:23 +02:00
parent 1298e391d0
commit 1a300680e3
1 changed files with 39 additions and 22 deletions
@@ -40,6 +40,29 @@ def clean_content(content):
    text = re.sub(r"#\s+(\w)", r"#\1", text)
    return re.sub(r"\s+", " ", text).strip()

+def fetch_daily_timeline(base_url, token, max_pages=10, window_seconds=86400):
+    """Collect local public-timeline toots from the last ``window_seconds``.
+
+    Pages backwards through ``/api/v1/timelines/public?local=true`` using
+    ``max_id`` and stops at the first toot older than the cutoff, at an empty
+    page, or after ``max_pages`` requests, whichever comes first.
+
+    Args:
+        base_url: Instance URL without a trailing slash.
+        token: Credential passed through to ``api_get``.
+        max_pages: Upper bound on the number of requests (40 toots each).
+        window_seconds: Age limit for collected toots; defaults to 24 hours.
+
+    Returns:
+        The toots in the order received, each no older than the cutoff.
+
+    Raises:
+        Whatever ``api_get`` raises; a toot without a parseable
+        ``created_at`` raises ``KeyError`` or ``ValueError``.
+    """
+    cutoff = datetime.now(timezone.utc).timestamp() - window_seconds
+    all_toots = []
+    max_id = None
+    for _ in range(max_pages):
+        url = f"{base_url}/api/v1/timelines/public?local=true&limit=40"
+        if max_id is not None:
+            url += f"&max_id={max_id}"
+        toots = api_get(url, token)
+        if not toots:
+            break
+        for toot in toots:
+            # The API emits a trailing "Z", which older fromisoformat() rejects.
+            created_at = datetime.fromisoformat(toot["created_at"].replace("Z", "+00:00"))
+            # NOTE(review): stopping at the first old toot assumes the API
+            # returns toots newest first -- confirm against the API docs.
+            if created_at.timestamp() < cutoff:
+                return all_toots
+            all_toots.append(toot)
+        # Ask the next request for toots strictly older than this page.
+        max_id = toots[-1]["id"]
+    else:
+        # The page cap ran out before the cutoff was reached, so the result
+        # may not cover the whole window; say so instead of truncating silently.
+        print(
+            f"Varování: dosažen limit {max_pages} stránek, timeline může být neúplná",
+            file=sys.stderr,
+        )
+    return all_toots
+
 def main():
    env = {**load_env(), **os.environ}

@@ -52,36 +75,29 @@ def main():
    base_url = env["INSTANCE_URL"].rstrip("/")

    try:
-        statuses = api_get(f"{base_url}/api/v1/trends/statuses?limit=10", token)
+        timeline = fetch_daily_timeline(base_url, token)
    except Exception:
        sys.exit(1)

-    try:
-        timeline = api_get(f"{base_url}/api/v1/timelines/public?local=true&limit=40", token)
-        tag_counts = {}
-        for toot in timeline:
-            if toot.get("language") != "cs":
-                continue
-            for tag in toot.get("tags", []):
-                name = tag["name"]
-                tag_counts[name] = tag_counts.get(name, 0) + 1
-        tags = [t for t, _ in sorted(tag_counts.items(), key=lambda x: x[1], reverse=True)[:5]]
-    except Exception:
-        tags = []
-
+    tag_counts = {}
    candidates = []
-    for s in statuses:
-        if "@" in s.get("account", {}).get("acct", ""):
+    for toot in timeline:
+        if toot.get("language") != "cs":
            continue
-        text = clean_content(s.get("content", ""))
+        if "@" in toot.get("account", {}).get("acct", ""):
+            continue
+        text = clean_content(toot.get("content", ""))
        if len(text) < 10:
            continue
-        reblogs = s.get("reblogs_count", 0)
-        favourites = s.get("favourites_count", 0)
+        for tag in toot.get("tags", []):
+            name = tag["name"]
+            tag_counts[name] = tag_counts.get(name, 0) + 1
+        reblogs = toot.get("reblogs_count", 0)
+        favourites = toot.get("favourites_count", 0)
        candidates.append({
-            "acct": s["account"]["acct"],
+            "acct": toot["account"]["acct"],
            "text": text,
-            "url": s.get("url", ""),
+            "url": toot.get("url", ""),
            "reblogs": reblogs,
            "favourites": favourites,
            "score": reblogs + favourites,
@@ -89,6 +105,7 @@ def main():

    candidates.sort(key=lambda x: x["score"], reverse=True)
    top = candidates[:3]
+    tags = [t for t, _ in sorted(tag_counts.items(), key=lambda x: x[1], reverse=True)[:5]]

    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    os.makedirs("data", exist_ok=True)
@@ -97,7 +114,7 @@ def main():
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump({"date": today, "top": top, "tags": tags}, f, ensure_ascii=False, indent=2)

-    print(f"Uloženo: {out_path} ({len(top)} tootů, {len(tags)} hashtagů)")
+    print(f"Uloženo: {out_path} ({len(timeline)} tootů načteno, {len(top)} top, {len(tags)} hashtagů)")

 if __name__ == "__main__":
    main()