feat: stránkování denní timeline, načte celý den místo 40 tootů

This commit is contained in:
2026-05-04 19:01:23 +02:00
parent 1298e391d0
commit 1a300680e3
+39 -22
View File
@@ -40,6 +40,29 @@ def clean_content(content):
text = re.sub(r"#\s+(\w)", r"#\1", text)
return re.sub(r"\s+", " ", text).strip()
def fetch_daily_timeline(base_url, token, *, window_seconds=86400,
                         page_size=40, max_pages=10, fetch=None):
    """Collect recent toots from the local public timeline, page by page.

    Pages are requested from ``/api/v1/timelines/public?local=true`` and
    walked in the order the server returns them. Collection stops at the
    first toot whose ``created_at`` is older than ``window_seconds`` ago,
    when a page comes back empty, or after ``max_pages`` requests.

    Args:
        base_url: Instance base URL without a trailing slash.
        token: Access token, passed through unchanged to ``fetch``.
        window_seconds: Age limit in seconds; defaults to 24 hours.
        page_size: Value sent as the ``limit`` query parameter.
            NOTE(review): Mastodon is documented to cap this at 40 --
            confirm before raising it.
        max_pages: Upper bound on the number of requests. With the
            defaults at most ``10 * 40`` toots are returned, so a very
            busy day may be truncated; raise this to cover more.
        fetch: Callable ``fetch(url, token)`` returning one page as a
            sequence of toot dicts. Defaults to the module's ``api_get``;
            mainly useful for exercising pagination without network.

    Returns:
        List of toot dicts inside the window, in server order.

    Raises:
        KeyError: If a toot lacks ``created_at`` or ``id``.
        ValueError: If ``created_at`` is not an ISO 8601 timestamp.
        Whatever ``fetch`` raises is propagated unchanged.
    """
    if fetch is None:
        fetch = api_get

    cutoff = datetime.now(timezone.utc).timestamp() - window_seconds
    collected = []
    max_id = None

    for _ in range(max_pages):
        url = f"{base_url}/api/v1/timelines/public?local=true&limit={page_size}"
        if max_id:
            url += f"&max_id={max_id}"

        page = fetch(url, token)
        if not page:
            break

        for toot in page:
            # Older Pythons' fromisoformat() rejects a trailing "Z", so it
            # is rewritten to an explicit UTC offset first.
            created_at = datetime.fromisoformat(
                toot["created_at"].replace("Z", "+00:00")
            )
            if created_at.timestamp() < cutoff:
                # Everything after this point is assumed to be older still
                # (server returns newest first -- TODO confirm), so stop.
                return collected
            collected.append(toot)

        # Continue from the last toot of this page on the next request.
        max_id = page[-1]["id"]

    return collected
def main():
env = {**load_env(), **os.environ}
@@ -52,36 +75,29 @@ def main():
base_url = env["INSTANCE_URL"].rstrip("/")
try:
statuses = api_get(f"{base_url}/api/v1/trends/statuses?limit=10", token)
timeline = fetch_daily_timeline(base_url, token)
except Exception:
sys.exit(1)
try:
timeline = api_get(f"{base_url}/api/v1/timelines/public?local=true&limit=40", token)
tag_counts = {}
for toot in timeline:
if toot.get("language") != "cs":
continue
for tag in toot.get("tags", []):
name = tag["name"]
tag_counts[name] = tag_counts.get(name, 0) + 1
tags = [t for t, _ in sorted(tag_counts.items(), key=lambda x: x[1], reverse=True)[:5]]
except Exception:
tags = []
tag_counts = {}
candidates = []
for s in statuses:
if "@" in s.get("account", {}).get("acct", ""):
for toot in timeline:
if toot.get("language") != "cs":
continue
text = clean_content(s.get("content", ""))
if "@" in toot.get("account", {}).get("acct", ""):
continue
text = clean_content(toot.get("content", ""))
if len(text) < 10:
continue
reblogs = s.get("reblogs_count", 0)
favourites = s.get("favourites_count", 0)
for tag in toot.get("tags", []):
name = tag["name"]
tag_counts[name] = tag_counts.get(name, 0) + 1
reblogs = toot.get("reblogs_count", 0)
favourites = toot.get("favourites_count", 0)
candidates.append({
"acct": s["account"]["acct"],
"acct": toot["account"]["acct"],
"text": text,
"url": s.get("url", ""),
"url": toot.get("url", ""),
"reblogs": reblogs,
"favourites": favourites,
"score": reblogs + favourites,
@@ -89,6 +105,7 @@ def main():
candidates.sort(key=lambda x: x["score"], reverse=True)
top = candidates[:3]
tags = [t for t, _ in sorted(tag_counts.items(), key=lambda x: x[1], reverse=True)[:5]]
today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
os.makedirs("data", exist_ok=True)
@@ -97,7 +114,7 @@ def main():
with open(out_path, "w", encoding="utf-8") as f:
json.dump({"date": today, "top": top, "tags": tags}, f, ensure_ascii=False, indent=2)
print(f"Uloženo: {out_path} ({len(top)} tootů, {len(tags)} hashtagů)")
print(f"Uloženo: {out_path} ({len(timeline)} tootů načteno, {len(top)} top, {len(tags)} hashtagů)")
if __name__ == "__main__":
main()