feat: stránkování denní timeline, načte celý den místo 40 tootů
This commit is contained in:
+39
-22
@@ -40,6 +40,29 @@ def clean_content(content):
|
||||
text = re.sub(r"#\s+(\w)", r"#\1", text)
|
||||
return re.sub(r"\s+", " ", text).strip()
|
||||
|
||||
def fetch_daily_timeline(base_url, token, *, max_pages=10, page_size=40,
                         window_seconds=86400):
    """Collect recent toots from the instance's local public timeline.

    Requests ``/api/v1/timelines/public?local=true`` page by page, passing
    the ``id`` of the last toot of the previous page as ``max_id``, and
    accumulates toots until one of the following happens:

    * a toot with ``created_at`` older than the cutoff is encountered,
    * the API returns an empty page,
    * ``max_pages`` requests have been made.

    Note that when the page cap is hit first, the result covers less than
    the full window (at most ``max_pages * page_size`` toots).

    Args:
        base_url: URL prefix the API path is appended to.
        token: Passed through unchanged to ``api_get``.
        max_pages: Upper bound on the number of API requests.
        page_size: Value sent as the ``limit`` query parameter.
        window_seconds: How far back from "now" (UTC) the cutoff lies.

    Returns:
        A list of toot objects (as returned by ``api_get``) whose
        ``created_at`` is not older than the cutoff, in the order the API
        delivered them.
    """
    cutoff = datetime.now(timezone.utc).timestamp() - window_seconds
    all_toots = []
    max_id = None

    for _ in range(max_pages):
        url = f"{base_url}/api/v1/timelines/public?local=true&limit={page_size}"
        if max_id:
            url += f"&max_id={max_id}"

        page = api_get(url, token)
        if not page:
            break

        reached_cutoff = False
        for toot in page:
            # fromisoformat() on older Pythons rejects a trailing "Z", so
            # rewrite it as an explicit UTC offset first.
            created_at = datetime.fromisoformat(
                toot["created_at"].replace("Z", "+00:00")
            )
            if created_at.timestamp() < cutoff:
                # Assumes the API lists toots newest-first, so everything
                # after this one is older as well — stop paging entirely.
                reached_cutoff = True
                break
            all_toots.append(toot)

        if reached_cutoff:
            break

        # Continue with toots older than the last one on this page.
        max_id = page[-1]["id"]

    return all_toots
|
||||
|
||||
def main():
|
||||
env = {**load_env(), **os.environ}
|
||||
|
||||
@@ -52,36 +75,29 @@ def main():
|
||||
base_url = env["INSTANCE_URL"].rstrip("/")
|
||||
|
||||
try:
|
||||
statuses = api_get(f"{base_url}/api/v1/trends/statuses?limit=10", token)
|
||||
timeline = fetch_daily_timeline(base_url, token)
|
||||
except Exception:
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
timeline = api_get(f"{base_url}/api/v1/timelines/public?local=true&limit=40", token)
|
||||
tag_counts = {}
|
||||
for toot in timeline:
|
||||
if toot.get("language") != "cs":
|
||||
continue
|
||||
for tag in toot.get("tags", []):
|
||||
name = tag["name"]
|
||||
tag_counts[name] = tag_counts.get(name, 0) + 1
|
||||
tags = [t for t, _ in sorted(tag_counts.items(), key=lambda x: x[1], reverse=True)[:5]]
|
||||
except Exception:
|
||||
tags = []
|
||||
|
||||
tag_counts = {}
|
||||
candidates = []
|
||||
for s in statuses:
|
||||
if "@" in s.get("account", {}).get("acct", ""):
|
||||
for toot in timeline:
|
||||
if toot.get("language") != "cs":
|
||||
continue
|
||||
text = clean_content(s.get("content", ""))
|
||||
if "@" in toot.get("account", {}).get("acct", ""):
|
||||
continue
|
||||
text = clean_content(toot.get("content", ""))
|
||||
if len(text) < 10:
|
||||
continue
|
||||
reblogs = s.get("reblogs_count", 0)
|
||||
favourites = s.get("favourites_count", 0)
|
||||
for tag in toot.get("tags", []):
|
||||
name = tag["name"]
|
||||
tag_counts[name] = tag_counts.get(name, 0) + 1
|
||||
reblogs = toot.get("reblogs_count", 0)
|
||||
favourites = toot.get("favourites_count", 0)
|
||||
candidates.append({
|
||||
"acct": s["account"]["acct"],
|
||||
"acct": toot["account"]["acct"],
|
||||
"text": text,
|
||||
"url": s.get("url", ""),
|
||||
"url": toot.get("url", ""),
|
||||
"reblogs": reblogs,
|
||||
"favourites": favourites,
|
||||
"score": reblogs + favourites,
|
||||
@@ -89,6 +105,7 @@ def main():
|
||||
|
||||
candidates.sort(key=lambda x: x["score"], reverse=True)
|
||||
top = candidates[:3]
|
||||
tags = [t for t, _ in sorted(tag_counts.items(), key=lambda x: x[1], reverse=True)[:5]]
|
||||
|
||||
today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
|
||||
os.makedirs("data", exist_ok=True)
|
||||
@@ -97,7 +114,7 @@ def main():
|
||||
with open(out_path, "w", encoding="utf-8") as f:
|
||||
json.dump({"date": today, "top": top, "tags": tags}, f, ensure_ascii=False, indent=2)
|
||||
|
||||
print(f"Uloženo: {out_path} ({len(top)} tootů, {len(tags)} hashtagů)")
|
||||
print(f"Uloženo: {out_path} ({len(timeline)} tootů načteno, {len(top)} top, {len(tags)} hashtagů)")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user