feat: URL tootu a hashtagy v textu
This commit is contained in:
+10
-4
@@ -35,8 +35,7 @@ def api_get(url, token):
|
||||
raise
|
||||
|
||||
def clean_content(content):
    """Convert a status's HTML content into a single line of plain text.

    Every HTML tag is replaced by a space, HTML entities are decoded, and
    runs of whitespace are collapsed into single spaces. Hashtag links are
    not removed: only their markup is stripped, so the tag text stays in the
    result.

    Args:
        content: HTML string to clean.

    Returns:
        The plain-text version of ``content`` with surrounding whitespace
        removed.
    """
    # Tags become spaces (not empty strings) so that words separated only by
    # markup such as "<br>" or "</p><p>" do not get glued together.
    without_tags = re.sub(r"<[^>]+>", " ", content)
    # Entities are decoded after tag removal, so an escaped "&lt;" in the
    # text is never mistaken for the start of a tag.
    decoded = html.unescape(without_tags)
    return re.sub(r"\s+", " ", decoded).strip()
|
||||
|
||||
@@ -56,6 +55,12 @@ def main():
|
||||
except Exception:
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
trend_tags = api_get(f"{base_url}/api/v1/trends/tags?limit=5", token)
|
||||
tags = [t["name"] for t in trend_tags]
|
||||
except Exception:
|
||||
tags = []
|
||||
|
||||
candidates = []
|
||||
for s in statuses:
|
||||
if "@" in s.get("account", {}).get("acct", ""):
|
||||
@@ -68,6 +73,7 @@ def main():
|
||||
candidates.append({
|
||||
"acct": s["account"]["acct"],
|
||||
"text": text,
|
||||
"url": s.get("url", ""),
|
||||
"reblogs": reblogs,
|
||||
"favourites": favourites,
|
||||
"score": reblogs + favourites,
|
||||
@@ -81,9 +87,9 @@ def main():
|
||||
out_path = os.path.join("data", f"{today}.json")
|
||||
|
||||
with open(out_path, "w", encoding="utf-8") as f:
|
||||
json.dump({"date": today, "top": top}, f, ensure_ascii=False, indent=2)
|
||||
json.dump({"date": today, "top": top, "tags": tags}, f, ensure_ascii=False, indent=2)
|
||||
|
||||
print(f"Uloženo: {out_path} ({len(top)} tootů)")
|
||||
print(f"Uloženo: {out_path} ({len(top)} tootů, {len(tags)} hashtagů)")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
+35
-11
@@ -7,6 +7,7 @@ import sys
|
||||
import argparse
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
from collections import Counter
|
||||
from datetime import datetime, timezone, timedelta
|
||||
|
||||
TIPS = [
|
||||
@@ -103,7 +104,6 @@ def get_measures(base_url, admin_token, date_from, date_to, keys=None):
|
||||
raise
|
||||
|
||||
def truncate(text, max_chars=100):
|
||||
text = re.sub(r'<a\b[^>]*class="[^"]*hashtag[^"]*"[^>]*>.*?</a>', "",text, flags=re.IGNORECASE)
|
||||
text = re.sub(r"<[^>]+>", " ", text)
|
||||
text = html.unescape(text)
|
||||
text = re.sub(r"\s+", " ", text).strip()
|
||||
@@ -147,7 +147,7 @@ def build_monthly_toot(measures_data, tags, top_tooty, date_to, prev_stats, inst
|
||||
|
||||
if top_tooty:
|
||||
blocks = "\n\n".join(
|
||||
f"👤 @{s['acct']}\n\"{truncate(s['text'], 80).replace(chr(10), ' ')}\"\n🔁 {s['reblogs']} ⭐ {s['favourites']}"
|
||||
f"👤 @{s['acct']}\n\"{truncate(s['text'], 80).replace(chr(10), ' ')}\"\n🔁 {s['reblogs']} ⭐ {s['favourites']}\n🔗 {s.get('url', '')}"
|
||||
for s in top_tooty
|
||||
)
|
||||
tooty_sekce = f"\n🌟 Tooty měsíce:\n\n{blocks}"
|
||||
@@ -182,7 +182,7 @@ def build_toot(measures_data, tags, top_tooty, date_from, date_to, week_number):
|
||||
|
||||
if top_tooty:
|
||||
blocks = "\n\n".join(
|
||||
f"👤 @{s['acct']}\n\"{truncate(s['text'], 80).replace(chr(10), ' ')}\"\n🔁 {s['reblogs']} ⭐ {s['favourites']}"
|
||||
f"👤 @{s['acct']}\n\"{truncate(s['text'], 80).replace(chr(10), ' ')}\"\n🔁 {s['reblogs']} ⭐ {s['favourites']}\n🔗 {s.get('url', '')}"
|
||||
for s in top_tooty
|
||||
)
|
||||
toot_tyden = f"🌟 Tooty týdne:\n\n{blocks}\n\n"
|
||||
@@ -208,6 +208,26 @@ def build_toot(measures_data, tags, top_tooty, date_from, date_to, week_number):
|
||||
f"💡 Tip týdne: {tip}"
|
||||
)
|
||||
|
||||
def load_tags_from_data(date_to, days, top_n):
    """Aggregate hashtags from the daily JSON snapshots in ``data/``.

    Walks backwards from ``date_to`` over ``days`` consecutive days
    (``date_to`` itself included), reading ``data/<YYYY-MM-DD>.json`` for
    each day and counting every entry of that file's ``"tags"`` value.
    Days without a snapshot file are skipped.

    Args:
        date_to: Last day of the window; must support subtracting a
            ``timedelta`` and ``strftime``.
        days: Number of days to look back, counting ``date_to``.
        top_n: Maximum number of tags to return.

    Returns:
        A list of ``{"name": tag}`` dicts for the ``top_n`` most frequent
        tags, most frequent first, or ``None`` when no snapshot in the
        window contained a non-empty ``"tags"`` value.
    """
    tally = Counter()
    saw_tags = False

    for offset in range(days):
        stamp = (date_to - timedelta(days=offset)).strftime("%Y-%m-%d")
        snapshot_path = os.path.join("data", f"{stamp}.json")

        try:
            with open(snapshot_path, encoding="utf-8") as handle:
                snapshot = json.load(handle)
        except FileNotFoundError:
            # No snapshot was written for this day; move on to the next one.
            continue

        day_tags = snapshot.get("tags")
        if not day_tags:
            # Snapshot has no "tags" entry, or it is empty.
            continue

        saw_tags = True
        for name in day_tags:
            tally[name] += 1

    if saw_tags:
        return [{"name": name} for name, _ in tally.most_common(top_n)]
    # None (rather than an empty list) tells the caller that no tag data was
    # found at all.
    return None
|
||||
|
||||
def load_tooty_from_data(date_to, days):
|
||||
seen = set()
|
||||
all_tooty = []
|
||||
@@ -258,10 +278,12 @@ def main():
|
||||
except Exception:
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
tags = api_get(f"{base_url}/api/v1/trends/tags?limit=5", admin_token)
|
||||
except Exception:
|
||||
tags = []
|
||||
tags = load_tags_from_data(date_to, 30, 5)
|
||||
if tags is None:
|
||||
try:
|
||||
tags = api_get(f"{base_url}/api/v1/trends/tags?limit=5", admin_token)
|
||||
except Exception:
|
||||
tags = []
|
||||
|
||||
try:
|
||||
instance_info = api_get(f"{base_url}/api/v1/instance")
|
||||
@@ -314,10 +336,12 @@ def main():
|
||||
except Exception:
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
tags = api_get(f"{base_url}/api/v1/trends/tags?limit=3", admin_token)
|
||||
except Exception:
|
||||
tags = []
|
||||
tags = load_tags_from_data(date_to, 7, 3)
|
||||
if tags is None:
|
||||
try:
|
||||
tags = api_get(f"{base_url}/api/v1/trends/tags?limit=3", admin_token)
|
||||
except Exception:
|
||||
tags = []
|
||||
|
||||
top_tooty = load_tooty_from_data(date_to, 7)
|
||||
toot = build_toot(measures_data, tags, top_tooty, date_from, date_to, week_number)
|
||||
|
||||
Reference in New Issue
Block a user