feat: pouze CZ/SK instance, TOP 100 podle followers
This commit is contained in:
+99
-59
@@ -1,61 +1,101 @@
|
|||||||
Account address,Show boosts
|
Account address,Show boosts
|
||||||
|
FilipHorky@mastodonczech.cz,true
|
||||||
|
alexandrmitrofa@mastodonczech.cz,true
|
||||||
|
petrkou@mastodonczech.cz,true
|
||||||
|
ivanbartos@mastodon.pirati.cz,true
|
||||||
|
xChaos@f.cz,true
|
||||||
|
marcel_kolaja@mastodon.pirati.cz,true
|
||||||
|
jie@mastodonczech.cz,true
|
||||||
|
davidslizek@mastodonczech.cz,true
|
||||||
|
maestrosill@mastodonczech.cz,true
|
||||||
|
OliKockova@cztwitter.cz,true
|
||||||
|
rozanek@mastodonczech.cz,true
|
||||||
|
nolog@witter.cz,true
|
||||||
|
kayla_eilhart@witter.cz,true
|
||||||
|
BohumilVostal@mastodonczech.cz,true
|
||||||
|
filiptitlbach@mastodonczech.cz,true
|
||||||
|
boninska@cztwitter.cz,true
|
||||||
365tipu@mastodonczech.cz,true
|
365tipu@mastodonczech.cz,true
|
||||||
tonline@mastodon.social,true
|
rychlofky@mastodonczech.cz,true
|
||||||
trendytoots@mastodon.social,true
|
parlamentnizpravycz@mastodonczech.cz,true
|
||||||
realTuckFrumper@mastodon.social,true
|
Pirati@mastodon.pirati.cz,true
|
||||||
glyph@mastodon.social,true
|
zandl@mastodon.pirati.cz,true
|
||||||
jbaert@mastodon.social,true
|
fabia_man@mamutovo.cz,true
|
||||||
MacMagazine@mastodon.social,true
|
Roman_M@cztwitter.cz,true
|
||||||
Bahnblogstelle@mastodon.social,true
|
marekl@mastodonczech.cz,true
|
||||||
Viss@mastodon.social,true
|
marketkag@mastodon.pirati.cz,true
|
||||||
3CatInfo@mastodon.social,true
|
VladaFoltan@mastodonczech.cz,true
|
||||||
rpilocator@mastodon.social,true
|
bohdandlouhy@mastodonczech.cz,true
|
||||||
axios@mastodon.social,true
|
Apolenarychlikova@witter.cz,true
|
||||||
Mrfunkedude@mastodon.social,true
|
beneslenka@mastodonczech.cz,true
|
||||||
charlesgaba@mastodon.social,true
|
byczech@witter.cz,true
|
||||||
verge@mastodon.social,true
|
zoul@boskovice.social,true
|
||||||
cmconseils@mastodon.social,true
|
tymoty@f.cz,true
|
||||||
ctietze@mastodon.social,true
|
OttovonWenkoff@mastodonczech.cz,true
|
||||||
agiletortoise@mastodon.social,true
|
Unreed@mastodonczech.cz,true
|
||||||
levelbot@mastodon.social,true
|
lupa@mastodonczech.cz,true
|
||||||
macrumors@mastodon.social,true
|
TomasFriedl@mastodonczech.cz,true
|
||||||
dungeons@mastodon.social,true
|
trnk_c@witter.cz,true
|
||||||
GoatsLive@mastodon.social,true
|
sumavanet@mastodonczech.cz,true
|
||||||
stroughtonsmith@mastodon.social,true
|
honzakorinek@cztwitter.cz,true
|
||||||
die_reklame@mastodon.social,true
|
fipa@mastodonczech.cz,true
|
||||||
gruber@mastodon.social,true
|
zbiejczuk@mastodonczech.cz,true
|
||||||
Le_M_Poireau@mastodon.social,true
|
Otakar_Brabec@cztwitter.cz,true
|
||||||
shriramk@mastodon.social,true
|
blataak@mastodonczech.cz,true
|
||||||
lobsters@mastodon.social,true
|
hajma@cztwitter.cz,true
|
||||||
alternativeto@mas.to,true
|
piskvor@cztwitter.cz,true
|
||||||
popcornreel@mas.to,true
|
zechy@mastodonczech.cz,true
|
||||||
Jgbird@mas.to,true
|
piratzbrna@mastodonczech.cz,true
|
||||||
gabrielesvelto@mas.to,true
|
paveljanicek@mamutovo.cz,true
|
||||||
RantyHighwayman@mas.to,true
|
porucikPihrt@cztwitter.cz,true
|
||||||
harriorrihar@mas.to,true
|
s_kavou_v_ruce@cztwitter.cz,true
|
||||||
Jyoti@mas.to,true
|
mskalick@witter.cz,true
|
||||||
gleick@mas.to,true
|
prrejpal@cztwitter.cz,true
|
||||||
Techaltar@mas.to,true
|
margo@mastodon.arch-linux.cz,true
|
||||||
trumpet@mas.to,true
|
infoekcz@mamutovo.cz,true
|
||||||
kityates@mas.to,true
|
baadvo@mastodonczech.cz,true
|
||||||
Stoned_Deva_@mas.to,true
|
cernejpudinkcz@mastodonczech.cz,true
|
||||||
barunori2023@mas.to,true
|
simindr@mastodonczech.cz,true
|
||||||
libreleah@mas.to,true
|
DivkaVModrem@cztwitter.cz,true
|
||||||
kims@mas.to,true
|
kalendar@mastodon.arch-linux.cz,true
|
||||||
SocraticEthics@mastodon.online,true
|
madla@mastodonczech.cz,true
|
||||||
streetartutopia@mastodon.online,true
|
ChorozonX@cztwitter.cz,true
|
||||||
9to5Mac@mastodon.online,true
|
xmirek@mastodonczech.cz,true
|
||||||
9to5google@mastodon.online,true
|
czstatistika@mastodonczech.cz,true
|
||||||
tinoeberl@mastodon.online,true
|
technotramp@mastodonczech.cz,true
|
||||||
NOSRSS@mastodon.online,true
|
DamianVCechov@cztwitter.cz,true
|
||||||
globalmuseum@mastodon.online,true
|
andrej@mastodon.pirati.cz,true
|
||||||
mastodonmigration@mastodon.online,true
|
vitex@f.cz,true
|
||||||
danirabbit@mastodon.online,true
|
KaterinaGloserova@mastodonczech.cz,true
|
||||||
Aviation_Librarian@mastodon.online,true
|
bycx@mastodonczech.cz,true
|
||||||
gupton68@mastodon.online,true
|
medidekpu@cztwitter.cz,true
|
||||||
phastidio@mastodon.online,true
|
janmelvil@mastodonczech.cz,true
|
||||||
YaLTeR@mastodon.online,true
|
Jankajanicka0@cztwitter.cz,true
|
||||||
newsthump@mastodon.online,true
|
Erdela@mastodonczech.cz,true
|
||||||
nikitonsky@mastodon.online,true
|
Ilusie@mastodon.arch-linux.cz,true
|
||||||
astro_jcm@mastodon.online,true
|
otecfura@witter.cz,true
|
||||||
apache_be@mastodon.online,true
|
Lenislavka@mastodonczech.cz,true
|
||||||
|
caragraph@f.cz,true
|
||||||
|
smoon@mamutovo.cz,true
|
||||||
|
lepapierblanc@mastodonczech.cz,true
|
||||||
|
vitsoft@cztwitter.cz,true
|
||||||
|
Tichy_koutek@mastodonczech.cz,true
|
||||||
|
xcabal05@mamutovo.cz,true
|
||||||
|
babaq@mastodonczech.cz,true
|
||||||
|
tatageek@witter.cz,true
|
||||||
|
medvjed@witter.cz,true
|
||||||
|
jachym@mastodonczech.cz,true
|
||||||
|
Neovlivnicz@witter.cz,true
|
||||||
|
Petr90@mamutovo.cz,true
|
||||||
|
honzajavorek@mastodonczech.cz,true
|
||||||
|
PavelUngr@mastodonczech.cz,true
|
||||||
|
tomulinek@mastodonczech.cz,true
|
||||||
|
backorka@cztwitter.cz,true
|
||||||
|
Onqa6@mastodon.arch-linux.cz,true
|
||||||
|
Razemix@mamutovo.cz,true
|
||||||
|
amarok@mastodonczech.cz,true
|
||||||
|
OpalkovaH@cztwitter.cz,true
|
||||||
|
anlexcz@witter.cz,true
|
||||||
|
tomasmartinek@mastodon.pirati.cz,true
|
||||||
|
lacertacz@mastodonczech.cz,true
|
||||||
|
sibik@mastodonczech.cz,true
|
||||||
|
|||||||
|
+1295
-737
File diff suppressed because it is too large
Load Diff
+55
-41
@@ -18,11 +18,27 @@ Cron (každý den v 3:00):
|
|||||||
0 3 * * * /usr/bin/python3 /opt/mastodon-start/mastodon_cz_accounts.py --output /var/www/start/ >> /var/log/mastodon-start.log 2>&1
|
0 3 * * * /usr/bin/python3 /opt/mastodon-start/mastodon_cz_accounts.py --output /var/www/start/ >> /var/log/mastodon-start.log 2>&1
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json, csv, time, re, argparse, logging
|
import json, csv, time, re, argparse, logging, os
|
||||||
from datetime import datetime, timezone, timedelta
|
from datetime import datetime, timezone, timedelta
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import urllib.request, urllib.error, urllib.parse
|
import urllib.request, urllib.error, urllib.parse
|
||||||
|
|
||||||
|
def _load_token():
    """Return the Mastodon API token, or ``None`` when none is configured.

    Lookup order:

    1. The ``MASTODON_TOKEN`` environment variable (surrounding whitespace
       removed). A blank value is treated as "not set".
    2. A ``.env`` file located next to this script. The first usable line
       wins; a usable line is either ``MASTODON_TOKEN=<value>`` or a bare
       token on a line of its own (no ``=``). Blank lines and ``#`` comments
       are ignored.

    Values may be wrapped in matching single or double quotes, as is common
    in ``.env`` files; the quotes are removed so they do not end up in the
    ``Authorization`` header.

    Returns:
        The token string, or ``None`` if no non-empty token was found or the
        ``.env`` file is missing or unreadable.
    """

    def _unquote(value):
        # Drop one pair of matching surrounding quotes: "tok" / 'tok' -> tok.
        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1].strip()
        return value

    token = os.environ.get("MASTODON_TOKEN", "").strip()
    if token:
        return token

    env_path = Path(__file__).parent / ".env"
    try:
        # EAFP instead of exists()+read: no race window, and a missing or
        # unreadable file simply means "no token" rather than a crash while
        # the module is being imported.
        content = env_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        return None

    for raw_line in content.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        if line.startswith("MASTODON_TOKEN="):
            return _unquote(line.split("=", 1)[1]) or None
        if "=" not in line:
            # A line without "=" is taken to be the raw token value.
            return _unquote(line) or None
    return None
|
||||||
|
|
||||||
|
MASTODON_TOKEN = _load_token()
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s", datefmt="%H:%M:%S")
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s", datefmt="%H:%M:%S")
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -38,28 +54,23 @@ QUERY_INSTANCES = [
|
|||||||
"lgbtcz.social", # 7 CZ uživatelů
|
"lgbtcz.social", # 7 CZ uživatelů
|
||||||
"boskovice.social", # 5 CZ uživatelů
|
"boskovice.social", # 5 CZ uživatelů
|
||||||
"mamutovo.cz",
|
"mamutovo.cz",
|
||||||
# Velké instance – filtr language=cs/sk
|
|
||||||
"mastodon.social", # 346 CZ uživatelů
|
|
||||||
"mas.to", # 33 CZ uživatelů
|
|
||||||
"mastodon.online", # 16 CZ uživatelů
|
|
||||||
"mastodon.world", # 14 CZ uživatelů
|
|
||||||
"mstdn.social", # 12 CZ uživatelů
|
|
||||||
"masto.ai", # 7 CZ uživatelů
|
|
||||||
"fosstodon.org", # 7 CZ uživatelů
|
|
||||||
"infosec.exchange", # 5 CZ uživatelů
|
|
||||||
]
|
]
|
||||||
|
|
||||||
MIN_STATUSES = 10
|
MIN_STATUSES = 10
|
||||||
MIN_FOLLOWERS = 10
|
MIN_FOLLOWERS = 10
|
||||||
MAX_DAYS_INACTIVE = 30
|
MAX_DAYS_INACTIVE = 365
|
||||||
TOP_N = 60
|
TOP_N = 100
|
||||||
RATE_LIMIT_DELAY = 1.2
|
RATE_LIMIT_DELAY = 1.2
|
||||||
PAGE_LIMIT = 80
|
PAGE_LIMIT = 80
|
||||||
MAX_PAGES = 10
|
MAX_PAGES = 10
|
||||||
|
|
||||||
# ── HTTP ──────────────────────────────────────
|
# ── HTTP ──────────────────────────────────────
|
||||||
def api_get(url, timeout=12):
|
def api_get(url, timeout=15):
|
||||||
|
headers = {"User-Agent": "MamutovoStarterBot/1.0 (+https://mamutovo.cz)"}
|
||||||
|
if MASTODON_TOKEN:
|
||||||
|
headers["Authorization"] = f"Bearer {MASTODON_TOKEN}"
|
||||||
try:
|
try:
|
||||||
req = urllib.request.Request(url, headers={"User-Agent": "MamutovoStarterBot/1.0 (+https://mamutovo.cz)"})
|
req = urllib.request.Request(url, headers=headers)
|
||||||
with urllib.request.urlopen(req, timeout=timeout) as r:
|
with urllib.request.urlopen(req, timeout=timeout) as r:
|
||||||
return json.loads(r.read().decode())
|
return json.loads(r.read().decode())
|
||||||
except urllib.error.HTTPError as e:
|
except urllib.error.HTTPError as e:
|
||||||
@@ -72,36 +83,39 @@ def api_get(url, timeout=12):
|
|||||||
log.debug(f"Chyba {url}: {e}"); return None
|
log.debug(f"Chyba {url}: {e}"); return None
|
||||||
|
|
||||||
# ── SBĚR ─────────────────────────────────────
|
# ── SBĚR ─────────────────────────────────────
|
||||||
|
def _fetch_small_instance(instance, seen_handles, all_accounts):
    """Collect local accounts from the public directory of one CZ/SK instance.

    Small CZ/SK instances are not filtered here: every user listed in the
    instance's directory is taken. The directory is read page by page
    (``PAGE_LIMIT`` entries per request, at most ``MAX_PAGES`` requests),
    pausing ``RATE_LIMIT_DELAY`` seconds between pages.

    Args:
        instance: Host name of the instance, e.g. ``"mamutovo.cz"``.
        seen_handles: Set of ``user@host`` handles collected so far; updated
            in place and used to skip duplicates.
        all_accounts: List that receives each new account dict; updated in
            place. Every appended dict gains the keys ``_handle`` and
            ``_source_instance``.

    Returns:
        None. Results are delivered through the two mutable arguments.
    """
    log.info(f"directory {instance} ...")
    for page in range(MAX_PAGES):
        offset = page * PAGE_LIMIT
        url = (f"https://{instance}/api/v1/directory"
               f"?limit={PAGE_LIMIT}&local=true&offset={offset}")
        batch = api_get(url)
        # A falsy or non-list response (failed request, empty page, error
        # object) ends paging for this instance.
        if not batch or not isinstance(batch, list):
            break
        added = 0
        for acc in batch:
            # Skip malformed entries rather than aborting the whole run or
            # registering a bogus "@<instance>" handle.
            if not isinstance(acc, dict):
                continue
            acct = acc.get("acct") or ""
            if not acct:
                continue
            # Entries without a domain part get this instance's host appended.
            handle = acct if "@" in acct else f"{acct}@{instance}"
            if handle in seen_handles:
                continue
            seen_handles.add(handle)
            acc["_handle"] = handle
            acc["_source_instance"] = instance
            all_accounts.append(acc)
            added += 1
        log.debug(f" {instance} offset={offset}: {added} nových")
        # A short page means the directory has been exhausted.
        if len(batch) < PAGE_LIMIT:
            break
        time.sleep(RATE_LIMIT_DELAY)
|
||||||
|
|
||||||
def fetch_all_accounts():
|
def fetch_all_accounts():
|
||||||
seen_handles = set()
|
seen_handles = set()
|
||||||
all_accounts = []
|
all_accounts = []
|
||||||
for instance in QUERY_INSTANCES:
|
for instance in QUERY_INSTANCES:
|
||||||
log.info(f"directory {instance} ...")
|
_fetch_small_instance(instance, seen_handles, all_accounts)
|
||||||
page = 0
|
|
||||||
while page < MAX_PAGES:
|
|
||||||
offset = page * PAGE_LIMIT
|
|
||||||
url = (f"https://{instance}/api/v1/directory"
|
|
||||||
f"?limit={PAGE_LIMIT}&local=true&offset={offset}")
|
|
||||||
batch = api_get(url)
|
|
||||||
if not batch or not isinstance(batch, list):
|
|
||||||
break
|
|
||||||
added = 0
|
|
||||||
for acc in batch:
|
|
||||||
handle = acc.get("acct", "")
|
|
||||||
if "@" not in handle:
|
|
||||||
handle = f"{handle}@{instance}"
|
|
||||||
if handle in seen_handles:
|
|
||||||
continue
|
|
||||||
seen_handles.add(handle)
|
|
||||||
acc["_handle"] = handle
|
|
||||||
acc["_source_instance"] = instance
|
|
||||||
all_accounts.append(acc)
|
|
||||||
added += 1
|
|
||||||
log.debug(f" {instance} offset={offset}: {added} nových")
|
|
||||||
if len(batch) < PAGE_LIMIT:
|
|
||||||
break
|
|
||||||
page += 1
|
|
||||||
time.sleep(RATE_LIMIT_DELAY)
|
|
||||||
log.info(f" → celkem {len(all_accounts)} unikátních účtů")
|
log.info(f" → celkem {len(all_accounts)} unikátních účtů")
|
||||||
time.sleep(RATE_LIMIT_DELAY)
|
time.sleep(RATE_LIMIT_DELAY)
|
||||||
log.info(f"Sběr hotov: {len(all_accounts)} unikátních účtů")
|
log.info(f"Sběr hotov: {len(all_accounts)} unikátních účtů")
|
||||||
@@ -187,7 +201,7 @@ def build_output(raw):
|
|||||||
})
|
})
|
||||||
seen = set()
|
seen = set()
|
||||||
unique = []
|
unique = []
|
||||||
for r in sorted(results, key=lambda x: x["score"], reverse=True):
|
for r in sorted(results, key=lambda x: x["followers"], reverse=True):
|
||||||
if r["handle"] not in seen:
|
if r["handle"] not in seen:
|
||||||
seen.add(r["handle"])
|
seen.add(r["handle"])
|
||||||
unique.append(r)
|
unique.append(r)
|
||||||
|
|||||||
Reference in New Issue
Block a user