feat: pouze CZ/SK instance, TOP 100 podle followers
This commit is contained in:
158
accounts.csv
158
accounts.csv
@@ -1,61 +1,101 @@
|
||||
Account address,Show boosts
|
||||
FilipHorky@mastodonczech.cz,true
|
||||
alexandrmitrofa@mastodonczech.cz,true
|
||||
petrkou@mastodonczech.cz,true
|
||||
ivanbartos@mastodon.pirati.cz,true
|
||||
xChaos@f.cz,true
|
||||
marcel_kolaja@mastodon.pirati.cz,true
|
||||
jie@mastodonczech.cz,true
|
||||
davidslizek@mastodonczech.cz,true
|
||||
maestrosill@mastodonczech.cz,true
|
||||
OliKockova@cztwitter.cz,true
|
||||
rozanek@mastodonczech.cz,true
|
||||
nolog@witter.cz,true
|
||||
kayla_eilhart@witter.cz,true
|
||||
BohumilVostal@mastodonczech.cz,true
|
||||
filiptitlbach@mastodonczech.cz,true
|
||||
boninska@cztwitter.cz,true
|
||||
365tipu@mastodonczech.cz,true
|
||||
tonline@mastodon.social,true
|
||||
trendytoots@mastodon.social,true
|
||||
realTuckFrumper@mastodon.social,true
|
||||
glyph@mastodon.social,true
|
||||
jbaert@mastodon.social,true
|
||||
MacMagazine@mastodon.social,true
|
||||
Bahnblogstelle@mastodon.social,true
|
||||
Viss@mastodon.social,true
|
||||
3CatInfo@mastodon.social,true
|
||||
rpilocator@mastodon.social,true
|
||||
axios@mastodon.social,true
|
||||
Mrfunkedude@mastodon.social,true
|
||||
charlesgaba@mastodon.social,true
|
||||
verge@mastodon.social,true
|
||||
cmconseils@mastodon.social,true
|
||||
ctietze@mastodon.social,true
|
||||
agiletortoise@mastodon.social,true
|
||||
levelbot@mastodon.social,true
|
||||
macrumors@mastodon.social,true
|
||||
dungeons@mastodon.social,true
|
||||
GoatsLive@mastodon.social,true
|
||||
stroughtonsmith@mastodon.social,true
|
||||
die_reklame@mastodon.social,true
|
||||
gruber@mastodon.social,true
|
||||
Le_M_Poireau@mastodon.social,true
|
||||
shriramk@mastodon.social,true
|
||||
lobsters@mastodon.social,true
|
||||
alternativeto@mas.to,true
|
||||
popcornreel@mas.to,true
|
||||
Jgbird@mas.to,true
|
||||
gabrielesvelto@mas.to,true
|
||||
RantyHighwayman@mas.to,true
|
||||
harriorrihar@mas.to,true
|
||||
Jyoti@mas.to,true
|
||||
gleick@mas.to,true
|
||||
Techaltar@mas.to,true
|
||||
trumpet@mas.to,true
|
||||
kityates@mas.to,true
|
||||
Stoned_Deva_@mas.to,true
|
||||
barunori2023@mas.to,true
|
||||
libreleah@mas.to,true
|
||||
kims@mas.to,true
|
||||
SocraticEthics@mastodon.online,true
|
||||
streetartutopia@mastodon.online,true
|
||||
9to5Mac@mastodon.online,true
|
||||
9to5google@mastodon.online,true
|
||||
tinoeberl@mastodon.online,true
|
||||
NOSRSS@mastodon.online,true
|
||||
globalmuseum@mastodon.online,true
|
||||
mastodonmigration@mastodon.online,true
|
||||
danirabbit@mastodon.online,true
|
||||
Aviation_Librarian@mastodon.online,true
|
||||
gupton68@mastodon.online,true
|
||||
phastidio@mastodon.online,true
|
||||
YaLTeR@mastodon.online,true
|
||||
newsthump@mastodon.online,true
|
||||
nikitonsky@mastodon.online,true
|
||||
astro_jcm@mastodon.online,true
|
||||
apache_be@mastodon.online,true
|
||||
rychlofky@mastodonczech.cz,true
|
||||
parlamentnizpravycz@mastodonczech.cz,true
|
||||
Pirati@mastodon.pirati.cz,true
|
||||
zandl@mastodon.pirati.cz,true
|
||||
fabia_man@mamutovo.cz,true
|
||||
Roman_M@cztwitter.cz,true
|
||||
marekl@mastodonczech.cz,true
|
||||
marketkag@mastodon.pirati.cz,true
|
||||
VladaFoltan@mastodonczech.cz,true
|
||||
bohdandlouhy@mastodonczech.cz,true
|
||||
Apolenarychlikova@witter.cz,true
|
||||
beneslenka@mastodonczech.cz,true
|
||||
byczech@witter.cz,true
|
||||
zoul@boskovice.social,true
|
||||
tymoty@f.cz,true
|
||||
OttovonWenkoff@mastodonczech.cz,true
|
||||
Unreed@mastodonczech.cz,true
|
||||
lupa@mastodonczech.cz,true
|
||||
TomasFriedl@mastodonczech.cz,true
|
||||
trnk_c@witter.cz,true
|
||||
sumavanet@mastodonczech.cz,true
|
||||
honzakorinek@cztwitter.cz,true
|
||||
fipa@mastodonczech.cz,true
|
||||
zbiejczuk@mastodonczech.cz,true
|
||||
Otakar_Brabec@cztwitter.cz,true
|
||||
blataak@mastodonczech.cz,true
|
||||
hajma@cztwitter.cz,true
|
||||
piskvor@cztwitter.cz,true
|
||||
zechy@mastodonczech.cz,true
|
||||
piratzbrna@mastodonczech.cz,true
|
||||
paveljanicek@mamutovo.cz,true
|
||||
porucikPihrt@cztwitter.cz,true
|
||||
s_kavou_v_ruce@cztwitter.cz,true
|
||||
mskalick@witter.cz,true
|
||||
prrejpal@cztwitter.cz,true
|
||||
margo@mastodon.arch-linux.cz,true
|
||||
infoekcz@mamutovo.cz,true
|
||||
baadvo@mastodonczech.cz,true
|
||||
cernejpudinkcz@mastodonczech.cz,true
|
||||
simindr@mastodonczech.cz,true
|
||||
DivkaVModrem@cztwitter.cz,true
|
||||
kalendar@mastodon.arch-linux.cz,true
|
||||
madla@mastodonczech.cz,true
|
||||
ChorozonX@cztwitter.cz,true
|
||||
xmirek@mastodonczech.cz,true
|
||||
czstatistika@mastodonczech.cz,true
|
||||
technotramp@mastodonczech.cz,true
|
||||
DamianVCechov@cztwitter.cz,true
|
||||
andrej@mastodon.pirati.cz,true
|
||||
vitex@f.cz,true
|
||||
KaterinaGloserova@mastodonczech.cz,true
|
||||
bycx@mastodonczech.cz,true
|
||||
medidekpu@cztwitter.cz,true
|
||||
janmelvil@mastodonczech.cz,true
|
||||
Jankajanicka0@cztwitter.cz,true
|
||||
Erdela@mastodonczech.cz,true
|
||||
Ilusie@mastodon.arch-linux.cz,true
|
||||
otecfura@witter.cz,true
|
||||
Lenislavka@mastodonczech.cz,true
|
||||
caragraph@f.cz,true
|
||||
smoon@mamutovo.cz,true
|
||||
lepapierblanc@mastodonczech.cz,true
|
||||
vitsoft@cztwitter.cz,true
|
||||
Tichy_koutek@mastodonczech.cz,true
|
||||
xcabal05@mamutovo.cz,true
|
||||
babaq@mastodonczech.cz,true
|
||||
tatageek@witter.cz,true
|
||||
medvjed@witter.cz,true
|
||||
jachym@mastodonczech.cz,true
|
||||
Neovlivnicz@witter.cz,true
|
||||
Petr90@mamutovo.cz,true
|
||||
honzajavorek@mastodonczech.cz,true
|
||||
PavelUngr@mastodonczech.cz,true
|
||||
tomulinek@mastodonczech.cz,true
|
||||
backorka@cztwitter.cz,true
|
||||
Onqa6@mastodon.arch-linux.cz,true
|
||||
Razemix@mamutovo.cz,true
|
||||
amarok@mastodonczech.cz,true
|
||||
OpalkovaH@cztwitter.cz,true
|
||||
anlexcz@witter.cz,true
|
||||
tomasmartinek@mastodon.pirati.cz,true
|
||||
lacertacz@mastodonczech.cz,true
|
||||
sibik@mastodonczech.cz,true
|
||||
|
||||
|
2032
accounts.json
2032
accounts.json
File diff suppressed because it is too large
Load Diff
@@ -18,11 +18,27 @@ Cron (každý den v 3:00):
|
||||
0 3 * * * /usr/bin/python3 /opt/mastodon-start/mastodon_cz_accounts.py --output /var/www/start/ >> /var/log/mastodon-start.log 2>&1
|
||||
"""
|
||||
|
||||
import json, csv, time, re, argparse, logging
|
||||
import json, csv, time, re, argparse, logging, os
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from pathlib import Path
|
||||
import urllib.request, urllib.error, urllib.parse
|
||||
|
||||
def _load_token():
    """Return the Mastodon API token, or None when none is configured.

    Lookup order:

    1. The ``MASTODON_TOKEN`` environment variable (surrounding whitespace
       is stripped; a blank value is treated as "not set").
    2. A ``.env`` file located next to this script. The first line that is
       either ``MASTODON_TOKEN=<value>`` or a bare token (a non-comment line
       without ``=``) wins. Surrounding quotes around ``<value>`` are
       removed so that ``MASTODON_TOKEN="abc"`` yields ``abc``.

    Returns:
        The token string, or None if neither source provides a non-empty one.
    """
    token = os.environ.get("MASTODON_TOKEN", "").strip()
    if token:
        return token

    env_path = Path(__file__).parent / ".env"
    # is_file() (rather than exists()) also rejects a directory named ".env",
    # which would otherwise make read_text() raise.
    if not env_path.is_file():
        return None

    # utf-8-sig transparently drops a BOM that would otherwise hide the
    # "MASTODON_TOKEN=" prefix on the first line; undecodable bytes (e.g. in
    # comments) are replaced instead of aborting the whole lookup.
    content = env_path.read_text(encoding="utf-8-sig", errors="replace")
    for raw_line in content.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        if line.startswith("MASTODON_TOKEN="):
            value = line.split("=", 1)[1].strip()
            # Drop one pair of matching quotes, as commonly used in .env files.
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                value = value[1:-1].strip()
            return value or None
        if "=" not in line:
            return line  # raw token value on a line of its own
    return None
|
||||
|
||||
MASTODON_TOKEN = _load_token()
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s", datefmt="%H:%M:%S")
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
@@ -38,28 +54,23 @@ QUERY_INSTANCES = [
|
||||
"lgbtcz.social", # 7 CZ uživatelů
|
||||
"boskovice.social", # 5 CZ uživatelů
|
||||
"mamutovo.cz",
|
||||
# Velké instance – filtr language=cs/sk
|
||||
"mastodon.social", # 346 CZ uživatelů
|
||||
"mas.to", # 33 CZ uživatelů
|
||||
"mastodon.online", # 16 CZ uživatelů
|
||||
"mastodon.world", # 14 CZ uživatelů
|
||||
"mstdn.social", # 12 CZ uživatelů
|
||||
"masto.ai", # 7 CZ uživatelů
|
||||
"fosstodon.org", # 7 CZ uživatelů
|
||||
"infosec.exchange", # 5 CZ uživatelů
|
||||
]
|
||||
|
||||
MIN_STATUSES = 10
|
||||
MIN_FOLLOWERS = 10
|
||||
MAX_DAYS_INACTIVE = 30
|
||||
TOP_N = 60
|
||||
MAX_DAYS_INACTIVE = 365
|
||||
TOP_N = 100
|
||||
RATE_LIMIT_DELAY = 1.2
|
||||
PAGE_LIMIT = 80
|
||||
MAX_PAGES = 10
|
||||
|
||||
# ── HTTP ──────────────────────────────────────
|
||||
def api_get(url, timeout=12):
|
||||
def api_get(url, timeout=15):
|
||||
headers = {"User-Agent": "MamutovoStarterBot/1.0 (+https://mamutovo.cz)"}
|
||||
if MASTODON_TOKEN:
|
||||
headers["Authorization"] = f"Bearer {MASTODON_TOKEN}"
|
||||
try:
|
||||
req = urllib.request.Request(url, headers={"User-Agent": "MamutovoStarterBot/1.0 (+https://mamutovo.cz)"})
|
||||
req = urllib.request.Request(url, headers=headers)
|
||||
with urllib.request.urlopen(req, timeout=timeout) as r:
|
||||
return json.loads(r.read().decode())
|
||||
except urllib.error.HTTPError as e:
|
||||
@@ -72,36 +83,39 @@ def api_get(url, timeout=12):
|
||||
log.debug(f"Chyba {url}: {e}"); return None
|
||||
|
||||
# ── SBĚR ─────────────────────────────────────
|
||||
def _fetch_small_instance(instance, seen_handles, all_accounts):
    """Collect all accounts listed in the directory of a small CZ/SK instance.

    Pages through ``/api/v1/directory`` (local accounts only) in steps of
    PAGE_LIMIT, for at most MAX_PAGES pages. Each account not seen before is
    tagged with ``_handle`` and ``_source_instance`` and appended to
    ``all_accounts``; its handle is recorded in ``seen_handles``.

    Args:
        instance: Host name of the instance to query.
        seen_handles: Set of ``user@host`` handles already collected; mutated.
        all_accounts: List receiving the new account dicts; mutated.
    """
    log.info(f"directory {instance} ...")
    for page_no in range(MAX_PAGES):
        offset = page_no * PAGE_LIMIT
        query = f"?limit={PAGE_LIMIT}&local=true&offset={offset}"
        batch = api_get(f"https://{instance}/api/v1/directory" + query)

        # Stop on a failed request, an empty page or an unexpected payload.
        if not (batch and isinstance(batch, list)):
            break

        added = 0
        for account in batch:
            name = account.get("acct", "")
            # Accounts without a host part get this instance's host appended.
            handle = f"{name}@{instance}" if "@" not in name else name
            if handle not in seen_handles:
                seen_handles.add(handle)
                account["_handle"] = handle
                account["_source_instance"] = instance
                all_accounts.append(account)
                added += 1
        log.debug(f" {instance} offset={offset}: {added} nových")

        # A short page means the directory is exhausted.
        if len(batch) < PAGE_LIMIT:
            break
        time.sleep(RATE_LIMIT_DELAY)
|
||||
|
||||
def fetch_all_accounts():
|
||||
seen_handles = set()
|
||||
all_accounts = []
|
||||
for instance in QUERY_INSTANCES:
|
||||
log.info(f"directory {instance} ...")
|
||||
page = 0
|
||||
while page < MAX_PAGES:
|
||||
offset = page * PAGE_LIMIT
|
||||
url = (f"https://{instance}/api/v1/directory"
|
||||
f"?limit={PAGE_LIMIT}&local=true&offset={offset}")
|
||||
batch = api_get(url)
|
||||
if not batch or not isinstance(batch, list):
|
||||
break
|
||||
added = 0
|
||||
for acc in batch:
|
||||
handle = acc.get("acct", "")
|
||||
if "@" not in handle:
|
||||
handle = f"{handle}@{instance}"
|
||||
if handle in seen_handles:
|
||||
continue
|
||||
seen_handles.add(handle)
|
||||
acc["_handle"] = handle
|
||||
acc["_source_instance"] = instance
|
||||
all_accounts.append(acc)
|
||||
added += 1
|
||||
log.debug(f" {instance} offset={offset}: {added} nových")
|
||||
if len(batch) < PAGE_LIMIT:
|
||||
break
|
||||
page += 1
|
||||
time.sleep(RATE_LIMIT_DELAY)
|
||||
_fetch_small_instance(instance, seen_handles, all_accounts)
|
||||
log.info(f" → celkem {len(all_accounts)} unikátních účtů")
|
||||
time.sleep(RATE_LIMIT_DELAY)
|
||||
log.info(f"Sběr hotov: {len(all_accounts)} unikátních účtů")
|
||||
@@ -187,7 +201,7 @@ def build_output(raw):
|
||||
})
|
||||
seen = set()
|
||||
unique = []
|
||||
for r in sorted(results, key=lambda x: x["score"], reverse=True):
|
||||
for r in sorted(results, key=lambda x: x["followers"], reverse=True):
|
||||
if r["handle"] not in seen:
|
||||
seen.add(r["handle"])
|
||||
unique.append(r)
|
||||
|
||||
Reference in New Issue
Block a user