#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
#
# Zprávobot.news - AI Daily Digest Publisher
# Version: 1.0.2 (Fixed URLs)

require 'csv'
require 'json'
require 'time'
require 'net/http'
require 'uri'
require 'optparse'

# ==========================================
# CONFIGURATION
# ==========================================

# Base URL of the Mastodon instance the digest is published to.
MASTODON_URL = 'https://zpravobot.news'.freeze
# CSV file the posts are read from.
CSV_PATH = '/app/data/posts-latest.csv'.freeze
# Anthropic Messages API endpoint used by the analysis step.
ANTHROPIC_API_URL = 'https://api.anthropic.com/v1/messages'.freeze

# Per-bot settings keyed by bot name (the value accepted by --bot).
#   token:     Mastodon access token, read from the environment at load time
#              (nil when the variable is not set)
#   style:     digest style ('neutral', 'positive' or 'sarcastic')
#   time_slot: label for the bot's slot; not read anywhere else in this file
#   hashtags:  hashtag line appended to the summary toot
# The table and its entries are frozen so they cannot be modified at runtime.
BOTS = {
  'zpravobot' => {
    token: ENV['ZPRAVOBOT_TOKEN'],
    style: 'neutral',
    time_slot: 'morning',
    hashtags: '#zpravobot #trendydne'
  }.freeze,
  'pozitivni' => {
    token: ENV['POZITIVNI_TOKEN'],
    style: 'positive',
    time_slot: 'noon',
    hashtags: '#dobréZprávy #zpravobot'
  }.freeze,
  'sarkasticky' => {
    token: ENV['SARKASTICKY_TOKEN'],
    style: 'sarcastic',
    time_slot: 'evening',
    hashtags: '#realita #zpravobot'
  }.freeze
}.freeze

# ==========================================
# COMMAND LINE PARSING
# ==========================================

# Parsed command line options; filled in by the option handlers below.
#   :bot     - bot name given with --bot
#   :dry_run - true when --dry-run was given
#   :date    - date string given with --date
options = {}
parser = OptionParser.new do |opts|
  opts.banner = "Usage: publish_digest.rb [options]"

  opts.on("--bot BOT", String, "Bot name (zpravobot, pozitivni, sarkasticky)") do |b|
    options[:bot] = b
  end

  opts.on("--dry-run", "Test mode - don't actually publish") do
    options[:dry_run] = true
  end

  opts.on("--date DATE", String, "Process specific date (YYYY-MM-DD)") do |d|
    options[:date] = d
  end

  opts.on("-h", "--help", "Show this help") do
    puts opts
    exit
  end
end

# Unknown switches or missing arguments end the run with a readable message
# and the usage text instead of an unhandled exception.
begin
  parser.parse!
rescue OptionParser::ParseError => e
  puts "❌ ERROR: #{e.message}"
  puts parser
  exit 1
end

bot_name = options[:bot]

unless bot_name && BOTS.key?(bot_name)
  puts "❌ ERROR: Invalid bot name. Use: zpravobot, pozitivni, or sarkasticky"
  exit 1
end

# Posts are matched against dates formatted as YYYY-MM-DD, so any other
# shape could never match a row; reject it up front with a clear message.
if options[:date] && !options[:date].match?(/\A\d{4}-\d{2}-\d{2}\z/)
  puts "❌ ERROR: Invalid date '#{options[:date]}'. Use format YYYY-MM-DD"
  exit 1
end

config = BOTS[bot_name]

# Validate environment. A variable that is set but blank is treated the same
# as an unset one.
if config[:token].to_s.strip.empty?
  puts "❌ ERROR: Missing token for @#{bot_name}"
  puts " Set environment variable: #{bot_name.upcase}_TOKEN"
  exit 1
end

if ENV['ANTHROPIC_API_KEY'].to_s.strip.empty?
  puts "❌ ERROR: Missing ANTHROPIC_API_KEY"
  exit 1
end

# ==========================================
# UTILITIES
# ==========================================

# Prints +message+ to standard output, prefixed with the current local time
# formatted as [YYYY-MM-DD HH:MM:SS].
def log(message)
  timestamp = Time.now.strftime('%Y-%m-%d %H:%M:%S')
  puts "[#{timestamp}] #{message}"
end

# Returns the first http(s) URL found in +text+, or nil when there is none.
#
# +text+ may be nil (treated as an empty string). Sentence punctuation that
# directly follows the URL (. , ; : ! ?) and an unbalanced closing parenthesis
# are not considered part of the URL, so "(see https://a.b/c)." yields
# "https://a.b/c". A closing parenthesis that has a matching opening one
# inside the URL is kept.
def extract_url(text)
  url = text.to_s[%r{https?://[^\s<>"]+}]
  return nil unless url

  loop do
    trimmed = url.sub(/[.,;:!?]+\z/, '')
    # Only drop ")" when the URL itself contains no "(" that it could close.
    trimmed = trimmed.chomp(')') if trimmed.end_with?(')') && trimmed.count(')') > trimmed.count('(')
    break if trimmed == url

    url = trimmed
  end

  url
end

# ==========================================
# DATA LOADING
# ==========================================

# Loads the posts created on one day from the CSV file at CSV_PATH.
#
# date - day to load as a 'YYYY-MM-DD' string; when nil, the day 24 hours
#        before now is used.
#
# The CSV is read with a header row; the columns used are 'created_at',
# 'text' and 'url'. A row is kept when its 'created_at', formatted as
# YYYY-MM-DD, equals the target date. Rows whose 'created_at' cannot be
# parsed are skipped, and the number of skipped rows is logged. Missing
# 'text' or 'url' values are stored as empty strings so later steps can call
# String methods on them.
#
# Returns an array of hashes with the string keys 'text', 'url' and
# 'created_at'. Terminates the process with exit status 1 when the file does
# not exist or no post matches the date.
def load_posts_from_csv(date = nil)
  target_date = date || (Time.now - 86400).strftime('%Y-%m-%d')

  unless File.exist?(CSV_PATH)
    log "❌ CSV file not found: #{CSV_PATH}"
    exit 1
  end

  posts = []
  skipped = 0

  CSV.foreach(CSV_PATH, headers: true, encoding: 'utf-8') do |row|
    begin
      created = Time.parse(row['created_at'].to_s)
    rescue ArgumentError, TypeError, RangeError
      # Unparseable timestamp: the row cannot be assigned to a day.
      skipped += 1
      next
    end

    next unless created.strftime('%Y-%m-%d') == target_date

    posts << {
      'text' => row['text'].to_s,
      'url' => row['url'] || '',
      'created_at' => row['created_at']
    }
  end

  log "⚠️ Skipped #{skipped} rows with unparseable created_at" if skipped.positive?
  log "📊 Loaded #{posts.size} posts from #{target_date}"

  if posts.empty?
    log "⚠️ No posts found for #{target_date}"
    exit 1
  end

  posts
end

# ==========================================
# TOPIC EXTRACTION
# ==========================================

# Ordered list of [topic label, keyword pattern]. A post is assigned to the
# first topic whose pattern matches its lower-cased text, so the order of the
# entries decides which topic wins when several patterns match.
TOPIC_RULES = [
  ['🌍 Zahraniční politika', /trump|venezuela|maduro|grónsko|greenland|usa|bílý dům/],
  ['🏒 Hokej', /hokej|extraliga|nhl|ms u20/],
  ['⚽ Fotbal', /fotbal|chelsea|liga|gól|penalty/],
  ['🎬 Kultura', /film|seriál|stranger things|hudba|koncert|festival|netflix/],
  ['❄️ Počasí', /počasí|teplota|mráz|sníh|déšť/],
  ['🏛️ Politika', /politika|parlament|vláda|ministr/],
  ['💼 Ekonomika', /ekonomika|koruna|inflace|mzdy|ceny/]
].freeze

# Groups posts into topics by keyword matching.
#
# posts - array of post hashes with the string keys 'text' and 'url'.
#
# Side effect: every post hash gets an 'extracted_url' key holding the first
# URL found in its text, or the post's 'url' value when the text has none.
# Posts matching no topic are not included in the result. A nil 'text' is
# treated as an empty string.
#
# Returns a Hash of topic label => array of posts, ordered by descending post
# count; topics with the same count keep the order in which they were first
# seen. The topics and their counts are logged.
def extract_topics(posts)
  grouped = Hash.new { |hash, key| hash[key] = [] }

  posts.each do |post|
    raw_text = post['text'].to_s
    text = raw_text.downcase

    # Add URL to post if not present
    post['extracted_url'] = extract_url(raw_text) || post['url']

    # Categorize by topic
    label, = TOPIC_RULES.find { |_, pattern| text.match?(pattern) }
    grouped[label] << post if label
  end

  # Sort by post count; the index is a tie-breaker because sort_by alone does
  # not guarantee the order of equal elements.
  ranked = grouped.each_with_index.sort_by { |(_, topic_posts), index| [-topic_posts.size, index] }
  topics = ranked.map(&:first).to_h

  log "🔍 Found #{topics.size} topics:"
  topics.each { |topic, topic_posts| log " #{topic}: #{topic_posts.size} posts" }

  topics
end

# ==========================================
# CONTENT FILTERING BY STYLE
# ==========================================

# Keywords that mark a post as good news / bad news for the 'positive' style.
POSITIVE_POST_PATTERN = /úspěch|vítěz|rekord|festival|koncert|ocenění|talent/
NEGATIVE_POST_PATTERN = /nehoda|smrt|tragédie|havárie|konflikt|krize/

# Narrows the topic table down to what fits the bot's style.
#
# topics - Hash of topic label => array of post hashes (string key 'text').
# style  - 'neutral', 'positive' or 'sarcastic'.
#
# 'positive'  drops political and foreign-policy topics, and within the other
#             topics keeps only posts that contain a positive keyword and no
#             negative keyword; topics left without posts are dropped.
# 'sarcastic' keeps the political and foreign-policy topics; when fewer than
#             three of those exist, other topics are added in their existing
#             order until five topics are selected.
# any other   style (including 'neutral') returns +topics+ itself, unchanged.
#
# Returns a Hash in the same shape as +topics+.
def filter_topics_by_style(topics, style)
  case style
  when 'positive' then select_positive_topics(topics)
  when 'sarcastic' then select_sarcastic_topics(topics)
  else topics
  end
end

# True when the topic label names domestic or foreign politics.
def political_topic?(topic)
  topic.include?('Politika') || topic.include?('Zahraniční')
end

# Implements the 'positive' style of filter_topics_by_style.
def select_positive_topics(topics)
  positive_topics = {}

  topics.each do |topic, posts|
    next if political_topic?(topic)

    positive_posts = posts.select do |post|
      text = post['text'].to_s.downcase # nil text counts as empty
      text.match?(POSITIVE_POST_PATTERN) && !text.match?(NEGATIVE_POST_PATTERN)
    end

    positive_topics[topic] = positive_posts unless positive_posts.empty?
  end

  log "💚 Filtered to #{positive_topics.size} positive topics"
  positive_topics
end

# Implements the 'sarcastic' style of filter_topics_by_style.
def select_sarcastic_topics(topics)
  sarcastic_topics = topics.select { |topic, _| political_topic?(topic) }

  if sarcastic_topics.size < 3
    topics.each do |topic, posts|
      break if sarcastic_topics.size >= 5

      sarcastic_topics[topic] = posts unless sarcastic_topics.key?(topic)
    end
  end

  log "😏 Selected #{sarcastic_topics.size} topics for sarcasm"
  sarcastic_topics
end

# ==========================================
# CLAUDE API ANALYSIS
# ==========================================

# Asks the Anthropic Messages API for a short JSON analysis of the day's posts.
#
# posts  - array of post hashes (string key 'text').
# topics - Hash of topic label => array of posts.
#
# The request is sent to ANTHROPIC_API_URL with the key from the
# ANTHROPIC_API_KEY environment variable. The reply text is expected to be a
# JSON object; Markdown code fences around it are removed before parsing.
#
# Returns the parsed Hash. On any failure (non-200 status, network error,
# timeout, unparseable or non-object reply) the problem is logged and
# default_analysis(topics) is returned instead; this method does not raise
# StandardError to its caller.
def analyze_with_claude(posts, topics)
  log "🤖 Analyzing with Claude API..."

  prompt = build_analysis_prompt(posts, topics)

  uri = URI(ANTHROPIC_API_URL)
  request = Net::HTTP::Post.new(uri)
  request['anthropic-version'] = '2023-06-01'
  request['content-type'] = 'application/json'
  request['x-api-key'] = ENV['ANTHROPIC_API_KEY']

  request.body = {
    model: 'claude-sonnet-4-20250514',
    max_tokens: 1000,
    messages: [
      { role: 'user', content: prompt }
    ]
  }.to_json

  # Explicit timeouts so a stalled connection cannot hang the run.
  response = Net::HTTP.start(uri.hostname, uri.port,
                             use_ssl: true, open_timeout: 10, read_timeout: 60) do |http|
    http.request(request)
  end

  if response.code != '200'
    log "⚠️ Claude API error: #{response.code}"
    return default_analysis(topics)
  end

  data = JSON.parse(response.body)
  text = data.dig('content', 0, 'text').to_s

  analysis = JSON.parse(text.gsub(/```json|```/, '').strip)
  unless analysis.is_a?(Hash)
    log "⚠️ Claude API error: unexpected response format"
    return default_analysis(topics)
  end

  log "✅ Claude analysis complete"
  analysis
rescue StandardError => e
  log "⚠️ Claude API error: #{e.message}"
  default_analysis(topics)
end

# Builds the prompt sent by analyze_with_claude: the total post count, the
# per-topic counts, and the first 10 post texts (each cut to 151 characters;
# a nil text counts as empty), followed by the requested JSON shape.
def build_analysis_prompt(posts, topics)
  topic_summary = topics.map { |topic, topic_posts| "#{topic}: #{topic_posts.size}" }.join(', ')
  sample_texts = posts.first(10).map { |post| post['text'].to_s[0..150] }

  <<~PROMPT
    Analyzuj #{posts.size} českých/slovenských zpráv z Mastodon instance Zprávobot.news.

    Témata: #{topic_summary}

    Ukázka textů:
    #{sample_texts.join("\n---\n")}

    Vrať POUZE JSON (žádný markdown):
    {
      "main_topics": ["téma1", "téma2", "téma3"],
      "sentiment": "neutral|positive|negative",
      "notable_events": ["událost1", "událost2"]
    }
  PROMPT
end

# Fallback analysis used when the Claude API call fails.
#
# topics - Hash of topic label => posts.
#
# Returns a Hash with the first three topic labels as 'main_topics', a
# 'neutral' 'sentiment' and an empty 'notable_events' list.
def default_analysis(topics)
  {
    'main_topics' => topics.keys.first(3),
    'sentiment' => 'neutral',
    'notable_events' => []
  }
end

# ==========================================
# TOOT GENERATION
# ==========================================

# Builds the first toot of the thread: a dated headline, an intro line with
# the post count, one line per topic with its post count, the hashtags and a
# pointer to the follow-up toot.
#
# posts_count - number of posts processed (shown in the intro line).
# topics      - Hash of topic label => array of posts.
# style       - 'neutral', 'positive' or 'sarcastic'; any other value is
#               rendered with the neutral wording.
# hashtags    - hashtag line to include.
# date:       - optional 'YYYY-MM-DD' string for the headline; when omitted
#               (or not in that format) the day 24 hours before now is used.
#
# The neutral style lists up to five topics, the other styles up to four.
# Returns the toot text, at most 500 characters long; longer text is cut and
# ends with "...".
def generate_summary_toot(posts_count, topics, style, hashtags, date: nil)
  display_date = summary_display_date(date)

  unit = style == 'sarcastic' ? '×' : ' postů'
  topic_lines = topics.first(5).map do |topic, topic_posts|
    "#{topic} (#{topic_posts.size}#{unit})"
  end

  title, intro, shown_topics, footer =
    case style
    when 'positive'
      ['☀️ DOBRÉ ZPRÁVY DNE', "Z dnešních #{posts_count} zpráv vybrané momenty:", 4, '👇 Inspirace na čtení']
    when 'sarcastic'
      ['😏 DNEŠNÍ REALITA', "#{posts_count} postů = co se stalo?", 4, '👇 Důkazy zmaru']
    else
      # 'neutral', and the fallback for unknown styles
      ['📊 TRENDY DNE', "Zpracováno #{posts_count} postů:", 5, '👇 Odkazy na vybrané články']
    end

  summary = [
    "#{title} (#{display_date})",
    '',
    intro,
    '',
    topic_lines.first(shown_topics).join("\n"),
    '',
    hashtags,
    '',
    footer
  ].join("\n").strip

  # 497 characters of text + "..." = 500
  summary = "#{summary[0, 497]}..." if summary.length > 500
  summary
end

# Formats the headline date as DD.MM.YYYY from a 'YYYY-MM-DD' string, or uses
# the day 24 hours before now when +date+ is nil or has another format.
def summary_display_date(date)
  match = /\A(\d{4})-(\d{2})-(\d{2})\z/.match(date.to_s)
  return "#{match[3]}.#{match[2]}.#{match[1]}" if match

  (Time.now - 86400).strftime('%d.%m.%Y')
end

# Header and footer of the links toot for each style, as [header, footer].
LINKS_TOOT_FRAMES = {
  'neutral' => ["📌 VYBRANÉ ČLÁNKY DNE:", "\n#články #zprávy"],
  'positive' => ["💚 POZITIVNÍ PŘÍBĚHY DNE:", "\n💙 Máte skvělý den!\n#inspirace"],
  'sarcastic' => ["🤡 \"BREAKING NEWS\" DNE:", "\n🙃 Zítra: repeat\n#sarkasmus"]
}.freeze

# Matches an http(s) URL inside toot text.
LINKS_TOOT_URL_PATTERN = %r{https?://[^\s<>"]+}

# Length every URL is counted as when measuring a toot against the limit
# (Mastodon counts a link as 23 characters regardless of its real length).
LINKS_TOOT_URL_LENGTH = 23

# Builds the second toot of the thread: for up to five topics, up to two
# article links each (the topic's first post and the one in the middle of its
# list), framed by a style-specific header and footer.
#
# topics - Hash of topic label => array of post hashes. Each post is read
#          through the string keys 'text' and 'extracted_url'; posts without
#          an 'extracted_url' are left out, and a topic with no usable link
#          gets no heading.
# style  - 'neutral', 'positive' or 'sarcastic'; any other value uses the
#          neutral header and footer.
# limit: - maximum toot length, measured with every URL counted as 23
#          characters (default 500).
#
# Links are added one at a time, in order, while the whole toot still fits;
# the first link that does not fit ends the list. A link is therefore either
# complete or absent, and the footer is always present. Because URLs are
# counted as 23 characters, the raw string may be longer than +limit+.
#
# Returns the toot text.
def generate_links_toot(topics, style, limit: 500)
  header, footer = LINKS_TOOT_FRAMES.fetch(style) { LINKS_TOOT_FRAMES['neutral'] }
  max_topics = 5
  max_links_per_topic = 2

  lines = []

  catch(:limit_reached) do
    topics.first(max_topics).each do |topic, posts|
      topic_started = false

      links_toot_candidates(posts).first(max_links_per_topic).each do |post|
        entry = links_toot_entry(post)
        next unless entry

        # The topic heading is only added together with its first link.
        addition = topic_started ? entry : ["\n#{topic}:", *entry]
        candidate = lines + addition
        throw :limit_reached if links_toot_length(header + candidate.join("\n") + footer) > limit

        lines = candidate
        topic_started = true
      end
    end
  end

  (header + lines.join("\n") + footer).strip
end

# Picks the posts to link for one topic: the first post and, when there is
# more than one, the post in the middle of the list.
def links_toot_candidates(posts)
  picked = [posts[0]]
  picked << posts[posts.size / 2] if posts.size > 1
  picked.compact
end

# Returns the toot lines for one post (a bullet line with the title and a line
# with the URL, or a single bullet line with the URL when the post has no
# usable title), or nil when the post has no URL.
def links_toot_entry(post)
  url = post['extracted_url'].to_s
  return nil if url.empty?

  title = links_toot_title(post['text'])
  title.empty? ? ["• #{url}"] : ["• #{title}", " #{url}"]
end

# Derives a title from post text: URLs are removed, the first non-blank line
# is taken, whitespace runs are collapsed, and the result is cut to
# +max_length+ characters with "..." appended only when it was actually cut.
# Returns an empty string when no text remains.
def links_toot_title(text, max_length = 50)
  without_urls = text.to_s.gsub(LINKS_TOOT_URL_PATTERN, '')
  first_line = without_urls.each_line.map(&:strip).find { |line| !line.empty? }.to_s
  title = first_line.gsub(/\s+/, ' ')
  title.length > max_length ? "#{title[0, max_length].rstrip}..." : title
end

# Length of +text+ with every URL counted as LINKS_TOOT_URL_LENGTH characters.
def links_toot_length(text)
  text.gsub(LINKS_TOOT_URL_PATTERN, 'x' * LINKS_TOOT_URL_LENGTH).length
end

# ==========================================
# MASTODON PUBLISHING (DIRECT HTTP)
# ==========================================

# Publishes the digest as a two-toot thread on MASTODON_URL.
#
# bot_name     - key into BOTS; that entry's :token authorizes the requests.
# summary_toot - text of the first toot.
# links_toot   - text of the second toot, posted as a reply to the first.
# dry_run:     - when truthy, both toots are only logged and nothing is sent.
#
# Returns [first_toot, second_toot] as parsed JSON hashes, or [nil, nil] in
# dry-run mode. Terminates the process with exit status 1 when either request
# is not answered with HTTP 200 or when any StandardError is raised while
# publishing. Note that the first toot stays published if the second fails.
def publish_thread(bot_name, summary_toot, links_toot, dry_run: false)
  config = BOTS[bot_name]

  log "📤 Publishing thread for @#{bot_name}..."

  if dry_run
    log "🧪 DRY RUN MODE - Not actually publishing"
    log "\n--- TOOT 1/2 (#{summary_toot.length} chars) ---"
    log summary_toot
    log "\n--- TOOT 2/2 (#{links_toot.length} chars) ---"
    log links_toot
    log "\n✅ Dry run complete"
    return [nil, nil]
  end

  uri = URI("#{MASTODON_URL}/api/v1/statuses")

  # Publish toot 1
  response = post_mastodon_status(uri, config[:token],
                                  { status: summary_toot, visibility: 'public' })

  unless response.code == '200'
    log "❌ ERROR: #{response.body}"
    exit 1
  end

  toot1_data = JSON.parse(response.body)
  log "✅ Toot 1/2 published: #{toot1_data['url']}"

  # Publish toot 2 as reply
  response2 = post_mastodon_status(uri, config[:token],
                                   { status: links_toot, in_reply_to_id: toot1_data['id'], visibility: 'public' })

  # Success is only reported when the server actually accepted the reply.
  unless response2.code == '200'
    log "❌ ERROR: Toot 2/2 failed (HTTP #{response2.code}): #{response2.body}"
    exit 1
  end

  log "✅ Toot 2/2 published (thread)"

  [toot1_data, JSON.parse(response2.body)]
rescue StandardError => e
  # `exit` raises SystemExit, which is not a StandardError, so the exits above
  # are not intercepted here.
  log "❌ ERROR publishing thread: #{e.message}"
  exit 1
end

# Sends one JSON POST with a bearer token to +uri+ and returns the HTTP
# response. Connection and read timeouts are set so a stalled server cannot
# hang the run.
def post_mastodon_status(uri, token, payload)
  request = Net::HTTP::Post.new(uri)
  request['Authorization'] = "Bearer #{token}"
  request['Content-Type'] = 'application/json'
  request.body = payload.to_json

  Net::HTTP.start(uri.hostname, uri.port,
                  use_ssl: true, open_timeout: 10, read_timeout: 30) do |http|
    http.request(request)
  end
end

# ==========================================
# MAIN EXECUTION
# ==========================================

# Runs the whole digest pipeline for one bot: load posts, extract and filter
# topics, run the Claude analysis, generate both toots and publish them.
#
# bot_name - key into BOTS.
# options  - Hash with the optional keys :date (day to process, passed to
#            load_posts_from_csv) and :dry_run (when truthy, nothing is
#            published).
#
# Terminates the process with exit status 1 when no topic is left after
# filtering. The Claude analysis is not used to build the toots; its main
# topics and sentiment are written to the log.
def main(bot_name, options = {})
  log "🚀 Starting Daily Digest for @#{bot_name}"
  log "=" * 60

  config = BOTS[bot_name]
  dry_run = options[:dry_run] ? true : false

  posts = load_posts_from_csv(options[:date])

  log "\n🔍 Extracting topics..."
  all_topics = extract_topics(posts)

  topics = filter_topics_by_style(all_topics, config[:style])

  if topics.empty?
    log "⚠️ No suitable topics found for style: #{config[:style]}"
    exit 1
  end

  log "\n🤖 Analyzing with Claude..."
  analysis = analyze_with_claude(posts, topics)
  if analysis.is_a?(Hash)
    log " Main topics: #{Array(analysis['main_topics']).join(', ')}"
    log " Sentiment: #{analysis['sentiment']}"
  end

  log "\n📝 Generating content..."
  summary = generate_summary_toot(posts.size, topics, config[:style], config[:hashtags])
  links = generate_links_toot(topics, config[:style])

  log " Summary: #{summary.length} chars"
  log " Links: #{links.length} chars"

  log "\n📤 Publishing to Mastodon..."
  toot1, = publish_thread(bot_name, summary, links, dry_run: dry_run)

  log "\n" + "=" * 60
  log "✅ Digest complete for @#{bot_name}"

  log "🔗 Thread: #{toot1['url']}" if toot1 && !dry_run
end

# Run main with the bot name and options parsed from the command line.
# Ctrl-C ends the run with exit status 130; any other StandardError is logged
# with its class, message and the first five backtrace lines, and ends the
# run with exit status 1.
begin
  main(bot_name, options)
rescue Interrupt
  log "\n⚠️ Interrupted by user"
  exit 130
rescue StandardError => e
  log "❌ FATAL ERROR: #{e.class}: #{e.message}"
  # backtrace can be nil for exceptions that were never raised normally
  log " #{Array(e.backtrace).first(5).join("\n ")}"
  exit 1
end