Files
zpravobot-digest/publish_digest.rb

541 lines
13 KiB
Ruby
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
#
# Zprávobot.news - AI Daily Digest Publisher
# Version: 1.0.2 (Fixed URLs)
require 'csv'
require 'json'
require 'time'
require 'net/http'
require 'uri'
require 'optparse'
# ==========================================
# CONFIGURATION
# ==========================================
# Base URL of the Mastodon instance the digest is published to.
MASTODON_URL = 'https://zpravobot.news'.freeze
# CSV export that load_posts_from_csv reads the posts from.
CSV_PATH = '/app/data/posts-latest.csv'.freeze
# Anthropic Messages API endpoint used by analyze_with_claude.
ANTHROPIC_API_URL = 'https://api.anthropic.com/v1/messages'.freeze

# Per-bot settings, keyed by the name accepted by the --bot option.
#   token:     Mastodon access token read from the environment (nil when unset)
#   style:     digest tone: 'neutral', 'positive' or 'sarcastic'
#   time_slot: descriptive label; not read by the code visible in this file
#   hashtags:  hashtag line inserted into the summary toot
# The table and each bot entry are frozen so the shared configuration cannot
# be modified by accident at runtime.
BOTS = {
  'zpravobot' => {
    token: ENV['ZPRAVOBOT_TOKEN'],
    style: 'neutral',
    time_slot: 'morning',
    hashtags: '#zpravobot #trendydne'
  },
  'pozitivni' => {
    token: ENV['POZITIVNI_TOKEN'],
    style: 'positive',
    time_slot: 'noon',
    hashtags: '#dobréZprávy #zpravobot'
  },
  'sarkasticky' => {
    token: ENV['SARKASTICKY_TOKEN'],
    style: 'sarcastic',
    time_slot: 'evening',
    hashtags: '#realita #zpravobot'
  }
}.each_value(&:freeze).freeze
# ==========================================
# COMMAND LINE PARSING
# ==========================================
# Parsed command-line options: :bot (String), :dry_run (true when given)
# and :date (String, YYYY-MM-DD).
options = {}
parser = OptionParser.new do |opts|
  opts.banner = "Usage: publish_digest.rb [options]"
  opts.on("--bot BOT", String, "Bot name (zpravobot, pozitivni, sarkasticky)") do |b|
    options[:bot] = b
  end
  opts.on("--dry-run", "Test mode - don't actually publish") do
    options[:dry_run] = true
  end
  opts.on("--date DATE", String, "Process specific date (YYYY-MM-DD)") do |d|
    options[:date] = d
  end
  opts.on("-h", "--help", "Show this help") do
    puts opts
    exit
  end
end

# Unknown switches or missing arguments end with a readable message and the
# usage text instead of an unhandled OptionParser exception.
begin
  parser.parse!
rescue OptionParser::ParseError => e
  puts "❌ ERROR: #{e.message}"
  puts parser
  exit 1
end

# Posts are selected by comparing against a '%Y-%m-%d' string, so any other
# date shape could never match; reject it here with a clear message.
if options[:date] && !options[:date].match?(/\A\d{4}-\d{2}-\d{2}\z/)
  puts "❌ ERROR: Invalid --date '#{options[:date]}'. Expected format: YYYY-MM-DD"
  exit 1
end

bot_name = options[:bot]
unless bot_name && BOTS.key?(bot_name)
  puts "❌ ERROR: Invalid bot name. Use: zpravobot, pozitivni, or sarkasticky"
  exit 1
end
config = BOTS[bot_name]

# Validate environment. Blank values are treated the same as missing ones.
if config[:token].to_s.strip.empty?
  puts "❌ ERROR: Missing token for @#{bot_name}"
  puts " Set environment variable: #{bot_name.upcase}_TOKEN"
  exit 1
end
if ENV['ANTHROPIC_API_KEY'].to_s.strip.empty?
  puts "❌ ERROR: Missing ANTHROPIC_API_KEY"
  exit 1
end
# ==========================================
# UTILITIES
# ==========================================
# Prints +message+ to standard output, prefixed with the current local time
# in the form "[YYYY-MM-DD HH:MM:SS] ".
def log(message)
  puts format('[%s] %s', Time.now.strftime('%Y-%m-%d %H:%M:%S'), message)
end
# Returns the first http(s) URL found in +text+, or nil when +text+ is nil or
# contains no URL.
#
# The match stops at whitespace, angle brackets and double quotes. Trailing
# sentence punctuation (. , ; : ! ?) is removed so that a link written at the
# end of a sentence does not carry the punctuation mark with it.
def extract_url(text)
  return nil if text.nil?

  url = text.to_s[%r{https?://[^\s<>"]+}]
  url&.sub(/[.,;:!?]+\z/, '')
end
# ==========================================
# DATA LOADING
# ==========================================
# Reads CSV_PATH and returns the posts whose created_at falls on the target day.
#
# date - optional 'YYYY-MM-DD' string; defaults to yesterday in local time.
#
# Returns an Array of Hashes with the string keys 'text', 'url' and
# 'created_at'. 'text' and 'url' are always Strings: an empty CSV cell is
# read as nil and is converted to '' so later string operations are safe.
#
# Rows whose created_at cannot be parsed are skipped and counted in the log.
# Terminates the process with status 1 when the file is missing or when no
# post matches the target day.
#
# NOTE(review): the day is taken from each timestamp as written (with its own
# UTC offset), while the default target day uses local time - confirm that
# the export and the host agree on the time zone.
def load_posts_from_csv(date = nil)
  target_date = date || (Time.now - 86_400).strftime('%Y-%m-%d')

  unless File.exist?(CSV_PATH)
    log "❌ CSV file not found: #{CSV_PATH}"
    exit 1
  end

  posts = []
  skipped = 0
  CSV.foreach(CSV_PATH, headers: true, encoding: 'utf-8') do |row|
    begin
      created = Time.parse(row['created_at'])
    rescue StandardError
      # Best effort: a missing or malformed timestamp must not abort the run.
      skipped += 1
      next
    end
    next unless created.strftime('%Y-%m-%d') == target_date

    posts << {
      'text' => row['text'].to_s,
      'url' => row['url'] || '',
      'created_at' => row['created_at']
    }
  end

  log "⚠️ Skipped #{skipped} rows with unparseable created_at" if skipped.positive?
  log "📊 Loaded #{posts.size} posts from #{target_date}"
  if posts.empty?
    log "⚠️ No posts found for #{target_date}"
    exit 1
  end
  posts
end
# ==========================================
# TOPIC EXTRACTION
# ==========================================
# Ordered topic rules: a post is filed under the FIRST label whose pattern
# matches its lower-cased text, so the order of the entries is significant.
TOPIC_RULES = [
  ['🌍 Zahraniční politika', /trump|venezuela|maduro|grónsko|greenland|usa|bílý dům/],
  ['🏒 Hokej', /hokej|extraliga|nhl|ms u20/],
  ['⚽ Fotbal', /fotbal|chelsea|liga|gól|penalty/],
  ['🎬 Kultura', /film|seriál|stranger things|hudba|koncert|festival|netflix/],
  ['❄️ Počasí', /počasí|teplota|mráz|sníh|déšť/],
  ['🏛️ Politika', /politika|parlament|vláda|ministr/],
  ['💼 Ekonomika', /ekonomika|koruna|inflace|mzdy|ceny/]
].freeze

# Groups posts by keyword-based topic.
#
# posts - Array of post Hashes with the string keys 'text' and 'url'.
#
# Side effect: every post Hash gets an 'extracted_url' entry - the first URL
# found in its text, or the value of its 'url' field when the text has none.
#
# Returns a Hash of topic label => Array of posts, ordered by descending post
# count. Posts that match no rule are left out. A post whose text is nil is
# treated as having empty text instead of raising.
def extract_topics(posts)
  topics = Hash.new { |hash, label| hash[label] = [] }

  posts.each do |post|
    raw_text = post['text'].to_s
    post['extracted_url'] = extract_url(raw_text) || post['url']

    lowered = raw_text.downcase
    label, = TOPIC_RULES.find { |_, pattern| lowered.match?(pattern) }
    topics[label] << post if label
  end

  # Sort by post count, largest topic first
  ranked = topics.sort_by { |_, members| -members.size }.to_h
  log "🔍 Found #{ranked.size} topics:"
  ranked.each { |label, members| log " #{label}: #{members.size} posts" }
  ranked
end
# ==========================================
# CONTENT FILTERING BY STYLE
# ==========================================
# Narrows the topic map to what suits the bot's tone.
#
# topics - Hash of topic label => Array of post Hashes (string key 'text').
# style  - 'neutral', 'positive' or 'sarcastic'.
#
# 'positive' drops topics whose label contains 'Politika' or 'Zahraniční' and
# keeps only posts containing an upbeat keyword and no downbeat keyword;
# topics left without posts are omitted. 'sarcastic' keeps the topics whose
# label contains 'Zahraniční' or 'Politika' and, when fewer than three were
# found, tops the selection up from the remaining topics until it holds five.
# 'neutral' and any other style return +topics+ itself, unchanged.
def filter_topics_by_style(topics, style)
  case style
  when 'positive'
    upbeat = /úspěch|vítěz|rekord|festival|koncert|ocenění|talent/
    downbeat = /nehoda|smrt|tragédie|havárie|konflikt|krize/

    kept = topics.each_with_object({}) do |(label, entries), result|
      next if label.include?('Politika') || label.include?('Zahraniční')

      cheerful = entries.select do |entry|
        lowered = entry['text'].downcase
        lowered.match?(upbeat) && !lowered.match?(downbeat)
      end
      result[label] = cheerful unless cheerful.empty?
    end
    log "💚 Filtered to #{kept.size} positive topics"
    kept
  when 'sarcastic'
    picked = topics.each_with_object({}) do |(label, entries), result|
      result[label] = entries if label.include?('Zahraniční') || label.include?('Politika')
    end
    if picked.size < 3
      topics.each do |label, entries|
        break if picked.size >= 5

        picked[label] = entries unless picked.key?(label)
      end
    end
    log "😏 Selected #{picked.size} topics for sarcasm"
    picked
  else
    topics
  end
end
# ==========================================
# CLAUDE API ANALYSIS
# ==========================================
# Sends a sample of the posts to the Anthropic Messages API and returns the
# parsed JSON analysis.
#
# posts  - Array of post Hashes (string key 'text').
# topics - Hash of topic label => Array of posts; only the counts are sent.
#
# The prompt contains the first 10 posts, each cut to its first 151
# characters; a nil text is sent as an empty string. The reply is expected to
# be a JSON object with the keys 'main_topics', 'sentiment' and
# 'notable_events'; Markdown code fences around it are removed before parsing.
#
# Returns the parsed Hash. Any failure (non-200 status, network error,
# unparseable or non-object reply) is logged and default_analysis(topics) is
# returned instead, so this method does not raise StandardError.
def analyze_with_claude(posts, topics)
  log "🤖 Analyzing with Claude API..."

  topic_summary = topics.map { |topic, topic_posts| "#{topic}: #{topic_posts.size}" }.join(', ')
  sample_texts = posts.first(10).map { |post| post['text'].to_s[0..150] }

  prompt = <<~PROMPT
    Analyzuj #{posts.size} českých/slovenských zpráv z Mastodon instance Zprávobot.news.
    Témata: #{topic_summary}
    Ukázka textů:
    #{sample_texts.join("\n---\n")}
    Vrať POUZE JSON (žádný markdown):
    {
    "main_topics": ["téma1", "téma2", "téma3"],
    "sentiment": "neutral|positive|negative",
    "notable_events": ["událost1", "událost2"]
    }
  PROMPT

  uri = URI(ANTHROPIC_API_URL)
  request = Net::HTTP::Post.new(uri)
  request['anthropic-version'] = '2023-06-01'
  request['content-type'] = 'application/json'
  request['x-api-key'] = ENV['ANTHROPIC_API_KEY']
  request.body = {
    model: 'claude-sonnet-4-20250514',
    max_tokens: 1000,
    messages: [
      { role: 'user', content: prompt }
    ]
  }.to_json

  response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) do |http|
    http.request(request)
  end

  if response.code != '200'
    log "⚠️ Claude API error: #{response.code}"
    return default_analysis(topics)
  end

  data = JSON.parse(response.body)
  text = data['content'][0]['text']
  analysis = JSON.parse(text.gsub(/```json|```/, '').strip)

  # Callers expect the same shape as default_analysis; a bare array or scalar
  # is not a usable analysis.
  unless analysis.is_a?(Hash)
    log "⚠️ Claude API error: unexpected response shape"
    return default_analysis(topics)
  end

  log "✅ Claude analysis complete"
  analysis
rescue StandardError => e
  log "⚠️ Claude API error: #{e.message}"
  default_analysis(topics)
end
# Fallback analysis used when the Claude request cannot be completed.
#
# topics - Hash keyed by topic label.
#
# Returns a Hash with the first three topic labels as 'main_topics', a
# 'neutral' sentiment and no notable events.
def default_analysis(topics)
  leading_topics = topics.keys.first(3)
  { 'main_topics' => leading_topics, 'sentiment' => 'neutral', 'notable_events' => [] }
end
# ==========================================
# TOOT GENERATION
# ==========================================
# Builds the first toot of the thread: a headline, the post count, one line
# per topic and the bot's hashtags.
#
# posts_count - number of posts processed (shown in the text).
# topics      - Hash of topic label => Array of posts, already ordered.
# style       - 'neutral', 'positive' or 'sarcastic'; any other value is
#               rendered with the neutral layout instead of raising.
# hashtags    - hashtag line to include.
#
# The neutral layout lists up to five topics, the other two up to four.
# Returns the text with surrounding whitespace removed. The 500-character
# limit is applied to that stripped text: longer text is cut to 497
# characters plus "...".
#
# NOTE(review): the date in the headline is always yesterday relative to now;
# it does not follow a --date override - confirm whether that is intended.
def generate_summary_toot(posts_count, topics, style, hashtags)
  date = (Time.now - 86_400).strftime('%d.%m.%Y')
  unit = style == 'sarcastic' ? '×' : ' postů'
  topic_lines = topics.first(5).map { |topic, topic_posts| "#{topic} (#{topic_posts.size}#{unit})" }

  summary =
    case style
    when 'positive'
      <<~TOOT
        DOBRÉ ZPRÁVY DNE (#{date})
        Z dnešních #{posts_count} zpráv vybrané momenty:
        #{topic_lines[0..3].join("\n")}
        #{hashtags}
        👇 Inspirace na čtení
      TOOT
    when 'sarcastic'
      <<~TOOT
        😏 DNEŠNÍ REALITA (#{date})
        #{posts_count} postů = co se stalo?
        #{topic_lines[0..3].join("\n")}
        #{hashtags}
        👇 Důkazy zmaru
      TOOT
    else
      # 'neutral', and the fallback for any unrecognised style
      <<~TOOT
        📊 TRENDY DNE (#{date})
        Zpracováno #{posts_count} postů:
        #{topic_lines.join("\n")}
        #{hashtags}
        👇 Odkazy na vybrané články
      TOOT
    end

  # Strip first so the heredoc's trailing newline does not count towards the limit.
  summary = summary.strip
  summary = summary[0..496] + '...' if summary.length > 500
  summary
end
# Builds the second toot of the thread: a header, up to two linked posts for
# each of the first five topics, and a footer.
#
# topics - Hash of topic label => Array of post Hashes carrying 'text' and
#          'extracted_url'.
# style  - 'neutral', 'positive' or 'sarcastic'; any other value is rendered
#          with the neutral header and footer instead of raising.
#
# Per topic the first post and the post in the middle of the list are
# considered; posts without a URL are skipped and a topic with no usable link
# is left out entirely, so no heading appears without links under it.
#
# When the text is longer than 500 characters, whole title/URL entries are
# removed from the end until it fits, so a URL is never cut in half. The
# length is measured on the raw text, which is a conservative bound.
#
# Returns the toot text with surrounding whitespace removed.
def generate_links_toot(topics, style)
  max_topics = 5
  max_links_per_topic = 2
  char_limit = 500

  # sections: [[topic, [[title_line, url_line], ...]], ...]
  sections = []
  topics.keys[0...max_topics].each do |topic|
    posts = topics[topic]

    # Select diverse posts (first and middle)
    candidates = []
    candidates << posts[0] if posts[0]
    candidates << posts[posts.size / 2] if posts.size > 1

    entries = []
    candidates[0...max_links_per_topic].each do |post|
      url = post['extracted_url']
      next if url.nil? || url.empty?

      # Title: first line of the text, its first 51 characters, whitespace
      # collapsed. Text with no first line yields an empty title.
      first_line = post['text'].to_s.split("\n")[0].to_s
      title = first_line[0..50].strip.gsub(/\s+/, ' ')
      entries << ["#{title}...", " #{url}"]
    end
    sections << [topic, entries] unless entries.empty?
  end

  # Style-specific header and footer
  case style
  when 'positive'
    header = "💚 POZITIVNÍ PŘÍBĚHY DNE:"
    footer = "\n💙 Máte skvělý den!\n#inspirace"
  when 'sarcastic'
    header = "🤡 \"BREAKING NEWS\" DNE:"
    footer = "\n🙃 Zítra: repeat\n#sarkasmus"
  else
    header = "📌 VYBRANÉ ČLÁNKY DNE:"
    footer = "\n#články #zprávy"
  end

  render = lambda do
    lines = sections.flat_map { |topic, entries| ["\n#{topic}:", *entries.flatten] }
    header + lines.join("\n") + footer
  end

  # Ensure under 500 chars by dropping complete entries from the end
  toot = render.call
  while toot.length > char_limit && !sections.empty?
    last_entries = sections.last[1]
    last_entries.pop
    sections.pop if last_entries.empty?
    toot = render.call
  end

  # Last resort when header and footer alone exceed the limit
  toot = toot[0..496] + '...' if toot.length > char_limit
  toot.strip
end
# ==========================================
# MASTODON PUBLISHING (DIRECT HTTP)
# ==========================================
# Sends one status to the Mastodon statuses endpoint.
#
# token   - access token placed in the Authorization header.
# payload - Hash serialised as the JSON request body.
#
# Returns the Net::HTTP response object.
def post_status(token, payload)
  uri = URI("#{MASTODON_URL}/api/v1/statuses")
  request = Net::HTTP::Post.new(uri)
  request['Authorization'] = "Bearer #{token}"
  request['Content-Type'] = 'application/json'
  request.body = payload.to_json
  # TLS follows the URL scheme instead of being forced on.
  Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') do |http|
    http.request(request)
  end
end

# Publishes the digest as a two-toot thread: the summary, then the links as a
# reply to it.
#
# bot_name     - key into BOTS; its :token authenticates both requests.
# summary_toot - text of the first toot.
# links_toot   - text of the reply.
# dry_run      - when true nothing is sent; both texts are logged instead.
#
# Returns [toot1, toot2] as parsed response Hashes, or [nil, nil] for a dry
# run. Terminates the process with status 1 when either request is rejected
# or raises; the reply is checked too, so a failed second toot is no longer
# reported as published.
def publish_thread(bot_name, summary_toot, links_toot, dry_run: false)
  config = BOTS[bot_name]
  log "📤 Publishing thread for @#{bot_name}..."

  if dry_run
    log "🧪 DRY RUN MODE - Not actually publishing"
    log "\n--- TOOT 1/2 (#{summary_toot.length} chars) ---"
    log summary_toot
    log "\n--- TOOT 2/2 (#{links_toot.length} chars) ---"
    log links_toot
    log "\n✅ Dry run complete"
    return [nil, nil]
  end

  # Publish toot 1
  response = post_status(config[:token], { status: summary_toot, visibility: 'public' })
  unless response.code == '200'
    log "❌ ERROR: #{response.body}"
    exit 1
  end
  toot1_data = JSON.parse(response.body)
  log "✅ Toot 1/2 published: #{toot1_data['url']}"

  # Publish toot 2 as reply
  response2 = post_status(
    config[:token],
    { status: links_toot, in_reply_to_id: toot1_data['id'], visibility: 'public' }
  )
  unless response2.code == '200'
    # Toot 1 is already public at this point; report the failure and stop.
    log "❌ ERROR publishing toot 2/2: #{response2.body}"
    exit 1
  end
  log "✅ Toot 2/2 published (thread)"

  [toot1_data, JSON.parse(response2.body)]
rescue StandardError => e
  log "❌ ERROR publishing thread: #{e.message}"
  exit 1
end
# ==========================================
# MAIN EXECUTION
# ==========================================
# Runs the whole digest pipeline for one bot: load posts, group them into
# topics, filter by the bot's style, ask Claude for an analysis, build both
# toots and publish them as a thread.
#
# bot_name - key into BOTS.
# options  - Hash with the optional keys :date (passed to
#            load_posts_from_csv) and :dry_run (passed to publish_thread).
#
# Terminates the process with status 1 when no topics remain after filtering.
#
# NOTE(review): the value returned by analyze_with_claude is not used by any
# later step in this method - confirm whether it should feed the toot text.
# NOTE(review): options[:date] is passed only to load_posts_from_csv, not to
# the toot generators.
def main(bot_name, options = {})
  log "🚀 Starting Daily Digest for @#{bot_name}"
  log "=" * 60
  bot_config = BOTS[bot_name]
  posts = load_posts_from_csv(options[:date])

  log "\n🔍 Extracting topics..."
  topics = filter_topics_by_style(extract_topics(posts), bot_config[:style])
  if topics.empty?
    log "⚠️ No suitable topics found for style: #{bot_config[:style]}"
    exit 1
  end

  log "\n🤖 Analyzing with Claude..."
  analyze_with_claude(posts, topics)

  log "\n📝 Generating content..."
  summary = generate_summary_toot(posts.size, topics, bot_config[:style], bot_config[:hashtags])
  links = generate_links_toot(topics, bot_config[:style])
  log " Summary: #{summary.length} chars"
  log " Links: #{links.length} chars"

  log "\n📤 Publishing to Mastodon..."
  first_toot, = publish_thread(bot_name, summary, links, dry_run: options[:dry_run])

  log "\n#{'=' * 60}"
  log "✅ Digest complete for @#{bot_name}"
  log "🔗 Thread: #{first_toot['url']}" if first_toot && !options[:dry_run]
end
# Script entry point: run the pipeline for the bot chosen on the command line.
# Ctrl-C ends the run with status 130; any other StandardError is logged
# together with the first five backtrace frames and ends it with status 1.
begin
  main(bot_name, options)
rescue Interrupt
  log "\n⚠️ Interrupted by user"
  exit 130
rescue StandardError => e
  log "❌ FATAL ERROR: #{e.message}"
  top_frames = e.backtrace.first(5)
  log " #{top_frames.join("\n ")}"
  exit 1
end