Files
Voter-Uptime-Bot/app/bot.py
2026-04-14 22:14:08 -07:00

346 lines
11 KiB
Python

#app.bot
import discord
import aiohttp
from datetime import datetime, timezone, timedelta
from discord import app_commands
from discord.ext import tasks
from .config import settings
from .db import init_db, insert_check, fetch_checks_since,fetch_month_checks
from .utils import (
check_site, summarize_counts, get_site_names, render_bar, compute_uptime, format_detection_reason
)
# Bot token, read from the project settings object.
TOKEN = settings.discord_secret_key

# Plain-dict form of every configured site (via each entry's ``to_dict()``),
# built once at import time; the rest of this module indexes sites by key.
MONITORED_SITES = [
    configured.to_dict() for configured in settings.monitored_sites
]

# One slash-command choice per monitored site; the site's name is used as
# both the label and the value.
SITE_CHOICES = [
    app_commands.Choice(name=entry["name"], value=entry["name"])
    for entry in MONITORED_SITES
]
# ---------------------------------------------------------------------------
# State tracking — populated on startup, updated each poll
# ---------------------------------------------------------------------------

# Most recent result string per site name, e.g. {"Site1": "up", ...}.
last_known_state: dict[str, str] = dict()

# Time of the last alert sent per site name; consulted for the cooldown.
last_alerted_at: dict[str, datetime] = dict()

# ---------------------------------------------------------------------------
# Alert helpers
# ---------------------------------------------------------------------------

# (previous, current) result pairs that warrant an alert. Any pair not
# listed here (for example "down" -> "degraded") is ignored.
ALERT_TRANSITIONS: set[tuple[str, str]] = {
    # Getting worse
    ("up", "degraded"),
    ("up", "down"),
    ("degraded", "down"),
    # Recovery
    ("degraded", "up"),
    ("down", "up"),
}

# Leading emoji for an alert, keyed by the current result string.
INCIDENT_EMOJI: dict[str, str] = dict(
    down="🟥",
    degraded="🟨",
    up="🟩",
)
async def maybe_send_alert(
    site_name: str,
    prev: str,
    curr: str,
    result: dict,
    now: datetime,
) -> None:
    """Post a state-change alert for one site, subject to the cooldown.

    Nothing is sent when alerting is disabled (``settings.alert_channel_id``
    is falsy), when ``(prev, curr)`` is not in ``ALERT_TRANSITIONS``, or when
    a non-recovery alert for the same site falls inside the cooldown window.
    Transitions to ``"up"`` skip the cooldown check.

    The cooldown timestamp is recorded only after the message was actually
    delivered, so a missing channel or a failed send does not silence the
    next alert for the site.

    Args:
        site_name: Name of the monitored site.
        prev: Result string before this check.
        curr: Result string produced by this check.
        result: Check result dict, passed through to the message builder.
        now: Time of the check; used for the cooldown and in the message.
    """
    # Alerts disabled
    if not settings.alert_channel_id:
        return

    # Not a transition we care about
    if (prev, curr) not in ALERT_TRANSITIONS:
        return

    is_recovery = curr == "up"

    # Cooldown — recoveries always bypass so you always know when a site is back
    if not is_recovery:
        last = last_alerted_at.get(site_name)
        cooldown = timedelta(minutes=settings.alert_cooldown_minutes)
        if last and (now - last) < cooldown:
            return

    channel = client.get_channel(settings.alert_channel_id)
    if channel is None:
        print(f"Alert channel {settings.alert_channel_id} not found.")
        return

    message = _build_alert_message(site_name, prev, curr, result, now)
    try:
        await channel.send(message)
    except discord.HTTPException as exc:
        # Report and give up on this alert instead of raising into the
        # polling task that awaits this coroutine.
        print(f"Failed to send alert for {site_name}: {exc}")
        return

    # Start the cooldown only once the alert really went out.
    last_alerted_at[site_name] = now
def _build_alert_message(
    site_name: str,
    prev: str,
    curr: str,
    result: dict,
    now: datetime,
) -> str:
    """Render the multi-line alert text for a state transition.

    The message always starts with a header line and a ``prev → curr``
    status line and ends with a ``Checked:`` line. In between, a recovery
    (``curr == "up"``) lists latency and approximate downtime when available;
    any other transition lists the detection reason and HTTP status when
    available.
    """
    is_recovery = curr == "up"
    emoji = INCIDENT_EMOJI.get(curr, "")
    if is_recovery:
        title = "RECOVERED"
    else:
        title = curr.upper()
    timestamp = now.strftime("%H:%M UTC")

    lines = [
        f"{emoji} **{title}** | {site_name}",
        f"Status: `{prev}` → `{curr}`",
    ]

    if not is_recovery:
        # Failure details: why the check failed and what the server returned.
        reason = format_detection_reason(result.get("detection_reason"))
        if reason:
            lines.append(f"Reason: {reason}")
        if result.get("http_status"):
            lines.append(f"HTTP status: {result['http_status']}")
    else:
        if result.get("latency_ms") is not None:
            lines.append(f"Latency: {result['latency_ms']}ms")
        # Approximate downtime from DB
        downtime = _approximate_downtime(site_name, now)
        if downtime:
            lines.append(f"Downtime: ~{downtime}")

    lines.append(f"Checked: {timestamp}")
    return "\n".join(lines)
def _approximate_downtime(site_name: str, now: datetime) -> str | None:
    """Estimate how long a site was not 'up' before its current recovery.

    Walks back through the last day of checks, skips the 'up' row(s) at the
    very end (the recovery check itself is already stored by the time an
    alert is built), passes over the run of non-'up' rows, and measures from
    the last 'up' row found before that run.

    Args:
        site_name: Name of the monitored site.
        now: Reference time the duration is measured to.

    Returns:
        A duration string such as '45 min', '2 hr' or '2 hr 10 min' (the
        caller adds any '~' prefix), or None when no outage followed by an
        earlier 'up' row exists within the window.
    """
    # Rows are assumed oldest-first, as elsewhere in this module where
    # rows[-1] is treated as the newest check.
    rows = fetch_checks_since(site_name, now - timedelta(days=1))

    last_up_at = None
    seen_outage = False
    for checked_at, result, *_ in reversed(rows):
        if result != "up":
            seen_outage = True
            continue
        if seen_outage:
            # First 'up' row older than the outage: the site was last known
            # healthy at this point.
            last_up_at = datetime.fromisoformat(checked_at)
            break
        # Otherwise this is a trailing 'up' row (the recovery itself); using
        # it would always yield a downtime of about zero, so keep walking.

    if last_up_at is None:
        return None

    delta = now - last_up_at
    total_minutes = int(delta.total_seconds() // 60)
    if total_minutes < 60:
        return f"{total_minutes} min"
    hours, mins = divmod(total_minutes, 60)
    return f"{hours} hr {mins} min" if mins else f"{hours} hr"
# Discord client created with the library's default intents.
intents = discord.Intents.default()
client = discord.Client(intents=intents)
# Command tree on which this module's slash commands are registered.
tree = app_commands.CommandTree(client)
# Shared HTTP session passed to the site checks; stays None until on_ready
# creates it.
session: aiohttp.ClientSession | None = None
# Set after the command tree has been synced successfully, so that later
# on_ready dispatches (the event can fire again after a reconnect) skip it.
_tree_synced = False


@client.event
async def on_ready():
    """Prepare the bot when the client reports ready.

    Initializes the database, creates the shared HTTP session if it does not
    exist yet, seeds ``last_known_state`` from the past hour of stored
    checks, starts the polling loop if it is not running, and syncs the
    command tree. The sync is performed only once per process; every other
    step is safe to repeat when the event is dispatched again.
    """
    global session, _tree_synced

    init_db()
    if session is None:
        session = aiohttp.ClientSession(headers={"User-Agent": "VoteUptimeBot/1.0"})

    # Seed state from the most recent DB row per site so we don't false-alert
    # on restart.
    seed_since = datetime.now(timezone.utc) - timedelta(hours=1)
    for site in MONITORED_SITES:
        rows = fetch_checks_since(site["name"], seed_since)
        if rows:
            last_known_state[site["name"]] = rows[-1][1]  # result column

    if not poll_sites.is_running():
        poll_sites.start()

    # The flag is set only after sync() returns, so a failed sync is retried
    # on the next on_ready.
    if not _tree_synced:
        await tree.sync()
        _tree_synced = True

    print(f"Logged in as {client.user}")
@tasks.loop(minutes=settings.poll_interval_minutes)
async def poll_sites():
    """Check every monitored site once, store the result, and alert on change.

    All rows written during one pass share a single timestamp taken before
    the first check starts. A site with no previously known state never
    triggers an alert on its first check.
    """
    started = datetime.now(timezone.utc)
    started_iso = started.isoformat()

    for site in MONITORED_SITES:
        outcome = await check_site(session, site)
        name = site["name"]
        state = outcome["result"]

        # Use the formatted detection reason when the check supplied no notes.
        note_text = outcome["notes"]
        if outcome.get("detection_reason") and not note_text:
            note_text = format_detection_reason(outcome["detection_reason"])

        row = {
            "site_name": name,
            "checked_at": started_iso,
            "http_status": outcome["http_status"],
            "latency_ms": outcome["latency_ms"],
            "result": state,
            "error_type": outcome["error_type"],
            "notes": note_text,
        }
        insert_check(**row)

        # --- Alert logic ---
        previous = last_known_state.get(name)
        last_known_state[name] = state
        state_changed = previous is not None and previous != state
        if state_changed:
            await maybe_send_alert(name, previous, state, outcome, started)
@poll_sites.before_loop
async def before_poll_sites():
    """Hold back the first poll until the Discord client is ready."""
    await client.wait_until_ready()
class SiteNameTransformer(app_commands.Transformer):
    """Slash-command argument transformer that accepts only configured site names."""

    async def transform(self, interaction: discord.Interaction, value: str) -> str:
        """Return ``value`` unchanged when it names a monitored site.

        Raises:
            app_commands.AppCommandError: If ``value`` is not among the names
                returned by ``get_site_names(MONITORED_SITES)``.
        """
        known_names = get_site_names(MONITORED_SITES)
        if value in known_names:
            return value
        raise app_commands.AppCommandError(f"Unknown site: {value}")
# "/uptime" command group; the now/day/month/summarize subcommands below are
# registered on it, and the group itself is attached to the command tree.
uptime_group = app_commands.Group(name="uptime", description="Uptime tools")
tree.add_command(uptime_group)
@uptime_group.command(name="now", description="Show current configured sites")
async def uptime_now(interaction: discord.Interaction):
    """Reply with the latest check from the past hour for every monitored site.

    Each site gets one line: an emoji, the site name, the upper-cased result,
    HTTP status and latency, plus a formatted reason when the row carries an
    error type. A status or latency that was not recorded is shown as "n/a".
    Sites without a check in the past hour are listed as "no recent data".
    """
    since = datetime.now(timezone.utc) - timedelta(hours=1)
    lines = []
    for site in MONITORED_SITES:
        rows = fetch_checks_since(site["name"], since)
        if not rows:
            lines.append(f"**{site['name']}**: no recent data")
            continue

        # Newest row is last.
        _checked_at, result, http_status, latency_ms, error_type = rows[-1]

        # Emoji prefix for quick scanning; same mapping the alerts use.
        emoji = INCIDENT_EMOJI.get(result, "")

        # Failed checks may have no status or latency; avoid printing
        # "None" / "Nonems" to the channel.
        status_text = http_status if http_status is not None else "n/a"
        latency_text = f"{latency_ms}ms" if latency_ms is not None else "n/a"

        detail = f"{result.upper()} | status={status_text} | latency={latency_text}"
        if error_type:
            detail += f" | reason={format_detection_reason(error_type)}"
        lines.append(f"{emoji} **{site['name']}**: {detail}")

    # An empty message body cannot be sent, so say something explicit.
    if not lines:
        lines.append("No sites are configured.")

    await interaction.response.send_message("\n".join(lines))
# Choice list used by the site-specific subcommands below.
# NOTE(review): this rebuilds SITE_CHOICES with the same expression as the
# definition near the top of the module; one of the two looks redundant.
SITE_CHOICES = [
    app_commands.Choice(name=site["name"], value=site["name"])
    for site in MONITORED_SITES
]
@uptime_group.command(name="day", description="Last 24 hours in 15-minute bars")
@app_commands.describe(site="Site name")
@app_commands.choices(site=SITE_CHOICES)
async def uptime_day(
    interaction: discord.Interaction,
    site: app_commands.Transform[str, SiteNameTransformer],
):
    """Reply with a 24-hour report for one site.

    The bar is rendered from at most the 96 most recent results and is left
    empty when there are none; the uptime percentage and the
    up/degraded/down counts cover every result in the window.
    """
    window_start = datetime.now(timezone.utc) - timedelta(hours=24)
    outcomes = [row[1] for row in fetch_checks_since(site, window_start)]

    bar = ""
    if outcomes:
        bar = render_bar(outcomes[-96:])

    uptime = compute_uptime(outcomes)
    up, degraded, down = summarize_counts(outcomes)

    msg = (
        f"**{site}** last 24h\n"
        f"{bar}\n"
        f"Uptime: **{uptime:.2f}%**\n"
        f"Up: {up} | Degraded: {degraded} | Down: {down}"
    )
    await interaction.response.send_message(msg)
@uptime_group.command(name="month", description="Current month summary")
@app_commands.describe(site="Site name")
@app_commands.choices(site=SITE_CHOICES)
async def uptime_month(
    interaction: discord.Interaction,
    site: app_commands.Transform[str, SiteNameTransformer],
):
    """Reply with the current month's report for one site.

    Results are grouped by the first ten characters of each row's timestamp
    and every group becomes one square: green at 99% uptime or better,
    yellow at 95% or better, red otherwise. The overall uptime and counts
    cover all rows of the month.
    """
    now = datetime.now(timezone.utc)
    rows = fetch_month_checks(site, now.year, now.month)

    # Bucket results per day key, keeping the row order inside each bucket.
    results_by_day: dict[str, list[str]] = {}
    for checked_at, result, *_ in rows:
        bucket = results_by_day.setdefault(checked_at[:10], [])
        bucket.append(result)

    day_bars = []
    for day in sorted(results_by_day):
        pct = compute_uptime(results_by_day[day])
        day_bars.append("🟩" if pct >= 99 else "🟨" if pct >= 95 else "🟥")

    month_results = [row[1] for row in rows]
    uptime = compute_uptime(month_results)
    up, degraded, down = summarize_counts(month_results)

    msg = (
        f"**{site}** {now.year}-{now.month:02d}\n"
        f"{''.join(day_bars) if day_bars else ''}\n"
        f"Uptime: **{uptime:.2f}%**\n"
        f"Up: {up} | Degraded: {degraded} | Down: {down}"
    )
    await interaction.response.send_message(msg)
@uptime_group.command(name="summarize", description="Summarize current month for all sites")
async def uptime_summarize(interaction: discord.Interaction):
    """Reply with one uptime/count line per monitored site for the current month."""
    now = datetime.now(timezone.utc)

    lines = [f"**Monthly summary for {now.year}-{now.month:02d}**"]
    for site in MONITORED_SITES:
        month_rows = fetch_month_checks(site["name"], now.year, now.month)
        outcomes = [row[1] for row in month_rows]
        uptime = compute_uptime(outcomes)
        up, degraded, down = summarize_counts(outcomes)
        lines.append(
            f"{site['name']}: uptime={uptime:.2f}% | up={up} | degraded={degraded} | down={down}"
        )

    report = "\n".join(lines)
    await interaction.response.send_message(report)
@tree.command(name="incident", description="Placeholder incident review command")
async def incident(interaction: discord.Interaction):
    """Reply with a fixed placeholder; no incident review is implemented here."""
    placeholder = "Incident review command placeholder."
    await interaction.response.send_message(placeholder)
@tree.command(name="hello", description="Say hello")
async def hello(interaction: discord.Interaction):
    """Reply with a fixed greeting."""
    greeting = "Hello, world!"
    await interaction.response.send_message(greeting)
@tree.command(name="add", description="Add two numbers")
async def add(interaction: discord.Interaction, a: float, b: float):
    """Reply with the sum of the two numbers supplied to the command."""
    reply = f"Sum: {a + b}"
    await interaction.response.send_message(reply)
# Starts the Discord client with the configured token. This executes at
# module level, i.e. whenever this module is imported or run.
# NOTE(review): consider an `if __name__ == "__main__":` guard — first confirm
# how the bot is launched, since a launcher that merely imports this module
# would stop starting the bot.
client.run(TOKEN)