# app.bot — Discord bot that polls the configured sites and serves uptime commands.
|
|
|
|
import discord
|
|
import aiohttp
|
|
from datetime import datetime, timezone, timedelta
|
|
from discord import app_commands
|
|
from discord.ext import tasks
|
|
|
|
from .config import settings
|
|
from .db import init_db, insert_check, fetch_checks_since,fetch_month_checks
|
|
from .utils import (
|
|
check_site, summarize_counts, get_site_names, render_bar, compute_uptime, format_detection_reason
|
|
)
|
|
|
|
# Bot token, read from the shared settings object.
TOKEN = settings.discord_secret_key

# Every configured site is flattened to a plain dict (via its to_dict()
# method) once at import time; the rest of this module indexes sites by key.
MONITORED_SITES = [entry.to_dict() for entry in settings.monitored_sites]

# One slash-command choice per monitored site. The site name is used both as
# the label shown to the user and as the value handed to the command.
SITE_CHOICES = [
    app_commands.Choice(name=site_name, value=site_name)
    for site_name in (entry["name"] for entry in MONITORED_SITES)
]
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# State tracking — populated on startup, updated each poll
|
|
# ---------------------------------------------------------------------------
|
|
# Most recent check result per site name, e.g. {"Site1": "up", ...}.
# Seeded in on_ready() and overwritten on every poll.
last_known_state: dict[str, str] = dict()

# Timestamp of the last alert per site name; consulted to enforce the
# alert cooldown.
last_alerted_at: dict[str, datetime] = dict()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Alert helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# (previous, current) state pairs that trigger an alert. Any pair not listed
# here (for example "down" -> "degraded") is ignored by maybe_send_alert().
ALERT_TRANSITIONS = {
    # Deteriorations
    ("up", "down"),
    ("up", "degraded"),
    ("degraded", "down"),
    # Recoveries
    ("down", "up"),
    ("degraded", "up"),
}

# Coloured square shown next to each state in alert messages.
INCIDENT_EMOJI = dict(
    down="🟥",
    degraded="🟨",
    up="🟩",
)
|
|
|
|
|
|
async def maybe_send_alert(
    site_name: str,
    prev: str,
    curr: str,
    result: dict,
    now: datetime,
) -> None:
    """Send a state-change alert to the alerts channel if conditions are met.

    Nothing is sent when:
      * alerts are disabled (``settings.alert_channel_id`` is falsy),
      * ``(prev, curr)`` is not one of ``ALERT_TRANSITIONS``,
      * the alert is not a recovery and the previous alert for this site is
        younger than ``settings.alert_cooldown_minutes``,
      * the alerts channel cannot be resolved by the client.

    Args:
        site_name: Name of the site whose state changed.
        prev: State observed on the previous check.
        curr: State observed on the current check.
        result: Check result dict, forwarded to the message builder.
        now: Timestamp of the current check; used for the cooldown and
            recorded as the time of the alert.
    """
    # Alerts disabled
    if not settings.alert_channel_id:
        return

    # Not a transition we care about
    if (prev, curr) not in ALERT_TRANSITIONS:
        return

    is_recovery = curr == "up"

    # Cooldown — recoveries always bypass so you always know when a site is back
    if not is_recovery:
        last = last_alerted_at.get(site_name)
        cooldown = timedelta(minutes=settings.alert_cooldown_minutes)
        if last is not None and (now - last) < cooldown:
            return

    channel = client.get_channel(settings.alert_channel_id)
    if channel is None:
        print(f"Alert channel {settings.alert_channel_id} not found.")
        return

    await channel.send(_build_alert_message(site_name, prev, curr, result, now))

    # Record the alert only after it was actually delivered. Recording it
    # before the channel lookup/send would start the cooldown for an alert
    # nobody received and suppress the next real one.
    last_alerted_at[site_name] = now
|
|
|
|
|
|
def _build_alert_message(
    site_name: str,
    prev: str,
    curr: str,
    result: dict,
    now: datetime,
) -> str:
    """Compose the multi-line text of an alert.

    The message always starts with a headline and a ``prev → curr`` status
    line and ends with the check time. In between, a recovery (``curr`` is
    ``"up"``) lists latency and approximate downtime when available; any
    other state lists the detection reason and HTTP status when available.
    """
    recovered = curr == "up"
    icon = INCIDENT_EMOJI.get(curr, "⬛")
    heading = curr.upper() if not recovered else "RECOVERED"
    stamp = now.strftime("%H:%M UTC")

    parts = [
        f"{icon} **{heading}** | {site_name}",
        f"Status: `{prev}` → `{curr}`",
    ]

    if not recovered:
        why = format_detection_reason(result.get("detection_reason"))
        if why:
            parts.append(f"Reason: {why}")
        if result.get("http_status"):
            parts.append(f"HTTP status: {result['http_status']}")
    else:
        if result.get("latency_ms") is not None:
            parts.append(f"Latency: {result['latency_ms']}ms")
        # Outage length is estimated from the stored check history.
        outage = _approximate_downtime(site_name, now)
        if outage:
            parts.append(f"Downtime: ~{outage}")

    parts.append(f"Checked: {stamp}")
    return "\n".join(parts)
|
|
|
|
|
|
def _approximate_downtime(site_name: str, now: datetime) -> str | None:
    """Estimate how long the outage that just ended lasted.

    Walks the last day of checks from newest to oldest, skips the trailing
    run of 'up' rows (the recovery check is stored before the alert is
    built, so the newest row is normally 'up'), passes over the non-'up'
    rows of the outage, and measures from the first 'up' row found before
    them.

    Args:
        site_name: Site whose history is inspected.
        now: Time of the recovery; the end of the measured interval.

    Returns:
        A duration such as '45 min', '2 hr' or '2 hr 10 min' (the caller
        adds the '~' prefix), or None when the last day of history holds no
        outage preceded by an 'up' check.
    """
    # Rows are assumed to be ordered oldest -> newest, as the rest of this
    # module treats rows[-1] as the most recent check.
    rows = fetch_checks_since(site_name, now - timedelta(days=1))

    last_up_at = None
    seen_outage = False
    for checked_at, result, *_ in reversed(rows):
        if result != "up":
            seen_outage = True
            continue
        # An 'up' row only counts once outage rows have been passed;
        # before that it belongs to the recovery itself.
        if seen_outage:
            last_up_at = datetime.fromisoformat(checked_at)
            break

    if last_up_at is None:
        return None

    delta = now - last_up_at
    total_minutes = int(delta.total_seconds() // 60)
    if total_minutes < 60:
        return f"{total_minutes} min"
    hours, mins = divmod(total_minutes, 60)
    return f"{hours} hr {mins} min" if mins else f"{hours} hr"
|
|
|
|
|
|
# Discord client built with the library's default intents, plus the command
# tree that the slash commands below are registered on.
intents = discord.Intents.default()
client = discord.Client(intents=intents)
tree = app_commands.CommandTree(client=client)

# Shared HTTP session for site checks; created in on_ready().
session: aiohttp.ClientSession | None = None
|
|
|
|
@client.event
async def on_ready():
    """Initialise storage, HTTP session, known state and polling on ready.

    The session and the polling task are guarded so that a repeated call
    does not create a second session or start the loop twice.
    """
    global session

    init_db()

    if session is None:
        session = aiohttp.ClientSession(headers={"User-Agent": "VoteUptimeBot/1.0"})

    # Restore each site's last known state from the newest check of the past
    # hour, so a restart does not look like a state change on the first poll.
    for site in MONITORED_SITES:
        name = site["name"]
        window_start = datetime.now(timezone.utc) - timedelta(hours=1)
        recent = fetch_checks_since(name, window_start)
        if not recent:
            continue
        newest = recent[-1]
        last_known_state[name] = newest[1]  # result column

    if not poll_sites.is_running():
        poll_sites.start()

    await tree.sync()
    print(f"Logged in as {client.user}")
|
|
|
|
async def _poll_site(site: dict, now: datetime, now_iso: str) -> None:
    """Check one site, store the outcome, and alert on a state change."""
    result = await check_site(session, site)
    site_name = site["name"]
    new_state = result["result"]

    # When the check left no notes, store the formatted detection reason.
    notes = result["notes"]
    if result.get("detection_reason") and not notes:
        notes = format_detection_reason(result["detection_reason"])

    insert_check(
        site_name=site_name,
        checked_at=now_iso,
        http_status=result["http_status"],
        latency_ms=result["latency_ms"],
        result=new_state,
        error_type=result["error_type"],
        notes=notes,
    )

    # --- Alert logic ---
    prev_state = last_known_state.get(site_name)
    last_known_state[site_name] = new_state

    # A site with no previous state has nothing to compare against.
    if prev_state is not None and prev_state != new_state:
        await maybe_send_alert(site_name, prev_state, new_state, result, now)


@tasks.loop(minutes=settings.poll_interval_minutes)
async def poll_sites():
    """Poll every monitored site once per configured interval.

    All sites in one pass share the same timestamp. A failure while handling
    one site (check, database write or alert delivery) is reported and the
    remaining sites are still polled; letting the exception escape would end
    the background loop and stop monitoring altogether.
    """
    now = datetime.now(timezone.utc)
    now_iso = now.isoformat()

    for site in MONITORED_SITES:
        try:
            await _poll_site(site, now, now_iso)
        except Exception as exc:  # boundary of the background task
            print(f"Polling {site.get('name', site)!r} failed: {exc!r}")
|
|
|
|
@poll_sites.before_loop
async def before_poll_sites():
    """Hold back the polling loop until the client reports it is ready."""
    ready = client.wait_until_ready()
    await ready
|
|
|
|
class SiteNameTransformer(app_commands.Transformer):
    """Command-argument transformer that accepts only monitored site names."""

    async def transform(self, interaction: discord.Interaction, value: str) -> str:
        """Return ``value`` unchanged, or raise if it is not a known site.

        Raises:
            app_commands.AppCommandError: ``value`` is not among the names
                returned by ``get_site_names(MONITORED_SITES)``.
        """
        known_names = get_site_names(MONITORED_SITES)
        if value in known_names:
            return value
        raise app_commands.AppCommandError(f"Unknown site: {value}")
|
|
|
|
# "/uptime" command group; the subcommands below attach to it.
uptime_group = app_commands.Group(
    name="uptime",
    description="Uptime tools",
)
tree.add_command(uptime_group)
|
|
|
|
|
|
@uptime_group.command(name="now", description="Show current configured sites")
async def uptime_now(interaction: discord.Interaction):
    """Reply with the latest check of the past hour for every monitored site.

    Each site gets one line: a state emoji, the site name, the state in upper
    case and, when present, HTTP status, latency and failure reason. Sites
    with no check in the past hour are reported as having no recent data.
    """
    lines = []
    for site in MONITORED_SITES:
        rows = fetch_checks_since(
            site["name"], datetime.now(timezone.utc) - timedelta(hours=1)
        )
        if not rows:
            lines.append(f"**{site['name']}**: no recent data")
            continue

        # Tolerate extra trailing columns, like the other row consumers in
        # this module do.
        _checked_at, result, http_status, latency_ms, error_type, *_ = rows[-1]

        # Emoji prefix for quick scanning (same mapping as the alerts).
        emoji = INCIDENT_EMOJI.get(result, "⬛")

        # A failed check may carry no HTTP status or latency; leave those
        # fields out instead of printing "status=None" / "latency=Nonems".
        fields = [result.upper()]
        if http_status is not None:
            fields.append(f"status={http_status}")
        if latency_ms is not None:
            fields.append(f"latency={latency_ms}ms")
        if error_type:
            fields.append(f"reason={format_detection_reason(error_type)}")
        detail = " | ".join(fields)

        lines.append(f"{emoji} **{site['name']}**: {detail}")

    # Discord rejects an empty message, which is what an empty site list
    # would otherwise produce.
    await interaction.response.send_message("\n".join(lines) or "No sites configured.")
|
|
|
|
|
|
# SITE_CHOICES is built once from MONITORED_SITES near the top of this
# module; the command decorators below reuse that list.
|
|
|
|
@uptime_group.command(name="day", description="Last 24 hours in 15-minute bars")
@app_commands.describe(site="Site name")
@app_commands.choices(site=SITE_CHOICES)
async def uptime_day(
    interaction: discord.Interaction,
    site: app_commands.Transform[str, SiteNameTransformer],
):
    """Reply with a status bar, uptime percentage and counts for the last 24 h."""
    window_start = datetime.now(timezone.utc) - timedelta(hours=24)
    results = [row[1] for row in fetch_checks_since(site, window_start)]

    # NOTE(review): the bar keeps only the newest 96 results, which spans the
    # full 24 h only if checks arrive every 15 minutes — confirm against
    # settings.poll_interval_minutes.
    if results:
        bar = render_bar(results[-96:])
    else:
        bar = "⬛"

    uptime = compute_uptime(results)
    up, degraded, down = summarize_counts(results)

    msg = (
        f"**{site}** last 24h\n"
        f"{bar}\n"
        f"Uptime: **{uptime:.2f}%**\n"
        f"Up: {up} | Degraded: {degraded} | Down: {down}"
    )
    await interaction.response.send_message(msg)
|
|
|
|
|
|
@uptime_group.command(name="month", description="Current month summary")
@app_commands.describe(site="Site name")
@app_commands.choices(site=SITE_CHOICES)
async def uptime_month(
    interaction: discord.Interaction,
    site: app_commands.Transform[str, SiteNameTransformer],
):
    """Reply with one coloured square per day of the current month.

    A day is green at 99 % uptime or better, yellow from 95 %, red below.
    The reply also carries the month's overall uptime and state counts.
    """
    now = datetime.now(timezone.utc)
    rows = fetch_month_checks(site, now.year, now.month)

    # Bucket results by the first ten characters of the timestamp
    # (the YYYY-MM-DD part of an ISO string).
    by_day: dict[str, list[str]] = {}
    for row in rows:
        stamp, outcome = row[0], row[1]
        by_day.setdefault(stamp[:10], []).append(outcome)

    daily_pct = [compute_uptime(by_day[day]) for day in sorted(by_day)]
    day_bars = [
        "🟩" if pct >= 99 else "🟨" if pct >= 95 else "🟥"
        for pct in daily_pct
    ]

    all_results = [row[1] for row in rows]
    uptime = compute_uptime(all_results)
    up, degraded, down = summarize_counts(all_results)

    msg = (
        f"**{site}** {now.year}-{now.month:02d}\n"
        f"{''.join(day_bars) if day_bars else '⬛'}\n"
        f"Uptime: **{uptime:.2f}%**\n"
        f"Up: {up} | Degraded: {degraded} | Down: {down}"
    )
    await interaction.response.send_message(msg)
|
|
|
|
|
|
@uptime_group.command(name="summarize", description="Summarize current month for all sites")
async def uptime_summarize(interaction: discord.Interaction):
    """Reply with the current month's uptime and state counts for every site."""
    now = datetime.now(timezone.utc)
    report = [f"**Monthly summary for {now.year}-{now.month:02d}**"]

    for site in MONITORED_SITES:
        month_rows = fetch_month_checks(site["name"], now.year, now.month)
        results = [row[1] for row in month_rows]
        uptime = compute_uptime(results)
        up, degraded, down = summarize_counts(results)
        summary = (
            f"{site['name']}: uptime={uptime:.2f}% | up={up} | degraded={degraded} | down={down}"
        )
        report.append(summary)

    await interaction.response.send_message("\n".join(report))
|
|
|
|
|
|
@tree.command(name="incident", description="Placeholder incident review command")
async def incident(interaction: discord.Interaction):
    """Reply with a fixed placeholder text; no incident data is read."""
    placeholder = "Incident review command placeholder."
    await interaction.response.send_message(placeholder)
|
|
|
|
|
|
@tree.command(name="hello", description="Say hello")
async def hello(interaction: discord.Interaction):
    """Reply with a fixed greeting."""
    greeting = "Hello, world!"
    await interaction.response.send_message(greeting)
|
|
|
|
|
|
@tree.command(name="add", description="Add two numbers")
async def add(interaction: discord.Interaction, a: float, b: float):
    """Reply with the sum of ``a`` and ``b``."""
    total = a + b
    await interaction.response.send_message(f"Sum: {total}")
|
|
|
|
# Start the Discord client with the configured token.
client.run(TOKEN)