"""Backfill public scoreboard candidates for RCON match link correlation.""" from __future__ import annotations import argparse import json from datetime import datetime, timezone from collections.abc import Mapping from typing import Iterable from .historical_storage import initialize_historical_storage, list_historical_servers, upsert_historical_match from .postgres_rcon_storage import upsert_scoreboard_candidate from .providers.public_scoreboard_provider import PublicScoreboardHistoricalDataSource from .scoreboard_origins import ( build_trusted_scoreboard_match_url, get_trusted_public_scoreboard_origin, list_trusted_public_scoreboard_origins, ) DEFAULT_MAX_PAGES = 20 DEFAULT_PAGE_SIZE = 100 DEFAULT_DETAIL_WORKERS = 4 def main(argv: Iterable[str] | None = None) -> int: parser = build_arg_parser() args = parser.parse_args(list(argv) if argv is not None else None) start_at = _parse_timestamp(args.start_at, option_name="--from") end_at = _parse_timestamp(args.end_at, option_name="--to") if end_at <= start_at: parser.error("--to must be later than --from") server = _resolve_server(args.server_slug, parser) report = run_backfill(server=server, start_at=start_at, end_at=end_at, max_pages=args.max_pages, page_size=args.page_size, detail_workers=args.detail_workers) print(json.dumps(report, ensure_ascii=False, indent=2)) return 0 if not report["errors"] else 1 def run_backfill(*, server: dict[str, object], start_at: datetime, end_at: datetime, max_pages: int, page_size: int, detail_workers: int) -> dict[str, object]: initialize_historical_storage() provider = PublicScoreboardHistoricalDataSource() server_slug = str(server["slug"]) base_url = str(server["scoreboard_base_url"]) counters = { "pages_processed": 0, "candidates_seen": 0, "list_candidates_inserted": 0, "list_candidates_updated": 0, "list_candidates_skipped": 0, "candidates_inserted": 0, "candidates_updated": 0, "player_rows_inserted": 0, "player_rows_updated": 0, } errors: list[dict[str, object]] = [] skipped_unsafe_urls = 0 stopped_after_window = False for page in range(1, max_pages + 1): try: page_payload = provider.fetch_match_page(base_url=base_url, page=page, limit=page_size) except Exception as exc: errors.append({"stage": "fetch_match_page", "page": page, "message": str(exc)}) break matches = _coerce_match_list(page_payload.get("maps")) if not matches: break counters["pages_processed"] += 1 ids: list[str] = [] for match in matches: counters["candidates_seen"] += 1 ref_time = _parse_optional_timestamp(_pick_match_timestamp(match)) if ref_time and ref_time < start_at: stopped_after_window = True continue if ref_time and ref_time >= end_at: continue candidate = _build_list_candidate(server=server, match=match) if candidate is None: counters["list_candidates_skipped"] += 1 skipped_unsafe_urls += int(_list_candidate_url_is_unsafe(server=server, match=match)) else: try: outcome = upsert_scoreboard_candidate( server_slug=server_slug, candidate=candidate, ) except Exception as exc: counters["list_candidates_skipped"] += 1 errors.append( { "stage": "upsert_list_scoreboard_candidate", "match_id": candidate["external_match_id"], "message": str(exc), } ) else: counters[f"list_candidates_{outcome}"] += 1 match_id = _stringify(match.get("id")) if match_id: ids.append(match_id) if ids: try: details = provider.fetch_match_details(base_url=base_url, match_ids=ids, max_workers=detail_workers) except Exception as exc: errors.append({"stage": "fetch_match_details", "page": page, "message": str(exc)}) details = [] for detail in details: try: delta = upsert_historical_match(server_slug=server_slug, match_payload=detail) except Exception as exc: errors.append({"stage": "upsert_historical_match", "match_id": _stringify(detail.get("id")), "message": str(exc)}) continue counters["candidates_inserted"] += _coerce_int(delta.get("matches_inserted")) counters["candidates_updated"] += _coerce_int(delta.get("matches_updated")) counters["player_rows_inserted"] += _coerce_int(delta.get("player_rows_inserted")) counters["player_rows_updated"] += _coerce_int(delta.get("player_rows_updated")) if stopped_after_window: break return {"status": "ok" if not errors else "partial", "server": server_slug, "scoreboard_base_url": base_url, "requested_window": {"from": _format_timestamp(start_at), "to": _format_timestamp(end_at)}, "stopped_after_window": stopped_after_window, "skipped_unsafe_urls": skipped_unsafe_urls, "errors": errors, **counters} def build_arg_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser(description="Backfill public scoreboard match candidates for RCON link correlation.") parser.add_argument("--server", dest="server_slug", required=True) parser.add_argument("--from", dest="start_at", required=True) parser.add_argument("--to", dest="end_at", required=True) parser.add_argument("--max-pages", type=int, default=DEFAULT_MAX_PAGES) parser.add_argument("--page-size", type=int, default=DEFAULT_PAGE_SIZE) parser.add_argument("--detail-workers", type=int, default=DEFAULT_DETAIL_WORKERS) return parser def _resolve_server(server_slug: str, parser: argparse.ArgumentParser) -> dict[str, object]: trusted = {origin.slug for origin in list_trusted_public_scoreboard_origins()} if server_slug not in trusted: parser.error(f"unknown or untrusted server '{server_slug}'") for server in list_historical_servers(): if server.get("slug") == server_slug: return server parser.error(f"trusted server '{server_slug}' is not present in historical storage") raise AssertionError("unreachable") def _parse_timestamp(value: str, *, option_name: str) -> datetime: try: parsed = datetime.fromisoformat(value.strip().replace("Z", "+00:00")) except ValueError as exc: raise argparse.ArgumentTypeError(f"{option_name} must be an ISO timestamp") from exc if parsed.tzinfo is None: parsed = parsed.replace(tzinfo=timezone.utc) return parsed.astimezone(timezone.utc) def _parse_optional_timestamp(value: object) -> datetime | None: if not isinstance(value, str) or not value.strip(): return None try: return _parse_timestamp(value, option_name="timestamp") except argparse.ArgumentTypeError: return None def _format_timestamp(value: datetime) -> str: return value.astimezone(timezone.utc).isoformat().replace("+00:00", "Z") def _coerce_match_list(payload: object) -> list[dict[str, object]]: return [item for item in payload if isinstance(item, dict)] if isinstance(payload, list) else [] def _pick_match_timestamp(match: dict[str, object]) -> object: for key in ("end", "start", "creation_time"): value = match.get(key) if isinstance(value, str) and value.strip(): return value.strip() return None def _build_list_candidate( *, server: Mapping[str, object], match: Mapping[str, object], ) -> dict[str, object] | None: server_slug = _stringify(server.get("slug")) external_match_id = _stringify(match.get("id")) origin = get_trusted_public_scoreboard_origin(server_slug) map_payload = match.get("map") result_payload = match.get("result") if ( origin is None or not external_match_id or not external_match_id.isdigit() or str(server.get("scoreboard_base_url") or "").strip() != origin.base_url or _coerce_optional_int(server.get("server_number")) != origin.server_number or _coerce_optional_int(match.get("server_number")) != origin.server_number or not isinstance(map_payload, Mapping) or not isinstance(result_payload, Mapping) ): return None started_at = _stringify(match.get("start")) ended_at = _stringify(match.get("end")) match_url = build_trusted_scoreboard_match_url( server_slug=server_slug, external_match_id=external_match_id, ) if not started_at or not ended_at or not match_url: return None return { "external_match_id": external_match_id, "started_at": started_at, "ended_at": ended_at, "map_name": _stringify(map_payload.get("id") or map_payload.get("name")), "map_pretty_name": _stringify(map_payload.get("pretty_name")), "allied_score": _coerce_optional_int(result_payload.get("allied")), "axis_score": _coerce_optional_int(result_payload.get("axis")), "player_count": _coerce_optional_int(match.get("player_count")), "match_url": match_url, } def _list_candidate_url_is_unsafe( *, server: Mapping[str, object], match: Mapping[str, object], ) -> bool: external_match_id = _stringify(match.get("id")) return bool( external_match_id and build_trusted_scoreboard_match_url( server_slug=server.get("slug"), external_match_id=external_match_id, ) is None ) def _stringify(value: object) -> str | None: if value is None: return None text = str(value).strip() return text or None def _coerce_int(value: object) -> int: try: return int(value or 0) except (TypeError, ValueError): return 0 def _coerce_optional_int(value: object) -> int | None: try: return None if value is None else int(value) except (TypeError, ValueError): return None if __name__ == "__main__": raise SystemExit(main())