Files
comunidadhll/backend/app/scoreboard_candidate_backfill.py
2026-06-04 09:26:38 +02:00

260 lines
10 KiB
Python

"""Backfill public scoreboard candidates for RCON match link correlation."""
from __future__ import annotations
import argparse
import json
from datetime import datetime, timezone
from collections.abc import Mapping
from typing import Iterable
from .historical_storage import initialize_historical_storage, list_historical_servers, upsert_historical_match
from .postgres_rcon_storage import upsert_scoreboard_candidate
from .providers.public_scoreboard_provider import PublicScoreboardHistoricalDataSource
from .scoreboard_origins import (
build_trusted_scoreboard_match_url,
get_trusted_public_scoreboard_origin,
list_trusted_public_scoreboard_origins,
)
# Default for --max-pages: upper bound on scoreboard listing pages fetched per run.
DEFAULT_MAX_PAGES = 20
# Default for --page-size: passed as the per-page ``limit`` when fetching a listing page.
DEFAULT_PAGE_SIZE = 100
# Default for --detail-workers: passed as ``max_workers`` when fetching match details.
DEFAULT_DETAIL_WORKERS = 4
def main(argv: Iterable[str] | None = None) -> int:
    """Run the scoreboard candidate backfill from the command line.

    Args:
        argv: Command-line arguments to parse. When ``None``, argparse falls
            back to ``sys.argv[1:]``.

    Returns:
        ``0`` when the backfill report contains no errors, ``1`` otherwise.

    Raises:
        SystemExit: Via ``parser.error`` when an argument is invalid (bad
            timestamp, empty window, unknown/untrusted server).
    """
    parser = build_arg_parser()
    args = parser.parse_args(list(argv) if argv is not None else None)
    try:
        start_at = _parse_timestamp(args.start_at, option_name="--from")
        end_at = _parse_timestamp(args.end_at, option_name="--to")
    except argparse.ArgumentTypeError as exc:
        # The timestamps are parsed after parse_args(), so argparse will not
        # convert this exception into a usage error by itself; do it here so
        # the user gets a clean message instead of a traceback.
        parser.error(str(exc))
    if end_at <= start_at:
        parser.error("--to must be later than --from")
    server = _resolve_server(args.server_slug, parser)
    report = run_backfill(
        server=server,
        start_at=start_at,
        end_at=end_at,
        max_pages=args.max_pages,
        page_size=args.page_size,
        detail_workers=args.detail_workers,
    )
    print(json.dumps(report, ensure_ascii=False, indent=2))
    return 0 if not report["errors"] else 1
def run_backfill(
    *,
    server: dict[str, object],
    start_at: datetime,
    end_at: datetime,
    max_pages: int,
    page_size: int,
    detail_workers: int,
) -> dict[str, object]:
    """Walk public scoreboard pages and persist the matches inside a time window.

    For each listing page (1..``max_pages``) every match whose reference
    timestamp falls in ``[start_at, end_at)`` (or has no parsable timestamp) is
    upserted as a list-level scoreboard candidate, then its detail payload is
    fetched and upserted as a historical match.

    Paging stops on a listing fetch error, on a page without matches, or after
    finishing a page that contained at least one match older than ``start_at``.

    Args:
        server: Server record; ``slug`` and ``scoreboard_base_url`` are read.
        start_at: Inclusive lower bound of the window.
        end_at: Exclusive upper bound of the window.
        max_pages: Maximum number of listing pages to request.
        page_size: ``limit`` passed to each listing request.
        detail_workers: ``max_workers`` passed to the detail fetch.

    Returns:
        A report dict with ``status`` (``"ok"`` or ``"partial"``), the server
        slug and base URL, the requested window, ``stopped_after_window``,
        ``skipped_unsafe_urls``, the collected ``errors`` and all counters.
    """
    initialize_historical_storage()
    provider = PublicScoreboardHistoricalDataSource()
    server_slug = str(server["slug"])
    base_url = str(server["scoreboard_base_url"])
    counters = {
        "pages_processed": 0,
        "candidates_seen": 0,
        "list_candidates_inserted": 0,
        "list_candidates_updated": 0,
        "list_candidates_skipped": 0,
        "candidates_inserted": 0,
        "candidates_updated": 0,
        "player_rows_inserted": 0,
        "player_rows_updated": 0,
    }
    # Maps report counters to the keys of the delta returned per detail upsert.
    detail_counter_sources = (
        ("candidates_inserted", "matches_inserted"),
        ("candidates_updated", "matches_updated"),
        ("player_rows_inserted", "player_rows_inserted"),
        ("player_rows_updated", "player_rows_updated"),
    )
    errors: list[dict[str, object]] = []
    skipped_unsafe_urls = 0
    stopped_after_window = False

    for page in range(1, max_pages + 1):
        try:
            listing = provider.fetch_match_page(base_url=base_url, page=page, limit=page_size)
        except Exception as exc:
            errors.append({"stage": "fetch_match_page", "page": page, "message": str(exc)})
            break

        page_matches = _coerce_match_list(listing.get("maps"))
        if not page_matches:
            break
        counters["pages_processed"] += 1

        detail_ids: list[str] = []
        for match in page_matches:
            counters["candidates_seen"] += 1
            match_time = _parse_optional_timestamp(_pick_match_timestamp(match))
            if match_time is not None and match_time < start_at:
                # Older than the window: remember it so paging stops after
                # this page, but keep scanning the rest of the page.
                stopped_after_window = True
                continue
            if match_time is not None and match_time >= end_at:
                continue

            candidate = _build_list_candidate(server=server, match=match)
            if candidate is not None:
                try:
                    outcome = upsert_scoreboard_candidate(
                        server_slug=server_slug,
                        candidate=candidate,
                    )
                except Exception as exc:
                    counters["list_candidates_skipped"] += 1
                    errors.append(
                        {
                            "stage": "upsert_list_scoreboard_candidate",
                            "match_id": candidate["external_match_id"],
                            "message": str(exc),
                        }
                    )
                else:
                    counters[f"list_candidates_{outcome}"] += 1
            else:
                counters["list_candidates_skipped"] += 1
                skipped_unsafe_urls += int(_list_candidate_url_is_unsafe(server=server, match=match))

            # NOTE(review): the id is queued for a detail fetch for every
            # in-window match, including ones whose list candidate was
            # skipped -- confirm this is intended.
            match_id = _stringify(match.get("id"))
            if match_id:
                detail_ids.append(match_id)

        if detail_ids:
            try:
                details = provider.fetch_match_details(
                    base_url=base_url,
                    match_ids=detail_ids,
                    max_workers=detail_workers,
                )
            except Exception as exc:
                errors.append({"stage": "fetch_match_details", "page": page, "message": str(exc)})
                details = []
            for detail in details:
                try:
                    stats = upsert_historical_match(server_slug=server_slug, match_payload=detail)
                except Exception as exc:
                    errors.append(
                        {
                            "stage": "upsert_historical_match",
                            "match_id": _stringify(detail.get("id")),
                            "message": str(exc),
                        }
                    )
                    continue
                for counter_key, stats_key in detail_counter_sources:
                    counters[counter_key] += _coerce_int(stats.get(stats_key))

        if stopped_after_window:
            break

    report: dict[str, object] = {
        "status": "partial" if errors else "ok",
        "server": server_slug,
        "scoreboard_base_url": base_url,
        "requested_window": {
            "from": _format_timestamp(start_at),
            "to": _format_timestamp(end_at),
        },
        "stopped_after_window": stopped_after_window,
        "skipped_unsafe_urls": skipped_unsafe_urls,
        "errors": errors,
    }
    report.update(counters)
    return report
def build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the backfill tool.

    Registers three required string options (``--server``, ``--from``,
    ``--to``) followed by three integer tuning options that fall back to the
    module-level defaults.
    """
    parser = argparse.ArgumentParser(
        description="Backfill public scoreboard match candidates for RCON link correlation.",
    )
    required_options = (
        ("--server", "server_slug"),
        ("--from", "start_at"),
        ("--to", "end_at"),
    )
    for flag, dest in required_options:
        parser.add_argument(flag, dest=dest, required=True)

    integer_options = (
        ("--max-pages", DEFAULT_MAX_PAGES),
        ("--page-size", DEFAULT_PAGE_SIZE),
        ("--detail-workers", DEFAULT_DETAIL_WORKERS),
    )
    for flag, default in integer_options:
        parser.add_argument(flag, type=int, default=default)
    return parser
def _resolve_server(server_slug: str, parser: argparse.ArgumentParser) -> dict[str, object]:
    """Return the historical-storage record for a trusted server slug.

    Reports a usage error through ``parser.error`` (which exits) when the slug
    is not among the trusted public scoreboard origins, or when it is trusted
    but no historical server carries that slug.
    """
    trusted_slugs = {origin.slug for origin in list_trusted_public_scoreboard_origins()}
    if server_slug not in trusted_slugs:
        parser.error(f"unknown or untrusted server '{server_slug}'")

    found = next(
        (record for record in list_historical_servers() if record.get("slug") == server_slug),
        None,
    )
    if found is not None:
        return found

    parser.error(f"trusted server '{server_slug}' is not present in historical storage")
    # parser.error() exits; this only guards against a parser that returns.
    raise AssertionError("unreachable")
def _parse_timestamp(value: str, *, option_name: str) -> datetime:
try:
parsed = datetime.fromisoformat(value.strip().replace("Z", "+00:00"))
except ValueError as exc:
raise argparse.ArgumentTypeError(f"{option_name} must be an ISO timestamp") from exc
if parsed.tzinfo is None:
parsed = parsed.replace(tzinfo=timezone.utc)
return parsed.astimezone(timezone.utc)
def _parse_optional_timestamp(value: object) -> datetime | None:
if not isinstance(value, str) or not value.strip():
return None
try:
return _parse_timestamp(value, option_name="timestamp")
except argparse.ArgumentTypeError:
return None
def _format_timestamp(value: datetime) -> str:
return value.astimezone(timezone.utc).isoformat().replace("+00:00", "Z")
def _coerce_match_list(payload: object) -> list[dict[str, object]]:
return [item for item in payload if isinstance(item, dict)] if isinstance(payload, list) else []
def _pick_match_timestamp(match: dict[str, object]) -> object:
for key in ("end", "start", "creation_time"):
value = match.get(key)
if isinstance(value, str) and value.strip():
return value.strip()
return None
def _build_list_candidate(
    *,
    server: Mapping[str, object],
    match: Mapping[str, object],
) -> dict[str, object] | None:
    """Build a scoreboard candidate record from a listing entry.

    Returns ``None`` unless all of the following hold: the server slug has a
    trusted origin, the match id is all digits, the server's base URL and
    server number as well as the match's server number agree with that origin,
    ``map`` and ``result`` are mappings, ``start`` and ``end`` are present, and
    a trusted match URL could be built.
    """
    server_slug = _stringify(server.get("slug"))
    external_match_id = _stringify(match.get("id"))
    origin = get_trusted_public_scoreboard_origin(server_slug)
    map_payload = match.get("map")
    result_payload = match.get("result")

    # Guard clauses run in the same order as the checks they replace.
    if origin is None:
        return None
    if not external_match_id or not external_match_id.isdigit():
        return None
    configured_base_url = str(server.get("scoreboard_base_url") or "").strip()
    if configured_base_url != origin.base_url:
        return None
    if _coerce_optional_int(server.get("server_number")) != origin.server_number:
        return None
    if _coerce_optional_int(match.get("server_number")) != origin.server_number:
        return None
    if not isinstance(map_payload, Mapping):
        return None
    if not isinstance(result_payload, Mapping):
        return None

    started_at = _stringify(match.get("start"))
    ended_at = _stringify(match.get("end"))
    match_url = build_trusted_scoreboard_match_url(
        server_slug=server_slug,
        external_match_id=external_match_id,
    )
    if not (started_at and ended_at and match_url):
        return None

    candidate: dict[str, object] = {
        "external_match_id": external_match_id,
        "started_at": started_at,
        "ended_at": ended_at,
        "map_name": _stringify(map_payload.get("id") or map_payload.get("name")),
        "map_pretty_name": _stringify(map_payload.get("pretty_name")),
        "allied_score": _coerce_optional_int(result_payload.get("allied")),
        "axis_score": _coerce_optional_int(result_payload.get("axis")),
        "player_count": _coerce_optional_int(match.get("player_count")),
        "match_url": match_url,
    }
    return candidate
def _list_candidate_url_is_unsafe(
    *,
    server: Mapping[str, object],
    match: Mapping[str, object],
) -> bool:
    """Tell whether a match has an id but no trusted match URL can be built.

    Returns ``False`` when the match has no usable id; otherwise ``True``
    exactly when the trusted URL builder returns ``None`` for it.
    """
    external_match_id = _stringify(match.get("id"))
    if not external_match_id:
        return False
    match_url = build_trusted_scoreboard_match_url(
        server_slug=server.get("slug"),
        external_match_id=external_match_id,
    )
    return match_url is None
def _stringify(value: object) -> str | None:
if value is None:
return None
text = str(value).strip()
return text or None
def _coerce_int(value: object) -> int:
try:
return int(value or 0)
except (TypeError, ValueError):
return 0
def _coerce_optional_int(value: object) -> int | None:
try:
return None if value is None else int(value)
except (TypeError, ValueError):
return None
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())