This commit is contained in:
devRaGonSa
2026-06-05 16:57:25 +02:00
commit 0da8338ba8
310 changed files with 45849 additions and 0 deletions

View File

@@ -0,0 +1,667 @@
"""Dedicated prospective RCON historical capture worker."""
from __future__ import annotations
import argparse
from datetime import date, datetime
import json
import os
import time
from dataclasses import dataclass
from typing import Iterable
from .config import (
get_rcon_capture_mode,
get_rcon_current_match_capture_interval_seconds,
get_rcon_current_match_writer_lock_timeout_seconds,
get_rcon_historical_capture_interval_seconds,
get_rcon_historical_capture_max_retries,
get_rcon_historical_capture_retry_delay_seconds,
get_rcon_request_timeout_seconds,
get_rcon_skip_historical_materialization,
)
from .rcon_admin_log_ingestion import ingest_rcon_admin_logs
from .rcon_admin_log_materialization import materialize_rcon_admin_log
from .rcon_client import (
RconQueryError,
build_rcon_target_key,
load_rcon_targets,
query_live_server_sample,
)
from .rcon_historical_storage import (
finalize_rcon_historical_capture_run,
initialize_rcon_historical_storage,
list_rcon_historical_target_statuses,
mark_rcon_historical_capture_failure,
persist_rcon_historical_sample,
start_rcon_historical_capture_run,
)
from .snapshots import utc_now
from .writer_lock import (
backend_writer_lock,
build_writer_lock_holder,
)
# Capture modes accepted by this worker; any other value is rejected by
# _resolve_capture_options.
CAPTURE_MODE_HISTORICAL = "historical"
# "current-live" always skips historical materialization and, when the caller
# gives no explicit timeout, uses the current-match writer-lock timeout.
CAPTURE_MODE_CURRENT_LIVE = "current-live"
@dataclass(slots=True)
class RconHistoricalCaptureStats:
targets_seen: int = 0
samples_inserted: int = 0
duplicate_samples: int = 0
failed_targets: int = 0
admin_log_events_seen: int = 0
admin_log_events_inserted: int = 0
admin_log_duplicate_events: int = 0
admin_log_failed_targets: int = 0
materialized_matches_inserted: int = 0
materialized_matches_updated: int = 0
def run_rcon_historical_capture(
    *,
    target_key: str | None = None,
    capture_mode: str | None = None,
    skip_materialization: bool | None = None,
    writer_lock_timeout_seconds: float | None = None,
) -> dict[str, object]:
    """Run one capture pass while holding the shared backend writer lock.

    Args:
        target_key: Key of a single configured target; ``None`` captures all
            configured targets.
        capture_mode: ``"historical"`` or ``"current-live"``; ``None`` defers
            to configuration.
        skip_materialization: Whether to skip materialization; ``None`` defers
            to configuration.
        writer_lock_timeout_seconds: Timeout handed to the writer lock. When
            omitted in ``"current-live"`` mode the configured current-match
            lock timeout is used; otherwise ``None`` is passed through.

    Returns:
        The summary payload produced by ``run_rcon_historical_capture_unlocked``.
    """
    mode, skip = _resolve_capture_options(
        capture_mode=capture_mode,
        skip_materialization=skip_materialization,
    )

    # Only current-live captures get a configured default lock timeout.
    wants_live_default = (
        writer_lock_timeout_seconds is None and mode == CAPTURE_MODE_CURRENT_LIVE
    )
    lock_timeout = (
        get_rcon_current_match_writer_lock_timeout_seconds()
        if wants_live_default
        else writer_lock_timeout_seconds
    )

    scope_label = target_key or "all-targets"
    holder = build_writer_lock_holder(
        f"app.rcon_historical_worker capture:{scope_label}"
    )
    with backend_writer_lock(holder=holder, timeout_seconds=lock_timeout):
        return run_rcon_historical_capture_unlocked(
            target_key=target_key,
            capture_mode=mode,
            skip_materialization=skip,
        )
def run_rcon_historical_capture_unlocked(
    *,
    target_key: str | None = None,
    capture_mode: str | None = None,
    skip_materialization: bool | None = None,
) -> dict[str, object]:
    """Capture one prospective RCON sample assuming the shared writer lock is already held.

    For every selected target this queries a live sample and persists it, then
    ingests that target's AdminLog window (the AdminLog step runs even when the
    live query failed). Materialization runs once after all targets unless it
    is skipped, and the capture run is finalized with a summary status.

    Args:
        target_key: Key of a single configured target; ``None`` captures all
            configured targets.
        capture_mode: ``"historical"`` or ``"current-live"``; ``None`` defers
            to configuration.
        skip_materialization: Whether to skip materialization; ``None`` defers
            to configuration.

    Returns:
        A summary payload with per-target results, capture and AdminLog
        errors, the materialization result, storage status rows for the
        selected targets and aggregate totals.

    Raises:
        Exception: Any error escaping the per-target handling is re-raised
            after the run has been finalized as ``"failed"``. Errors raised
            while resolving options, targets or configuration propagate before
            a run is started.
    """
    resolved_capture_mode, resolved_skip_materialization = _resolve_capture_options(
        capture_mode=capture_mode,
        skip_materialization=skip_materialization,
    )
    initialize_rcon_historical_storage()
    selected_targets = _select_targets(target_key)
    selected_target_keys = {build_rcon_target_key(target) for target in selected_targets}
    # Read every configuration value before the run is started: a failing
    # lookup must surface here, not after a run exists that no handler would
    # finalize.
    admin_log_lookback_minutes = get_rcon_admin_log_lookback_minutes()
    timeout_seconds = get_rcon_request_timeout_seconds()
    # One timestamp is shared by every sample persisted in this run.
    captured_at = utc_now().isoformat().replace("+00:00", "Z")
    target_scope = target_key or "all-configured-rcon-targets"
    run_id = start_rcon_historical_capture_run(
        mode=resolved_capture_mode,
        target_scope=target_scope,
    )
    stats = RconHistoricalCaptureStats()
    items: list[dict[str, object]] = []
    errors: list[dict[str, object]] = []
    admin_log_errors: list[dict[str, object]] = []
    try:
        for target in selected_targets:
            target_metadata = _serialize_target(target)
            stats.targets_seen += 1
            try:
                sample = query_live_server_sample(
                    target,
                    timeout_seconds=timeout_seconds,
                )
                delta = persist_rcon_historical_sample(
                    run_id=run_id,
                    captured_at=captured_at,
                    target=target_metadata,
                    normalized_payload=sample["normalized"],
                    raw_payload=sample["raw_session"],
                )
                stats.samples_inserted += int(delta["samples_inserted"])
                stats.duplicate_samples += int(delta["duplicate_samples"])
                items.append(
                    {
                        "target_key": target_metadata["target_key"],
                        "external_server_id": target.external_server_id,
                        "name": target.name,
                        "host": target.host,
                        "port": target.port,
                        "timeout_seconds": timeout_seconds,
                        "captured_at": captured_at,
                        "sample_inserted": bool(delta["samples_inserted"]),
                        "normalized": sample["normalized"],
                    }
                )
            except Exception as exc:  # noqa: BLE001 - controlled worker failures
                # A failing target is recorded and the loop moves on to the next one.
                stats.failed_targets += 1
                mark_rcon_historical_capture_failure(
                    run_id=run_id,
                    target=target_metadata,
                    error_message=_format_error_message(exc),
                )
                errors.append(
                    _serialize_capture_error(target, exc, timeout_seconds=timeout_seconds)
                )
            # AdminLog ingestion is attempted regardless of the live-sample outcome.
            admin_log_result = _ingest_target_admin_log(
                target_key=str(target_metadata["target_key"]),
                minutes=admin_log_lookback_minutes,
            )
            _merge_admin_log_result(
                stats=stats,
                admin_log_errors=admin_log_errors,
                target=target_metadata,
                result=admin_log_result,
            )
        materialization_result = _run_materialization_if_enabled(
            skip_materialization=resolved_skip_materialization
        )
        if not resolved_skip_materialization:
            stats.materialized_matches_inserted = int(
                materialization_result.get("matches_materialized") or 0
            )
            stats.materialized_matches_updated = int(
                materialization_result.get("matches_updated") or 0
            )
        # success: no capture errors; partial: some targets captured; failed: none did.
        status = "success" if not errors else ("partial" if items else "failed")
        finalize_rcon_historical_capture_run(
            run_id,
            status=status,
            targets_seen=stats.targets_seen,
            samples_inserted=stats.samples_inserted,
            duplicate_samples=stats.duplicate_samples,
            failed_targets=stats.failed_targets,
            notes=None if not errors else json.dumps(errors, separators=(",", ":")),
        )
    except Exception as exc:
        # Close the run as failed (reporting at least one failed target), then re-raise.
        finalize_rcon_historical_capture_run(
            run_id,
            status="failed",
            targets_seen=stats.targets_seen,
            samples_inserted=stats.samples_inserted,
            duplicate_samples=stats.duplicate_samples,
            failed_targets=max(1, stats.failed_targets),
            notes=str(exc),
        )
        raise
    return {
        "status": "ok" if items else "error",
        "run_status": status,
        "captured_at": captured_at,
        "target_scope": target_scope,
        "capture_mode": resolved_capture_mode,
        "materialization_skipped": resolved_skip_materialization,
        "admin_log_lookback_minutes": admin_log_lookback_minutes,
        "admin_log_events_seen": stats.admin_log_events_seen,
        "admin_log_events_inserted": stats.admin_log_events_inserted,
        "duplicate_events": stats.admin_log_duplicate_events,
        "samples_inserted": stats.samples_inserted,
        "targets": items,
        "errors": errors,
        "admin_log_errors": admin_log_errors,
        "materialization_result": materialization_result,
        # Only report storage rows for the targets handled by this run.
        "storage_status": [
            target_status
            for target_status in list_rcon_historical_target_statuses()
            if target_status.get("target_key") in selected_target_keys
        ],
        "totals": {
            "targets_seen": stats.targets_seen,
            "samples_inserted": stats.samples_inserted,
            "duplicate_samples": stats.duplicate_samples,
            "failed_targets": stats.failed_targets,
            "admin_log_events_seen": stats.admin_log_events_seen,
            "admin_log_events_inserted": stats.admin_log_events_inserted,
            "admin_log_duplicate_events": stats.admin_log_duplicate_events,
            "admin_log_failed_targets": stats.admin_log_failed_targets,
            "materialized_matches_inserted": stats.materialized_matches_inserted,
            "materialized_matches_updated": stats.materialized_matches_updated,
        },
    }
def run_periodic_rcon_historical_capture(
    *,
    interval_seconds: int,
    max_retries: int,
    retry_delay_seconds: int,
    target_key: str | None = None,
    capture_mode: str | None = None,
    skip_materialization: bool | None = None,
    max_runs: int | None = None,
) -> None:
    """Repeat the capture cycle in-process, sleeping between cycles.

    Args:
        interval_seconds: Seconds to sleep between two cycles.
        max_retries: Retries allowed per cycle after a failed attempt.
        retry_delay_seconds: Seconds to sleep between failed attempts.
        target_key: Key of a single configured target; ``None`` means all.
        capture_mode: ``"historical"`` or ``"current-live"``; ``None`` defers
            to configuration.
        skip_materialization: Whether to skip materialization; ``None`` defers
            to configuration.
        max_runs: Optional cap on the number of cycles; ``None`` loops until
            interrupted.

    Raises:
        Exception: Any unexpected error is reported as a worker event and then
            re-raised. ``KeyboardInterrupt`` ends the loop quietly instead.
    """
    mode, skip = _resolve_capture_options(
        capture_mode=capture_mode,
        skip_materialization=skip_materialization,
    )
    unlimited = max_runs is None
    runs_done = 0
    described_targets = _describe_loop_targets(target_key)
    _emit_worker_event(
        "rcon-historical-capture-worker-started",
        interval_seconds=interval_seconds,
        max_retries=max_retries,
        retry_delay_seconds=retry_delay_seconds,
        capture_mode=mode,
        materialization_skipped=skip,
        target_scope=target_key or "all-configured-rcon-targets",
        target_count=len(described_targets),
        targets=described_targets,
    )
    print("Press Ctrl+C to stop.")
    try:
        while unlimited or runs_done < max_runs:
            runs_done += 1
            _emit_worker_event(
                "rcon-historical-capture-cycle-started",
                run=runs_done,
            )
            cycle_result = _run_capture_with_retries(
                max_retries=max_retries,
                retry_delay_seconds=retry_delay_seconds,
                target_key=target_key,
                capture_mode=mode,
                skip_materialization=skip,
            )
            _emit_worker_event(
                "rcon-historical-capture-cycle-finished",
                run=runs_done,
                result=cycle_result,
            )
            # Leave right after the final cycle instead of sleeping once more.
            if not unlimited and runs_done >= max_runs:
                break
            _emit_worker_event(
                "rcon-historical-capture-sleep-started",
                run=runs_done,
                sleep_seconds=interval_seconds,
            )
            time.sleep(interval_seconds)
    except KeyboardInterrupt:
        print("\nRCON historical capture loop stopped by user.")
    except Exception as exc:
        _emit_worker_event(
            "rcon-historical-capture-worker-exited-unexpectedly",
            error_type=type(exc).__name__,
            message=str(exc),
        )
        raise
def _run_capture_with_retries(
    *,
    max_retries: int,
    retry_delay_seconds: int,
    target_key: str | None,
    capture_mode: str,
    skip_materialization: bool,
) -> dict[str, object]:
    """Run one capture, retrying failed attempts up to ``max_retries`` times.

    Returns:
        ``{"status": "ok", "attempts_used", "capture_result"}`` on success, or
        ``{"status": "error", "attempts_used", "error"}`` once the retries are
        exhausted. Exceptions raised by the capture are reported as worker
        events and are not propagated.
    """
    attempt = 0
    while True:
        attempt += 1
        try:
            capture_result = run_rcon_historical_capture(
                target_key=target_key,
                capture_mode=capture_mode,
                skip_materialization=skip_materialization,
            )
        except Exception as exc:
            # The first attempt is not a retry, so max_retries + 1 attempts run in total.
            exhausted = attempt > max_retries
            failure_fields = {
                "attempt": attempt,
                "max_retries": max_retries,
                "error_type": type(exc).__name__,
                "message": str(exc),
            }
            if exhausted:
                failure_fields["retries_exhausted"] = True
            _emit_worker_event(
                "rcon-historical-capture-attempt-failed",
                **failure_fields,
            )
            if exhausted:
                return {
                    "status": "error",
                    "attempts_used": attempt,
                    "error": str(exc),
                }
            if retry_delay_seconds > 0:
                _emit_worker_event(
                    "rcon-historical-capture-retry-sleep-started",
                    attempt=attempt,
                    sleep_seconds=retry_delay_seconds,
                )
                time.sleep(retry_delay_seconds)
        else:
            return {
                "status": "ok",
                "attempts_used": attempt,
                "capture_result": capture_result,
            }
def _select_targets(target_key: str | None) -> list[object]:
    """Return the configured targets, optionally narrowed to one target key.

    Raises:
        RuntimeError: If no RCON targets are configured at all.
        ValueError: If ``target_key`` matches none of the configured targets.
    """
    available = list(load_rcon_targets())
    if not available:
        raise RuntimeError("No RCON targets configured in HLL_BACKEND_RCON_TARGETS.")
    if target_key is None:
        return available

    # Surrounding whitespace in the requested key is ignored for matching.
    wanted_key = target_key.strip()
    matches = []
    for candidate in available:
        if build_rcon_target_key(candidate) == wanted_key:
            matches.append(candidate)
    if matches:
        return matches
    raise ValueError(f"Unknown RCON target key: {target_key}")
def _describe_loop_targets(target_key: str | None) -> list[dict[str, str]]:
    """Summarize the loop's targets for the startup event, omitting credentials.

    Only the target key, external server id and name are reported. When the
    targets cannot be resolved, a single ``"unavailable"`` entry describing
    the error is returned instead of raising.
    """
    try:
        resolved = _select_targets(target_key)
    except Exception as exc:  # noqa: BLE001 - startup logging must not hide capture error
        unavailable = {
            "status": "unavailable",
            "error_type": type(exc).__name__,
            "message": str(exc),
        }
        return [unavailable]

    descriptions = []
    for target in resolved:
        descriptions.append(
            {
                "target_key": build_rcon_target_key(target),
                "external_server_id": str(target.external_server_id or ""),
                "name": str(target.name or ""),
            }
        )
    return descriptions
def _emit_worker_event(event: str, **fields: object) -> None:
    """Write one worker event to stdout as indented JSON and flush immediately.

    Values that ``json`` cannot encode natively are converted by
    ``_json_default``.
    """
    record = {"event": event, **fields}
    rendered = json.dumps(record, indent=2, default=_json_default)
    print(rendered, flush=True)
def _json_default(value: object) -> str:
    """Fallback JSON encoder: ISO-8601 for dates/datetimes, ``str()`` otherwise."""
    is_temporal = isinstance(value, (date, datetime))
    return value.isoformat() if is_temporal else str(value)
def get_rcon_admin_log_lookback_minutes() -> int:
    """Return the AdminLog lookback window used by periodic RCON capture.

    The value is read from ``HLL_BACKEND_RCON_ADMIN_LOG_LOOKBACK_MINUTES`` and
    defaults to ``60`` when the variable is not set.

    Raises:
        ValueError: If the configured value is not an integer or is not
            positive. Both messages name the environment variable.
    """
    env_name = "HLL_BACKEND_RCON_ADMIN_LOG_LOOKBACK_MINUTES"
    configured_value = os.getenv(env_name, "60")
    try:
        lookback_minutes = int(configured_value)
    except ValueError as exc:
        # int()'s own message does not say which setting was malformed.
        raise ValueError(
            f"{env_name} must be a positive integer, got {configured_value!r}."
        ) from exc
    if lookback_minutes <= 0:
        raise ValueError("HLL_BACKEND_RCON_ADMIN_LOG_LOOKBACK_MINUTES must be positive.")
    return lookback_minutes
def _ingest_target_admin_log(
    *,
    target_key: str,
    minutes: int,
) -> dict[str, object]:
    """Ingest one target's AdminLog window, turning any failure into a result.

    Returns:
        The ingestion result, or, when ingestion raises, an error-shaped
        result with one error entry for the target and totals that count one
        failed target.
    """
    try:
        return ingest_rcon_admin_logs(minutes=minutes, target_key=target_key)
    except Exception as exc:  # noqa: BLE001 - worker reports per-target AdminLog failures
        failure_entry = {
            "target_key": target_key,
            "status": "error",
            "error_type": type(exc).__name__,
            "message": str(exc),
        }
        failure_totals = {
            "events_seen": 0,
            "events_inserted": 0,
            "duplicate_events": 0,
            "failed_targets": 1,
        }
        return {
            "status": "error",
            "errors": [failure_entry],
            "totals": failure_totals,
        }
def _merge_admin_log_result(
    *,
    stats: RconHistoricalCaptureStats,
    admin_log_errors: list[dict[str, object]],
    target: dict[str, object],
    result: dict[str, object],
) -> None:
    """Fold one target's AdminLog ingestion result into the run aggregates.

    Totals are added to ``stats`` and each reported error is appended to
    ``admin_log_errors``, labelled with this target's identity. A ``totals``
    value that is not a dict, an ``errors`` value that is not a list, and
    error entries that are not dicts are ignored.
    """
    totals = result.get("totals")
    if isinstance(totals, dict):
        counter_map = (
            ("admin_log_events_seen", "events_seen"),
            ("admin_log_events_inserted", "events_inserted"),
            ("admin_log_duplicate_events", "duplicate_events"),
            ("admin_log_failed_targets", "failed_targets"),
        )
        for stat_name, total_name in counter_map:
            # Missing or falsy totals count as zero.
            increment = int(totals.get(total_name) or 0)
            setattr(stats, stat_name, getattr(stats, stat_name) + increment)

    reported_errors = result.get("errors")
    if not isinstance(reported_errors, list):
        return
    for entry in reported_errors:
        if not isinstance(entry, dict):
            continue
        admin_log_errors.append(
            {
                "target_key": target["target_key"],
                "external_server_id": target.get("external_server_id"),
                "name": target.get("name"),
                "status": "error",
                "error_type": entry.get("error_type"),
                "message": entry.get("message"),
            }
        )
def _serialize_target(target: object) -> dict[str, object]:
    """Build the metadata mapping stored alongside a target's samples.

    The mapping holds the derived target key followed by the target's
    identity, network and source attributes.
    """
    metadata: dict[str, object] = {"target_key": build_rcon_target_key(target)}
    copied_attributes = (
        "external_server_id",
        "name",
        "host",
        "port",
        "region",
        "game_port",
        "query_port",
        "source_name",
    )
    for attribute in copied_attributes:
        metadata[attribute] = getattr(target, attribute)
    return metadata
def _serialize_capture_error(
    target: object,
    error: Exception,
    *,
    timeout_seconds: float,
) -> dict[str, object]:
    """Describe a failed capture for one target as a JSON-friendly mapping.

    The mapping combines the target's identity and address, the request
    timeout that was in effect, and the classified error type, stage and
    message.
    """
    classified_type = _classify_capture_error_type(error)
    classified_stage = _classify_capture_error_stage(error)

    report: dict[str, object] = {"target_key": build_rcon_target_key(target)}
    for attribute in ("external_server_id", "name", "host", "port"):
        report[attribute] = getattr(target, attribute)
    report.update(
        timeout_seconds=timeout_seconds,
        error_type=classified_type,
        error_stage=classified_stage,
        message=str(error),
    )
    return report
def _classify_capture_error_type(error: Exception) -> str:
    """Map a capture failure to a coarse error category.

    ``RconQueryError`` instances report their own ``error_type``. Any other
    exception is classified by keywords found in its lower-cased message; the
    first matching rule wins and ``"other-error"`` is the fallback.
    """
    if isinstance(error, RconQueryError):
        return error.error_type

    text = str(error).lower()
    # Rules are checked top to bottom, so a timeout outranks an auth keyword.
    keyword_rules = (
        (("timed out", "timeout"), "timeout"),
        (("401", "403", "login", "auth"), "auth/login"),
        (("refused",), "connection-refused"),
        (("payload", "json", "malformed"), "payload-invalid"),
    )
    for keywords, category in keyword_rules:
        if any(keyword in text for keyword in keywords):
            return category
    return "other-error"
def _classify_capture_error_stage(error: Exception) -> str | None:
    """Return the stage carried by a ``RconQueryError``, or ``None`` for other errors."""
    return error.error_stage if isinstance(error, RconQueryError) else None
def _format_error_message(error: Exception) -> str:
    """Prefix the error text with ``[type]`` or ``[type:stage]`` for storage."""
    category = _classify_capture_error_type(error)
    stage = _classify_capture_error_stage(error)
    # The stage is appended only when one is known (and truthy).
    tag = f"{category}:{stage}" if stage else f"{category}"
    return f"[{tag}] {error}"
def _resolve_capture_options(
    *,
    capture_mode: str | None,
    skip_materialization: bool | None,
) -> tuple[str, bool]:
    """Resolve the effective capture mode and skip-materialization flag.

    Missing values fall back to configuration. ``"current-live"`` mode always
    forces materialization to be skipped.

    Raises:
        ValueError: If the resolved mode is not a supported capture mode.
    """
    mode = capture_mode or get_rcon_capture_mode()
    if mode not in {CAPTURE_MODE_HISTORICAL, CAPTURE_MODE_CURRENT_LIVE}:
        raise ValueError("capture_mode must be 'historical' or 'current-live'.")

    # The configured flag is still read in current-live mode, then overridden below.
    skip = (
        get_rcon_skip_historical_materialization()
        if skip_materialization is None
        else skip_materialization
    )
    if mode == CAPTURE_MODE_CURRENT_LIVE:
        skip = True
    return mode, skip
def _run_materialization_if_enabled(*, skip_materialization: bool) -> dict[str, object]:
    """Run AdminLog materialization, or return a ``"skipped"`` marker result."""
    if not skip_materialization:
        return materialize_rcon_admin_log()
    skipped_result: dict[str, object] = {
        "status": "skipped",
        "reason": "skip-materialization-enabled",
    }
    return skipped_result
def build_arg_parser() -> argparse.ArgumentParser:
    """Build the CLI parser for one-shot or looping prospective RCON capture.

    Defaults for the interval, retry and capture-mode options are read from
    configuration at the time the parser is built.
    """
    parser = argparse.ArgumentParser(
        description="Prospective RCON historical capture for HLL Vietnam.",
    )
    # Each entry is (flags, add_argument keyword options), in CLI order.
    argument_specs = (
        (
            ("mode",),
            {
                "choices": ("capture", "loop"),
                "help": "capture runs once; loop keeps collecting periodically",
            },
        ),
        (
            ("--target",),
            {
                "dest": "target_key",
                "help": "optional target key; defaults to all configured RCON targets",
            },
        ),
        (
            ("--interval",),
            {
                "type": int,
                "default": get_rcon_historical_capture_interval_seconds(),
                "help": "seconds to wait between loop runs",
            },
        ),
        (
            ("--retries",),
            {
                "type": int,
                "default": get_rcon_historical_capture_max_retries(),
                "help": "retry attempts after a failed capture",
            },
        ),
        (
            ("--retry-delay",),
            {
                "type": int,
                "default": get_rcon_historical_capture_retry_delay_seconds(),
                "help": "seconds to wait between failed attempts",
            },
        ),
        (
            ("--max-runs",),
            {
                "type": int,
                "help": "optional safety cap for loop mode",
            },
        ),
        (
            ("--capture-mode",),
            {
                "choices": (CAPTURE_MODE_HISTORICAL, CAPTURE_MODE_CURRENT_LIVE),
                "default": get_rcon_capture_mode(),
                "help": "historical keeps materialization; current-live only captures lightweight live data",
            },
        ),
        (
            ("--skip-materialization",),
            {
                "action": "store_true",
                # None (not False) when absent, so configuration can decide.
                "default": None,
                "help": "capture AdminLog and live snapshots without running heavy historical materialization",
            },
        ),
    )
    for flags, options in argument_specs:
        parser.add_argument(*flags, **options)
    return parser
def main(argv: Iterable[str] | None = None) -> int:
    """Run the prospective RCON historical capture CLI.

    Args:
        argv: Command-line arguments without the program name; ``None`` lets
            argparse read ``sys.argv``.

    Returns:
        ``0`` once the capture or the loop has finished.

    Raises:
        ValueError: If a loop option (interval, retries, retry delay or max
            runs) is out of range.
    """
    parser = build_arg_parser()
    # Keep the configured historical interval, then blank the parser default so
    # an omitted --interval parses as None. Searching argv for the literal flag
    # missed the real CLI path (argv is None), the "--interval=N" spelling and
    # abbreviated flags, so an explicit interval could be silently overridden
    # in current-live mode.
    historical_interval = parser.get_default("interval")
    parser.set_defaults(interval=None)
    args = parser.parse_args(list(argv) if argv is not None else None)

    if args.mode == "capture":
        result = run_rcon_historical_capture(
            target_key=args.target_key,
            capture_mode=args.capture_mode,
            skip_materialization=args.skip_materialization,
        )
        print(json.dumps(result, indent=2, default=_json_default))
        return 0

    if args.interval is None:
        # No explicit interval: current-live mode has its own default cadence.
        args.interval = (
            get_rcon_current_match_capture_interval_seconds()
            if args.capture_mode == CAPTURE_MODE_CURRENT_LIVE
            else historical_interval
        )
    if args.interval <= 0:
        raise ValueError("--interval must be a positive integer.")
    if args.retries < 0:
        raise ValueError("--retries must be zero or positive.")
    if args.retry_delay < 0:
        raise ValueError("--retry-delay must be zero or positive.")
    if args.max_runs is not None and args.max_runs <= 0:
        raise ValueError("--max-runs must be positive when provided.")
    run_periodic_rcon_historical_capture(
        interval_seconds=args.interval,
        max_retries=args.retries,
        retry_delay_seconds=args.retry_delay,
        target_key=args.target_key,
        capture_mode=args.capture_mode,
        skip_materialization=args.skip_materialization,
        max_runs=args.max_runs,
    )
    return 0
# Script entry point: exit the process with main()'s return code.
if __name__ == "__main__":
    raise SystemExit(main())