Files
GIA/core/management/commands/backfill_contact_availability.py

136 lines
5.2 KiB
Python

from __future__ import annotations
from typing import Iterable
from django.core.management.base import BaseCommand
from core.models import Message
from core.presence import AvailabilitySignal, record_inferred_signal
from core.presence.inference import now_ms
class Command(BaseCommand):
    """Backfill inferred contact-availability events from stored messages.

    Scans recent ``Message`` rows and, for each one, derives up to two
    availability candidates: one from the message timestamp and one from the
    read-receipt timestamp. Candidates that are not already present in the
    owning user's ``contact_availability_events`` are passed to
    ``record_inferred_signal`` (or only counted when ``--dry-run`` is set).
    """

    help = "Backfill inferred contact availability events from historical message/read-receipt activity."

    def add_arguments(self, parser):
        """Register the command-line options accepted by this command."""
        parser.add_argument(
            "--days",
            type=int,
            default=30,
            help="Look-back window in days (values below 1 are raised to 1).",
        )
        parser.add_argument(
            "--limit",
            type=int,
            default=5000,
            help="Maximum number of messages to scan, oldest first.",
        )
        parser.add_argument(
            "--service",
            default="",
            help="Only scan messages whose source_service matches this value.",
        )
        parser.add_argument(
            "--user-id",
            default="",
            help="Only scan messages belonging to this user id.",
        )
        parser.add_argument(
            "--dry-run",
            action="store_true",
            default=False,
            help="Count the events that would be recorded without recording them.",
        )

    def _iter_messages(
        self, *, days: int, limit: int, service: str, user_id: str
    ) -> Iterable[Message]:
        """Return the messages to scan, ordered by ascending ``ts``.

        Args:
            days: Look-back window in days; clamped to at least 1.
            limit: Maximum number of rows returned; clamped to at least 1.
            service: When non-empty, restricts rows to this ``source_service``
                (compared after strip/lower-casing the argument).
            user_id: When non-empty, restricts rows to this ``user_id``.

        Returns:
            A sliced queryset with user/session/identifier/person joined via
            ``select_related`` so the per-message attribute walk in
            ``handle`` does not need follow-up queries for those relations.
        """
        # The window is computed in milliseconds, so Message.ts is assumed to
        # be an epoch-millisecond value comparable with now_ms().
        cutoff_ts = now_ms() - (max(1, int(days)) * 24 * 60 * 60 * 1000)
        qs = Message.objects.filter(ts__gte=cutoff_ts).select_related(
            "user", "session", "session__identifier", "session__identifier__person"
        )
        if service:
            qs = qs.filter(source_service=str(service).strip().lower())
        if user_id:
            qs = qs.filter(user_id=str(user_id).strip())
        # Ascending order + slice: when the window holds more than `limit`
        # rows, the oldest ones are the ones that get processed.
        return qs.order_by("ts")[: max(1, int(limit))]

    @staticmethod
    def _candidate_signals(msg) -> list[dict]:
        """Build the availability candidates that a single message implies.

        Returns a list of dicts with the keys ``source_kind``,
        ``availability_state``, ``confidence``, ``ts`` and ``payload``. A
        candidate is emitted only for timestamps that are strictly positive.
        """
        candidates = []

        base_ts = int(getattr(msg, "ts", 0) or 0)
        message_author = str(getattr(msg, "custom_author", "") or "").strip().upper()
        # Messages authored as USER or BOT are treated as outgoing; everything
        # else is treated as incoming.
        outgoing = message_author in {"USER", "BOT"}
        if base_ts > 0:
            candidates.append(
                {
                    "source_kind": "message_out" if outgoing else "message_in",
                    "availability_state": "available",
                    "confidence": 0.65 if outgoing else 0.75,
                    "ts": base_ts,
                    "payload": {
                        "origin": "backfill_contact_availability",
                        "message_id": str(msg.id),
                        "inferred_from": "message_activity",
                    },
                }
            )

        read_ts = int(getattr(msg, "read_ts", 0) or 0)
        if read_ts > 0:
            candidates.append(
                {
                    "source_kind": "read_receipt",
                    "availability_state": "available",
                    "confidence": 0.95,
                    "ts": read_ts,
                    "payload": {
                        "origin": "backfill_contact_availability",
                        "message_id": str(msg.id),
                        "inferred_from": "read_receipt",
                        "read_by": str(
                            getattr(msg, "read_by_identifier", "") or ""
                        ),
                    },
                }
            )
        return candidates

    def handle(self, *args, **options):
        """Run the backfill and print a one-line summary.

        Messages lacking a session identifier, a linked person, a user, or a
        resolvable service name are scanned but produce no events. The
        ``created`` figure in the summary counts events recorded, or, with
        ``--dry-run``, events that would have been recorded.
        """
        days = max(1, int(options.get("days") or 30))
        limit = max(1, int(options.get("limit") or 5000))
        service_filter = str(options.get("service") or "").strip().lower()
        user_filter = str(options.get("user_id") or "").strip()
        dry_run = bool(options.get("dry_run"))

        created = 0
        scanned = 0
        # Dry-run only: keys of candidates already counted during this run.
        # Nothing is written in dry-run mode, so the exists() guard below
        # cannot detect repeats within the run (e.g. several messages sharing
        # one read_ts); without this set the summary would over-count.
        # NOTE(review): assumes record_inferred_signal persists a row that the
        # exists() guard matches on a real run - confirm.
        simulated = set()

        for msg in self._iter_messages(
            days=days, limit=limit, service=service_filter, user_id=user_filter
        ):
            scanned += 1
            identifier = getattr(getattr(msg, "session", None), "identifier", None)
            person = getattr(identifier, "person", None)
            user = getattr(msg, "user", None)
            if not identifier or not person or not user:
                continue

            # Prefer the service stored on the message; fall back to the
            # identifier's service when the message has none.
            service = (
                str(getattr(msg, "source_service", "") or identifier.service or "")
                .strip()
                .lower()
            )
            if not service:
                continue

            for row in self._candidate_signals(msg):
                source_kind = row["source_kind"]
                ts = int(row["ts"])

                # Idempotency guard: skip events that are already stored.
                already_recorded = user.contact_availability_events.filter(
                    person=person,
                    person_identifier=identifier,
                    service=service,
                    source_kind=source_kind,
                    ts=ts,
                ).exists()
                if already_recorded:
                    continue

                if dry_run:
                    key = (user.pk, person.pk, identifier.pk, service, source_kind, ts)
                    if key in simulated:
                        continue
                    simulated.add(key)
                    created += 1
                    continue

                created += 1
                record_inferred_signal(
                    AvailabilitySignal(
                        user=user,
                        person=person,
                        person_identifier=identifier,
                        service=service,
                        source_kind=source_kind,
                        availability_state=row["availability_state"],
                        confidence=float(row["confidence"]),
                        ts=ts,
                        payload=dict(row["payload"]),
                    )
                )

        self.stdout.write(
            self.style.SUCCESS(
                f"backfill_contact_availability complete scanned={scanned} created={created} dry_run={dry_run} days={days} limit={limit}"
            )
        )