Implement Manticore fully and re-theme

This commit is contained in:
2026-03-11 02:19:08 +00:00
parent da044be68c
commit cbedcd67f6
46 changed files with 3444 additions and 944 deletions

213
core/events/behavior.py Normal file
View File

@@ -0,0 +1,213 @@
from __future__ import annotations
import json
import statistics
from dataclasses import dataclass
from typing import Any
def safe_int(value: Any, default: int = 0) -> int:
    """Best-effort int() coercion; returns *default* when conversion fails."""
    try:
        result = int(value)
    except Exception:
        result = int(default)
    return result
def parse_payload(value: Any) -> dict:
    """Normalise an event payload into a plain dict.

    Accepts either a dict (shallow-copied) or a JSON string.  Malformed
    JSON, JSON that is not an object, blank strings, and any other input
    type (including None) all yield ``{}``.

    Bug fix: the original fell off the end for non-dict/non-str input and
    implicitly returned None, breaking callers that immediately call
    ``.get()`` on the result.
    """
    if isinstance(value, dict):
        return dict(value)
    if isinstance(value, str):
        text = value.strip()
        if not text:
            return {}
        try:
            loaded = json.loads(text)
        except Exception:
            return {}
        if isinstance(loaded, dict):
            return dict(loaded)
        return {}
    # Anything else (None, numbers, lists, ...) normalises to an empty dict.
    return {}
def median_ms(values: list[int]) -> int:
    """Median of the strictly-positive samples in *values*, truncated to int.

    Returns 0 when no positive samples exist.
    """
    positives = [int(v) for v in values if safe_int(v, 0) > 0]
    if positives:
        return int(statistics.median(positives))
    return 0
def z_score(value: int, baseline_samples: list[int]) -> float:
    """Z-score of *value* against the positive baseline samples.

    The centre is the baseline median and the spread the population
    standard deviation.  Returns 0.0 when fewer than two positive samples
    exist or the spread is zero.
    """
    positives = [int(v) for v in baseline_samples if safe_int(v, 0) > 0]
    if len(positives) < 2:
        return 0.0
    spread = statistics.pstdev(positives)
    if spread <= 0:
        return 0.0
    centre = statistics.median(positives)
    return (float(value) - float(centre)) / float(spread)
@dataclass
class CompositionState:
    """Mutable per-session record of an in-progress message composition."""

    # Timestamp (ms) of the first composing_started event for this session.
    started_ts: int
    # Timestamp (ms) of the most recent composing_started (after a resume).
    last_started_ts: int
    # Timestamp (ms) of the last composing_stopped; 0 while actively composing.
    stopped_ts: int = 0
    # Number of start/(stop+restart) cycles observed; starts at 1.
    revision: int = 1
class ComposingTracker:
    """Tracks per-session composing (typing) activity within a time window."""

    def __init__(self, window_ms: int = 300000):
        # Clamp to at least one second; falsy input falls back to 5 minutes.
        self.window_ms = max(1000, int(window_ms or 300000))
        self._state: dict[str, CompositionState] = {}

    def observe_started(self, session_id: str, ts: int) -> CompositionState:
        """Record a composing_started event and return the session's state.

        Raises ValueError when *session_id* is blank.  A start after a stop
        bumps the revision counter; a start while already composing leaves
        the state untouched.
        """
        key = str(session_id or "").strip()
        if not key:
            raise ValueError("session_id is required")
        clamped_ts = max(0, safe_int(ts, 0))
        existing = self._state.get(key)
        if existing is None:
            fresh = CompositionState(
                started_ts=clamped_ts,
                last_started_ts=clamped_ts,
                revision=1,
            )
            self._state[key] = fresh
            return fresh
        if existing.stopped_ts > 0:
            # Resumed after a stop: new start/stop cycle.
            existing.revision += 1
            existing.last_started_ts = clamped_ts
            existing.stopped_ts = 0
        return existing

    def observe_stopped(self, session_id: str, ts: int) -> dict | None:
        """Record a composing_stopped event.

        Returns an "abandoned" summary dict (and clears the state) when the
        composition outlived the window; otherwise records the stop time
        and returns None.
        """
        key = str(session_id or "").strip()
        existing = self._state.get(key)
        if existing is None:
            return None
        clamped_ts = max(0, safe_int(ts, 0))
        elapsed = max(0, clamped_ts - int(existing.started_ts or 0))
        if elapsed < self.window_ms:
            existing.stopped_ts = clamped_ts
            return None
        self._state.pop(key, None)
        return {
            "started_ts": int(existing.started_ts or 0),
            "stopped_ts": clamped_ts,
            "duration_ms": elapsed,
            "revision": int(existing.revision or 1),
            "abandoned": True,
        }

    def observe_message(self, session_id: str) -> CompositionState | None:
        """A message was sent: clear and return any pending composition state."""
        key = str(session_id or "").strip()
        if not key:
            return None
        return self._state.pop(key, None)
def extract_metric_samples(rows: list[dict]) -> dict[str, list[int]]:
    """Derive behavioural latency/abandonment samples from raw event rows.

    Rows are processed in (ts, kind, session_id) order so that causally
    related events (delivered -> read, presence -> composing,
    composing -> sent) pair up deterministically.  Returned sample lists
    (milliseconds unless noted):

    - ``delay_b``: delivered -> read latency per message
    - ``delay_c``: composing_started -> message_sent latency per session
    - ``delay_f``: presence_available -> composing_started latency
    - ``revision``: start/stop cycles per completed composition (count)
    - ``abandoned_rate``: abandoned/started ratio scaled by 1000 (permille)
    """
    delivered_by_message: dict[str, int] = {}
    read_by_message: dict[str, int] = {}
    delay_c_samples: list[int] = []
    delay_f_samples: list[int] = []
    revision_samples: list[int] = []
    abandoned_started = 0
    abandoned_total = 0
    composition_by_session: dict[str, dict[str, int]] = {}
    presence_by_session: dict[str, int] = {}
    for row in sorted(
        list(rows or []),
        key=lambda item: (
            safe_int(item.get("ts"), 0),
            str(item.get("kind") or ""),
            str(item.get("session_id") or ""),
        ),
    ):
        kind = str(row.get("kind") or "").strip().lower()
        session_id = str(row.get("session_id") or "").strip()
        ts = safe_int(row.get("ts"), 0)
        payload = parse_payload(row.get("payload"))
        # The message id may live in the payload or directly on the row.
        message_id = str(
            payload.get("message_id")
            or payload.get("origin_message_id")
            or row.get("origin_message_id")
            or ""
        ).strip()
        if kind == "message_delivered" and message_id:
            delivered_by_message[message_id] = ts
            continue
        if kind == "message_read" and message_id:
            read_by_message[message_id] = ts
            continue
        if kind == "presence_available" and session_id:
            presence_by_session[session_id] = ts
            continue
        if kind == "composing_started" and session_id:
            abandoned_started += 1
            state = composition_by_session.get(session_id)
            if state is None:
                state = {"started_ts": ts, "revision": 1}
                composition_by_session[session_id] = state
            else:
                # Restarted composing without sending: bump the revision.
                state["revision"] = int(state.get("revision", 1)) + 1
            if presence_by_session.get(session_id):
                delta = ts - int(presence_by_session.get(session_id) or 0)
                if delta >= 0:
                    delay_f_samples.append(delta)
            continue
        if kind == "composing_abandoned":
            abandoned_total += 1
            if session_id:
                composition_by_session.pop(session_id, None)
            continue
        if kind == "message_sent" and session_id:
            # Completing a composition consumes its pending state.
            state = composition_by_session.pop(session_id, None)
            if state is None:
                continue
            delta = ts - int(state.get("started_ts") or 0)
            if delta >= 0:
                delay_c_samples.append(delta)
            revision_samples.append(max(1, int(state.get("revision") or 1)))
    # Pair delivered/read receipts by message id; ignore reads that
    # precede delivery (clock skew or missing delivery event).
    delay_b_samples = []
    for message_id, delivered_ts in delivered_by_message.items():
        read_ts = safe_int(read_by_message.get(message_id), 0)
        if read_ts > 0 and read_ts >= delivered_ts:
            delay_b_samples.append(read_ts - delivered_ts)
    # Abandonment ratio scaled by 1000 so it fits the integer-sample shape.
    abandoned_rate_samples = []
    if abandoned_started > 0:
        abandoned_rate_samples.append(
            int(round((float(abandoned_total) / float(abandoned_started)) * 1000))
        )
    return {
        "delay_b": delay_b_samples,
        "delay_c": delay_c_samples,
        "delay_f": delay_f_samples,
        "revision": revision_samples,
        "abandoned_rate": abandoned_rate_samples,
    }
def summarize_metrics(window_rows: list[dict], baseline_rows: list[dict]) -> dict[str, dict]:
    """Compute per-metric medians and z-scores for a window vs. a baseline.

    Metrics with no samples in the window are omitted from the result.
    Each entry carries the window median, the baseline median, a z-score
    of the window median against the baseline samples, and the sample
    count.
    """
    current = extract_metric_samples(window_rows)
    reference = extract_metric_samples(baseline_rows)
    summary: dict[str, dict] = {}
    for name in ("delay_b", "delay_c", "delay_f", "revision", "abandoned_rate"):
        window_values = list(current.get(name) or [])
        if not window_values:
            continue
        baseline_values = list(reference.get(name) or [])
        window_median = median_ms(window_values)
        summary[name] = {
            "value_ms": int(window_median),
            "baseline_ms": int(median_ms(baseline_values)),
            "z_score": float(round(z_score(window_median, baseline_values), 6)),
            "sample_n": len(window_values),
        }
    return summary

View File

@@ -5,12 +5,19 @@ import time
from asgiref.sync import sync_to_async
from django.conf import settings
from core.events.manticore import get_event_ledger_backend
from core.models import ConversationEvent
from core.observability.tracing import ensure_trace_id
from core.util import logs
log = logs.get_logger("event-ledger")
def event_ledger_enabled() -> bool:
return bool(getattr(settings, "EVENT_LEDGER_DUAL_WRITE", False))
return bool(
getattr(settings, "EVENT_LEDGER_DUAL_WRITE", False)
or getattr(settings, "EVENT_PRIMARY_WRITE_PATH", False)
)
def event_ledger_status() -> dict:
@@ -72,38 +79,78 @@ def append_event_sync(
normalized_direction = _normalize_direction(direction)
normalized_trace = ensure_trace_id(trace_id, payload or {})
safe_ts = _safe_ts(ts)
transport = str(origin_transport or "").strip().lower()
message_id = str(origin_message_id or "").strip()
dedup_row = None
if transport and message_id:
dedup_row = (
ConversationEvent.objects.filter(
actor_identifier = str(actor_identifier or "").strip()
origin_chat_id = str(origin_chat_id or "").strip()
payload = dict(payload or {})
raw_payload = dict(raw_payload or {})
dual_write = bool(getattr(settings, "EVENT_LEDGER_DUAL_WRITE", False))
primary_write = bool(getattr(settings, "EVENT_PRIMARY_WRITE_PATH", False))
write_django = dual_write and not primary_write
row = None
if write_django:
dedup_row = None
if transport and message_id:
dedup_row = (
ConversationEvent.objects.filter(
user=user,
session=session,
event_type=normalized_type,
origin_transport=transport,
origin_message_id=message_id,
)
.order_by("-created_at")
.first()
)
if dedup_row is not None:
row = dedup_row
else:
row = ConversationEvent.objects.create(
user=user,
session=session,
ts=safe_ts,
event_type=normalized_type,
direction=normalized_direction,
actor_identifier=actor_identifier,
origin_transport=transport,
origin_message_id=message_id,
origin_chat_id=origin_chat_id,
payload=payload,
raw_payload=raw_payload,
trace_id=normalized_trace,
)
.order_by("-created_at")
.first()
)
if dedup_row is not None:
return dedup_row
return ConversationEvent.objects.create(
user=user,
session=session,
ts=_safe_ts(ts),
event_type=normalized_type,
direction=normalized_direction,
actor_identifier=str(actor_identifier or "").strip(),
origin_transport=transport,
origin_message_id=message_id,
origin_chat_id=str(origin_chat_id or "").strip(),
payload=dict(payload or {}),
raw_payload=dict(raw_payload or {}),
trace_id=normalized_trace,
)
try:
get_event_ledger_backend().upsert_event(
user_id=int(user.id),
person_id=str(session.identifier.person_id),
session_id=str(session.id),
event_type=normalized_type,
direction=normalized_direction,
ts=safe_ts,
actor_identifier=actor_identifier,
origin_transport=transport,
origin_message_id=message_id,
origin_chat_id=origin_chat_id,
payload=payload,
raw_payload=raw_payload,
trace_id=normalized_trace,
)
except Exception as exc:
if primary_write:
raise
log.warning(
"Event ledger manticore dual-write failed session=%s event_type=%s err=%s",
getattr(session, "id", "-"),
normalized_type,
exc,
)
return row
async def append_event(**kwargs):

588
core/events/manticore.py Normal file
View File

@@ -0,0 +1,588 @@
from __future__ import annotations
import hashlib
import json
import time
from urllib.parse import urlparse, urlunparse
from typing import Any
import requests
from django.conf import settings
from core.models import ConversationEvent
from core.util import logs
from core.events.behavior import parse_payload
log = logs.get_logger("event-manticore")
class ManticoreEventLedgerBackend:
    """Event-ledger storage backed by Manticore Search's HTTP ``/sql`` endpoint.

    Conversation events and pre-computed behavioural metrics live in two
    tables (``gia_events`` / ``gia_metrics`` by default), created lazily
    with ``CREATE TABLE IF NOT EXISTS``.  All SQL is assembled as text:
    string values pass through :meth:`_escape`, numeric values are coerced
    with ``int()``/``float()`` before interpolation.
    """

    # Class-level on purpose: a fresh backend instance is constructed per
    # call (see get_event_ledger_backend), so table-readiness must be
    # remembered across instances.
    _table_ready_cache: dict[str, float] = {}
    # Seconds a successful ensure_table()/ensure_metrics_table() is trusted
    # before the CREATE statement is re-issued.
    _table_ready_ttl_seconds = 30.0

    def __init__(self):
        # Connection and table names come from Django settings, with defaults.
        self.base_url = str(
            getattr(settings, "MANTICORE_HTTP_URL", "http://localhost:9308")
        ).rstrip("/")
        self.table = (
            str(getattr(settings, "MANTICORE_EVENT_TABLE", "gia_events")).strip()
            or "gia_events"
        )
        self.metrics_table = (
            str(getattr(settings, "MANTICORE_METRIC_TABLE", "gia_metrics")).strip()
            or "gia_metrics"
        )
        self.timeout_seconds = int(getattr(settings, "MANTICORE_HTTP_TIMEOUT", 5) or 5)
        # Cache keys include the base URL so a URL failover invalidates them.
        self._table_cache_key = f"{self.base_url}|{self.table}"
        self._metrics_cache_key = f"{self.base_url}|{self.metrics_table}"

    def _candidate_base_urls(self) -> list[str]:
        """Return base URLs to try, in order, with duplicates removed.

        When the configured host is localhost/127.0.0.1 (e.g. the app runs
        inside a container), ``host.containers.internal`` is appended as a
        fallback candidate on the same port.
        """
        parsed = urlparse(self.base_url)
        hostname = str(parsed.hostname or "").strip().lower()
        candidates = [self.base_url]
        if hostname in {"localhost", "127.0.0.1"}:
            replacement = parsed._replace(netloc=f"host.containers.internal:{parsed.port or 9308}")
            candidates.append(urlunparse(replacement))
        output = []
        seen = set()
        for value in candidates:
            key = str(value or "").strip()
            if not key or key in seen:
                continue
            seen.add(key)
            output.append(key)
        return output

    def _sql(self, query: str) -> dict[str, Any]:
        """POST *query* to Manticore's /sql endpoint and return the JSON reply.

        Candidate base URLs are tried in order; the first that responds
        becomes the new ``base_url`` (cache keys are rebuilt to match).  If
        every candidate fails the last exception is re-raised.  A list
        reply collapses to its first element.
        """
        last_exc = None
        for base_url in self._candidate_base_urls():
            try:
                response = requests.post(
                    f"{base_url}/sql",
                    data={"mode": "raw", "query": query},
                    timeout=self.timeout_seconds,
                )
                response.raise_for_status()
                payload = response.json()
                if base_url != self.base_url:
                    # A fallback URL worked: remember it for future calls.
                    self.base_url = base_url.rstrip("/")
                    self._table_cache_key = f"{self.base_url}|{self.table}"
                    self._metrics_cache_key = f"{self.base_url}|{self.metrics_table}"
                if isinstance(payload, list):
                    return payload[0] if payload else {}
                return dict(payload or {})
            except Exception as exc:
                last_exc = exc
        if last_exc is not None:
            raise last_exc
        return {}

    def ensure_table(self) -> None:
        """Create the events table if needed; throttled by the TTL cache."""
        last_ready = float(
            self._table_ready_cache.get(self._table_cache_key, 0.0) or 0.0
        )
        if (time.time() - last_ready) <= float(self._table_ready_ttl_seconds):
            return
        self._sql(
            (
                f"CREATE TABLE IF NOT EXISTS {self.table} ("
                "id BIGINT,"
                "user_id BIGINT,"
                "person_id STRING,"
                "session_id STRING,"
                "transport STRING,"
                "kind STRING,"
                "direction STRING,"
                "ts BIGINT,"
                "ts_ref BIGINT,"
                "actor STRING,"
                "duration_ms BIGINT,"
                "abandoned INTEGER,"
                "revision INTEGER,"
                "payload JSON"
                ") engine='columnar' min_infix_len='2'"
            )
        )
        self._table_ready_cache[self._table_cache_key] = time.time()

    def ensure_metrics_table(self) -> None:
        """Create the metrics table if needed; throttled by the TTL cache."""
        last_ready = float(
            self._table_ready_cache.get(self._metrics_cache_key, 0.0) or 0.0
        )
        if (time.time() - last_ready) <= float(self._table_ready_ttl_seconds):
            return
        self._sql(
            (
                f"CREATE TABLE IF NOT EXISTS {self.metrics_table} ("
                "id BIGINT,"
                "user_id BIGINT,"
                "person_id STRING,"
                "window_days INTEGER,"
                "metric STRING,"
                "value_ms BIGINT,"
                "baseline_ms BIGINT,"
                "z_score FLOAT,"
                "sample_n INTEGER,"
                "computed_at BIGINT"
                ") engine='columnar'"
            )
        )
        self._table_ready_cache[self._metrics_cache_key] = time.time()

    def _escape(self, value: Any) -> str:
        """Escape backslashes and single quotes for inline SQL string literals."""
        text = str(value or "")
        return text.replace("\\", "\\\\").replace("'", "\\'")

    def _event_id(self, *, logical_key: str) -> int:
        """Derive a stable positive 64-bit document id from *logical_key*.

        Uses an 8-byte blake2b digest; the result is forced to be >= 1.
        """
        digest = hashlib.blake2b(
            str(logical_key or "").encode("utf-8"),
            digest_size=8,
        ).digest()
        value = int.from_bytes(digest, byteorder="big", signed=False)
        return max(1, int(value))

    def _event_kind(self, event_type: str) -> str:
        """Map a legacy Django event_type onto the ledger's behavioural kind.

        Unknown types pass through, normalised to lowercase.
        """
        normalized = str(event_type or "").strip().lower()
        return {
            "message_created": "message_sent",
            "delivery_receipt": "message_delivered",
            "read_receipt": "message_read",
            "typing_started": "composing_started",
            "typing_stopped": "composing_stopped",
            "composing_abandoned": "composing_abandoned",
            "presence_available": "presence_available",
            "presence_unavailable": "presence_unavailable",
        }.get(normalized, normalized)

    def _rows_from_sql_payload(self, payload: dict[str, Any]) -> list[dict]:
        """Extract result rows (dicts only) from a /sql JSON reply.

        Accepts either the ``data`` or ``hits`` key; a bare dict is treated
        as a single row.
        """
        data = payload.get("data") or payload.get("hits") or []
        if isinstance(data, dict):
            data = [data]
        rows = []
        for row in list(data or []):
            if isinstance(row, dict):
                rows.append(dict(row))
        return rows

    def _build_values(
        self,
        *,
        user_id: int,
        person_id: str,
        session_id: str,
        event_type: str,
        direction: str,
        ts: int,
        actor_identifier: str,
        origin_transport: str,
        origin_message_id: str,
        origin_chat_id: str,
        payload: dict | None,
        raw_payload: dict | None,
        trace_id: str,
    ) -> str:
        """Render one ``VALUES (...)`` tuple for a REPLACE into the events table.

        The JSON payload column absorbs raw_payload, trace/origin ids and
        the original (legacy) event type; behavioural columns (ts_ref,
        duration_ms, abandoned, revision) are lifted out of the payload
        when present.  The document id hashes a logical key so that
        re-ingesting the same event overwrites rather than duplicates.
        """
        data = dict(payload or {})
        if raw_payload:
            data["raw_payload"] = dict(raw_payload)
        if trace_id:
            data["trace_id"] = str(trace_id)
        if origin_message_id:
            data["origin_message_id"] = str(origin_message_id)
        if origin_chat_id:
            data["origin_chat_id"] = str(origin_chat_id)
        # Preserve the legacy type so readers can map back from "kind".
        data["legacy_event_type"] = str(event_type or "").strip().lower()
        ts_ref = 0
        try:
            ts_ref = int(data.get("message_ts") or data.get("source_ts") or 0)
        except Exception:
            ts_ref = 0
        try:
            duration_ms = int(data.get("duration_ms") or 0)
        except Exception:
            duration_ms = 0
        try:
            abandoned = 1 if bool(data.get("abandoned")) else 0
        except Exception:
            abandoned = 0
        try:
            revision = int(data.get("revision") or 0)
        except Exception:
            revision = 0
        # Logical identity of an event: every field that distinguishes it.
        logical_key = "|".join(
            [
                str(user_id),
                str(session_id),
                str(event_type or "").strip().lower(),
                str(direction or "").strip().lower(),
                str(origin_transport or "").strip().lower(),
                str(origin_message_id or "").strip(),
                str(origin_chat_id or "").strip(),
                str(actor_identifier or "").strip(),
                str(int(ts or 0)),
                str(trace_id or "").strip(),
            ]
        )
        doc_id = self._event_id(logical_key=logical_key)
        # Canonical JSON (sorted keys, no spaces) keeps the row deterministic.
        payload_json = json.dumps(data, separators=(",", ":"), sort_keys=True)
        return (
            f"({doc_id},{int(user_id)},'{self._escape(person_id)}',"
            f"'{self._escape(session_id)}','{self._escape(origin_transport)}',"
            f"'{self._escape(self._event_kind(event_type))}','{self._escape(direction)}',"
            f"{int(ts)},{ts_ref},'{self._escape(actor_identifier)}',{duration_ms},"
            f"{abandoned},{revision},'{self._escape(payload_json)}')"
        )

    def upsert_event(
        self,
        *,
        user_id: int,
        person_id: str,
        session_id: str,
        event_type: str,
        direction: str,
        ts: int,
        actor_identifier: str = "",
        origin_transport: str = "",
        origin_message_id: str = "",
        origin_chat_id: str = "",
        payload: dict | None = None,
        raw_payload: dict | None = None,
        trace_id: str = "",
    ) -> None:
        """Idempotently write one event row (REPLACE keyed by the logical-id hash)."""
        self.ensure_table()
        values = self._build_values(
            user_id=user_id,
            person_id=person_id,
            session_id=session_id,
            event_type=event_type,
            direction=direction,
            ts=ts,
            actor_identifier=actor_identifier,
            origin_transport=origin_transport,
            origin_message_id=origin_message_id,
            origin_chat_id=origin_chat_id,
            payload=payload,
            raw_payload=raw_payload,
            trace_id=trace_id,
        )
        self._sql(
            f"REPLACE INTO {self.table} "
            "(id,user_id,person_id,session_id,transport,kind,direction,ts,ts_ref,actor,duration_ms,abandoned,revision,payload) "
            f"VALUES {values}"
        )

    def query_rows(self, query: str) -> list[dict]:
        """Run *query* and return its result rows as plain dicts."""
        return self._rows_from_sql_payload(self._sql(query))

    def list_event_targets(self, *, user_id: int | None = None) -> list[dict]:
        """Distinct (user_id, person_id) pairs present in the events table."""
        filters = []
        if user_id is not None:
            filters.append(f"user_id={int(user_id)}")
        where_clause = f" WHERE {' AND '.join(filters)}" if filters else ""
        return self.query_rows(
            f"SELECT user_id, person_id FROM {self.table}{where_clause} "
            "GROUP BY user_id, person_id"
        )

    def fetch_events(
        self,
        *,
        user_id: int,
        person_id: str,
        since_ts: int,
    ) -> list[dict]:
        """All events for one user/person since *since_ts*, oldest first."""
        return self.query_rows(
            f"SELECT user_id, person_id, session_id, transport, kind, direction, ts, ts_ref, actor, duration_ms, abandoned, revision, payload "
            f"FROM {self.table} "
            f"WHERE user_id={int(user_id)} "
            f"AND person_id='{self._escape(person_id)}' "
            f"AND ts>={int(since_ts)} "
            "ORDER BY ts ASC"
        )

    def _metric_doc_id(
        self,
        *,
        user_id: int,
        person_id: str,
        window_days: int,
        metric: str,
    ) -> int:
        """Stable positive doc id for a (user, person, window, metric) tuple."""
        digest = hashlib.blake2b(
            f"{int(user_id)}|{person_id}|{int(window_days)}|{metric}".encode("utf-8"),
            digest_size=8,
        ).digest()
        return max(1, int.from_bytes(digest, byteorder="big", signed=False))

    def upsert_metric(
        self,
        *,
        user_id: int,
        person_id: str,
        window_days: int,
        metric: str,
        value_ms: int,
        baseline_ms: int,
        z_score: float,
        sample_n: int,
        computed_at: int,
    ) -> None:
        """Write one aggregated metric row (REPLACE keyed by _metric_doc_id)."""
        self.ensure_metrics_table()
        doc_id = self._metric_doc_id(
            user_id=user_id,
            person_id=person_id,
            window_days=window_days,
            metric=metric,
        )
        self._sql(
            f"REPLACE INTO {self.metrics_table} "
            "(id,user_id,person_id,window_days,metric,value_ms,baseline_ms,z_score,sample_n,computed_at) "
            f"VALUES ({doc_id},{int(user_id)},'{self._escape(person_id)}',{int(window_days)},"
            f"'{self._escape(metric)}',{int(value_ms)},{int(baseline_ms)},"
            f"{float(z_score)},{int(sample_n)},{int(computed_at)})"
        )
def get_event_ledger_backend() -> ManticoreEventLedgerBackend:
    """Return a fresh Manticore-backed event ledger backend instance."""
    backend = ManticoreEventLedgerBackend()
    return backend
def upsert_conversation_event(event: ConversationEvent) -> None:
    """Mirror a Django ConversationEvent row into the Manticore ledger."""
    session = event.session
    person_id = str(session.identifier.person_id)
    backend = get_event_ledger_backend()
    backend.upsert_event(
        user_id=int(event.user_id),
        person_id=person_id,
        session_id=str(session.id),
        event_type=str(event.event_type or ""),
        direction=str(event.direction or "system"),
        ts=int(event.ts or 0),
        actor_identifier=str(event.actor_identifier or ""),
        origin_transport=str(event.origin_transport or ""),
        origin_message_id=str(event.origin_message_id or ""),
        origin_chat_id=str(event.origin_chat_id or ""),
        payload=dict(event.payload or {}),
        raw_payload=dict(event.raw_payload or {}),
        trace_id=str(event.trace_id or ""),
    )
def get_behavioral_availability_stats(*, user_id: int) -> list[dict]:
    """Aggregate per-person/per-transport event counts from the ledger."""
    backend = get_event_ledger_backend()
    query = (
        f"SELECT person_id, transport, "
        "COUNT(*) AS total_events, "
        "SUM(IF(kind IN ('presence_available','presence_unavailable'),1,0)) AS presence_events, "
        "SUM(IF(kind='message_read',1,0)) AS read_events, "
        "SUM(IF(kind IN ('composing_started','composing_stopped'),1,0)) AS typing_events, "
        "SUM(IF(kind='message_sent',1,0)) AS message_events, "
        "SUM(IF(kind='composing_abandoned',1,0)) AS abandoned_events, "
        "MAX(ts) AS last_event_ts "
        f"FROM {backend.table} "
        f"WHERE user_id={int(user_id)} "
        "GROUP BY person_id, transport "
        "ORDER BY total_events DESC, person_id ASC, transport ASC"
    )
    return backend.query_rows(query)
def get_behavioral_latest_states(
    *,
    user_id: int,
    person_ids: list[str],
    transport: str = "",
) -> list[dict]:
    """Recent behavioural rows for the given people, newest first per person."""
    backend = get_event_ledger_backend()
    wanted = []
    for raw in list(person_ids or []):
        candidate = str(raw or "").strip()
        if candidate:
            wanted.append(candidate)
    if not wanted:
        return []
    id_clause = ",".join(f"'{backend._escape(pid)}'" for pid in wanted)
    transport_clause = ""
    normalized_transport = str(transport or "").strip()
    if normalized_transport:
        transport_clause = (
            f" AND transport='{backend._escape(normalized_transport.lower())}'"
        )
    return backend.query_rows(
        f"SELECT person_id, transport, kind, ts "
        f"FROM {backend.table} "
        f"WHERE user_id={int(user_id)} "
        f"AND person_id IN ({id_clause})"
        f"{transport_clause} "
        "ORDER BY person_id ASC, ts DESC"
    )
def get_behavioral_events_for_range(
    *,
    user_id: int,
    person_id: str,
    start_ts: int,
    end_ts: int,
    transport: str = "",
) -> list[dict]:
    """Events for one person within [start_ts, end_ts], optionally per transport."""
    backend = get_event_ledger_backend()
    transport_clause = ""
    normalized_transport = str(transport or "").strip()
    if normalized_transport:
        transport_clause = (
            f" AND transport='{backend._escape(normalized_transport.lower())}'"
        )
    return backend.query_rows(
        f"SELECT person_id, session_id, transport, kind, direction, ts, payload "
        f"FROM {backend.table} "
        f"WHERE user_id={int(user_id)} "
        f"AND person_id='{backend._escape(str(person_id or '').strip())}' "
        f"AND ts>={int(start_ts)} AND ts<={int(end_ts)}"
        f"{transport_clause} "
        "ORDER BY ts ASC"
    )
def get_recent_event_rows(
    *,
    minutes: int = 120,
    service: str = "",
    user_id: str = "",
    limit: int = 200,
) -> list[dict]:
    """Return up to *limit* (capped at 500) ledger events from the past *minutes*, newest first."""
    backend = get_event_ledger_backend()
    window_ms = max(1, int(minutes)) * 60 * 1000
    cutoff_ts = int(time.time() * 1000) - window_ms
    conditions = [f"ts>={cutoff_ts}"]
    if service:
        conditions.append(f"transport='{backend._escape(str(service).strip().lower())}'")
    if user_id:
        conditions.append(f"user_id={int(user_id)}")
    capped_limit = max(1, min(int(limit), 500))
    rows = backend.query_rows(
        f"SELECT user_id, session_id, ts, kind, direction, transport, payload "
        f"FROM {backend.table} "
        f"WHERE {' AND '.join(conditions)} "
        f"ORDER BY ts DESC "
        f"LIMIT {capped_limit}"
    )
    results = []
    for row in rows or []:
        payload = parse_payload(row.get("payload"))
        # Prefer the legacy event type preserved in the payload over "kind".
        legacy_type = str(payload.get("legacy_event_type") or "").strip().lower()
        results.append(
            {
                "id": "",
                "user_id": int(row.get("user_id") or 0),
                "session_id": str(row.get("session_id") or ""),
                "ts": int(row.get("ts") or 0),
                "event_type": legacy_type or str(row.get("kind") or ""),
                "kind": str(row.get("kind") or ""),
                "direction": str(row.get("direction") or ""),
                "origin_transport": str(row.get("transport") or ""),
                "trace_id": str(payload.get("trace_id") or ""),
            }
        )
    return results
def count_behavioral_events(*, user_id: int) -> int:
    """Total number of ledger events stored for *user_id* (0 when unavailable)."""
    backend = get_event_ledger_backend()
    rows = backend.query_rows(
        f"SELECT COUNT(*) AS total_events "
        f"FROM {backend.table} "
        f"WHERE user_id={int(user_id)}"
    )
    if rows:
        try:
            return int((rows[0] or {}).get("total_events") or 0)
        except Exception:
            return 0
    return 0
def get_trace_ids(*, user_id: int, limit: int = 120) -> list[str]:
    """Collect up to *limit* distinct trace ids from recent events, newest first.

    Over-fetches (6x, capped at 1000) because trace ids live inside the
    JSON payload and cannot be deduplicated server-side.
    """
    backend = get_event_ledger_backend()
    fetch_limit = max(1, min(int(limit) * 6, 1000))
    cap = max(1, min(int(limit), 500))
    rows = backend.query_rows(
        f"SELECT payload "
        f"FROM {backend.table} "
        f"WHERE user_id={int(user_id)} "
        "ORDER BY ts DESC "
        f"LIMIT {fetch_limit}"
    )
    found = []
    known = set()
    for row in rows or []:
        payload = parse_payload(row.get("payload"))
        candidate = str(payload.get("trace_id") or "").strip()
        if not candidate or candidate in known:
            continue
        known.add(candidate)
        found.append(candidate)
        if len(found) >= cap:
            break
    return found
def get_trace_event_rows(*, user_id: int, trace_id: str, limit: int = 500) -> list[dict]:
    """Events belonging to one trace id, oldest first, filtered client-side.

    Over-fetches (8x, capped at 5000) because the trace id is stored inside
    the JSON payload and cannot be matched in the WHERE clause.
    """
    backend = get_event_ledger_backend()
    fetch_limit = max(1, min(int(limit) * 8, 5000))
    cap = max(1, min(int(limit), 500))
    wanted = str(trace_id or "").strip()
    rows = backend.query_rows(
        f"SELECT user_id, session_id, ts, kind, direction, transport, payload "
        f"FROM {backend.table} "
        f"WHERE user_id={int(user_id)} "
        "ORDER BY ts ASC "
        f"LIMIT {fetch_limit}"
    )
    matched = []
    for row in rows or []:
        payload = parse_payload(row.get("payload"))
        if str(payload.get("trace_id") or "").strip() != wanted:
            continue
        matched.append(
            {
                "id": "",
                "ts": int(row.get("ts") or 0),
                "event_type": str(
                    payload.get("legacy_event_type") or row.get("kind") or ""
                ).strip(),
                "kind": str(row.get("kind") or "").strip(),
                "direction": str(row.get("direction") or "").strip(),
                "session_id": str(row.get("session_id") or "").strip(),
                "origin_transport": str(row.get("transport") or "").strip(),
                "origin_message_id": str(payload.get("origin_message_id") or "").strip(),
                "payload": payload,
                "trace_id": wanted,
            }
        )
        if len(matched) >= cap:
            break
    return matched
def get_session_event_rows(*, user_id: int, session_id: str, limit: int = 2000) -> list[dict]:
    """All ledger events for one session, oldest first, normalised to dicts."""
    backend = get_event_ledger_backend()
    capped_limit = max(1, min(int(limit), 5000))
    rows = backend.query_rows(
        f"SELECT user_id, session_id, ts, kind, direction, transport, actor, payload "
        f"FROM {backend.table} "
        f"WHERE user_id={int(user_id)} "
        f"AND session_id='{backend._escape(str(session_id or '').strip())}' "
        "ORDER BY ts ASC "
        f"LIMIT {capped_limit}"
    )
    normalised = []
    for row in rows or []:
        payload = parse_payload(row.get("payload"))
        normalised.append(
            {
                "ts": int(row.get("ts") or 0),
                "event_type": str(
                    payload.get("legacy_event_type") or row.get("kind") or ""
                ).strip(),
                "kind": str(row.get("kind") or "").strip(),
                "direction": str(row.get("direction") or "").strip(),
                "session_id": str(row.get("session_id") or "").strip(),
                "origin_transport": str(row.get("transport") or "").strip(),
                "actor_identifier": str(row.get("actor") or "").strip(),
                "origin_message_id": str(payload.get("origin_message_id") or "").strip(),
                "payload": payload,
            }
        )
    return normalised

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
from dataclasses import dataclass
from core.events.manticore import get_session_event_rows
from core.models import ChatSession, ConversationEvent, Message
@@ -59,27 +60,56 @@ def _normalize_reactions(rows: list[dict] | None) -> list[dict]:
)
def project_session_from_events(session: ChatSession) -> list[dict]:
rows = list(
ConversationEvent.objects.filter(
user=session.user,
session=session,
).order_by("ts", "created_at")
def _event_rows_for_session(session: ChatSession):
try:
rows = get_session_event_rows(
user_id=int(session.user_id),
session_id=str(session.id),
limit=2000,
)
except Exception:
rows = []
if rows:
return rows, "manticore"
return (
list(
ConversationEvent.objects.filter(
user=session.user,
session=session,
).order_by("ts", "created_at")
),
"django",
)
def project_session_from_events(session: ChatSession) -> list[dict]:
rows, _source = _event_rows_for_session(session)
projected: dict[str, _ProjectedMessage] = {}
order: list[str] = []
for event in rows:
payload = dict(event.payload or {})
event_type = str(event.event_type or "").strip().lower()
is_dict = isinstance(event, dict)
payload = dict(
(event.get("payload") if is_dict else getattr(event, "payload", {})) or {}
)
event_type = str(
(event.get("event_type") if is_dict else getattr(event, "event_type", ""))
or ""
).strip().lower()
message_id = str(
payload.get("message_id") or payload.get("target_message_id") or ""
).strip()
if event_type == "message_created":
message_id = str(
payload.get("message_id") or event.origin_message_id or ""
payload.get("message_id")
or (
event.get("origin_message_id")
if is_dict
else getattr(event, "origin_message_id", "")
)
or ""
).strip()
if not message_id:
continue
@@ -88,10 +118,14 @@ def project_session_from_events(session: ChatSession) -> list[dict]:
state = _ProjectedMessage(message_id=message_id)
projected[message_id] = state
order.append(message_id)
state.ts = _safe_int(payload.get("message_ts"), _safe_int(event.ts))
state.ts = _safe_int(
payload.get("message_ts"),
_safe_int(event.get("ts") if is_dict else getattr(event, "ts", 0)),
)
state.text = str(payload.get("text") or state.text or "")
delivered_default = _safe_int(
payload.get("delivered_ts"), _safe_int(event.ts)
payload.get("delivered_ts"),
_safe_int(event.get("ts") if is_dict else getattr(event, "ts", 0)),
)
if state.delivered_ts is None:
state.delivered_ts = delivered_default or None
@@ -102,7 +136,10 @@ def project_session_from_events(session: ChatSession) -> list[dict]:
state = projected[message_id]
if event_type == "read_receipt":
read_ts = _safe_int(payload.get("read_ts"), _safe_int(event.ts))
read_ts = _safe_int(
payload.get("read_ts"),
_safe_int(event.get("ts") if is_dict else getattr(event, "ts", 0)),
)
if read_ts > 0:
if state.read_ts is None:
state.read_ts = read_ts
@@ -114,11 +151,27 @@ def project_session_from_events(session: ChatSession) -> list[dict]:
if event_type in {"reaction_added", "reaction_removed"}:
source_service = (
str(payload.get("source_service") or event.origin_transport or "")
str(
payload.get("source_service")
or (
event.get("origin_transport")
if is_dict
else getattr(event, "origin_transport", "")
)
or ""
)
.strip()
.lower()
)
actor = str(payload.get("actor") or event.actor_identifier or "").strip()
actor = str(
payload.get("actor")
or (
event.get("actor_identifier")
if is_dict
else getattr(event, "actor_identifier", "")
)
or ""
).strip()
emoji = str(payload.get("emoji") or "").strip()
if not source_service and not actor and not emoji:
continue

148
core/events/shadow.py Normal file
View File

@@ -0,0 +1,148 @@
from __future__ import annotations
from django.db.models import Count, Max, Q
from core.models import ConversationEvent, Person, User
def _kind_from_event_type(event_type: str) -> str:
normalized = str(event_type or "").strip().lower()
return {
"message_created": "message_sent",
"delivery_receipt": "message_delivered",
"read_receipt": "message_read",
"typing_started": "composing_started",
"typing_stopped": "composing_stopped",
"composing_abandoned": "composing_abandoned",
"presence_available": "presence_available",
"presence_unavailable": "presence_unavailable",
}.get(normalized, normalized)
def get_shadow_behavioral_availability_stats(*, user: User) -> list[dict]:
    """Aggregate per-person/per-transport event counts from the Django ledger.

    Shadow counterpart of the Manticore availability stats, computed with a
    single annotated ORM query over ConversationEvent.
    """
    names_by_person = {
        str(entry["id"]): str(entry["name"] or "")
        for entry in Person.objects.filter(user=user).values("id", "name")
    }
    aggregated = (
        ConversationEvent.objects.filter(
            user=user,
            session__identifier__person__isnull=False,
        )
        .values("session__identifier__person_id", "origin_transport")
        .annotate(
            total_events=Count("id"),
            presence_events=Count(
                "id",
                filter=Q(event_type__in=["presence_available", "presence_unavailable"]),
            ),
            read_events=Count("id", filter=Q(event_type="read_receipt")),
            typing_events=Count(
                "id",
                filter=Q(event_type__in=["typing_started", "typing_stopped"]),
            ),
            message_events=Count("id", filter=Q(event_type="message_created")),
            abandoned_events=Count("id", filter=Q(event_type="composing_abandoned")),
            last_event_ts=Max("ts"),
        )
        .order_by("-total_events", "session__identifier__person_id", "origin_transport")
    )
    results = []
    for entry in aggregated:
        person_id = str(entry.get("session__identifier__person_id") or "").strip()
        results.append(
            {
                "person_id": person_id,
                "person_name": names_by_person.get(person_id, person_id or "-"),
                "service": str(entry.get("origin_transport") or "").strip().lower(),
                "total_events": int(entry.get("total_events") or 0),
                "presence_events": int(entry.get("presence_events") or 0),
                "read_events": int(entry.get("read_events") or 0),
                "typing_events": int(entry.get("typing_events") or 0),
                "message_events": int(entry.get("message_events") or 0),
                "abandoned_events": int(entry.get("abandoned_events") or 0),
                "last_event_ts": int(entry.get("last_event_ts") or 0),
            }
        )
    return results
def get_shadow_behavioral_latest_states(
    *, user: User, person_ids: list[str], transport: str = ""
) -> list[dict]:
    """Latest behavioural event per person, from the Django ledger.

    Scans up to 500 rows ordered newest-first within each person and keeps
    the first row seen for each person id.
    """
    events = ConversationEvent.objects.filter(
        user=user,
        session__identifier__person_id__in=[str(value) for value in person_ids],
        event_type__in=[
            "message_created",
            "delivery_receipt",
            "read_receipt",
            "typing_started",
            "typing_stopped",
            "composing_abandoned",
            "presence_available",
            "presence_unavailable",
        ],
    ).select_related("session__identifier")
    if transport:
        events = events.filter(origin_transport=str(transport).strip().lower())
    ordered = events.order_by("session__identifier__person_id", "-ts", "-created_at")
    latest = []
    covered = set()
    for event in ordered[:500]:
        person_id = str(getattr(event.session.identifier, "person_id", "") or "").strip()
        if not person_id or person_id in covered:
            continue
        covered.add(person_id)
        latest.append(
            {
                "person_id": person_id,
                "transport": str(event.origin_transport or "").strip().lower(),
                "kind": _kind_from_event_type(event.event_type),
                "ts": int(event.ts or 0),
            }
        )
    return latest
def get_shadow_behavioral_events_for_range(
    *,
    user: User,
    person_id: str,
    start_ts: int,
    end_ts: int,
    transport: str = "",
) -> list[dict]:
    """Behavioural events for one person inside [start_ts, end_ts], capped at 1000."""
    clean_person_id = str(person_id or "").strip()
    events = ConversationEvent.objects.filter(
        user=user,
        session__identifier__person_id=clean_person_id,
        ts__gte=int(start_ts),
        ts__lte=int(end_ts),
        event_type__in=[
            "message_created",
            "delivery_receipt",
            "read_receipt",
            "typing_started",
            "typing_stopped",
            "composing_abandoned",
            "presence_available",
            "presence_unavailable",
        ],
    ).order_by("ts", "created_at")
    if transport:
        events = events.filter(origin_transport=str(transport).strip().lower())
    output = []
    for event in events[:1000]:
        output.append(
            {
                "person_id": clean_person_id,
                "session_id": str(event.session_id or ""),
                "transport": str(event.origin_transport or "").strip().lower(),
                "kind": _kind_from_event_type(event.event_type),
                "direction": str(event.direction or "").strip().lower(),
                "ts": int(event.ts or 0),
                "payload": dict(event.payload or {}),
            }
        )
    return output