"""Regex-based memory suggestions, change-request review, and hygiene helpers.

The module extracts preference/style statements from inbound messages, queues
them as proposed ``MemoryItem`` rows with matching ``MemoryChangeRequest``
rows, applies reviewer decisions, and runs a hygiene pass that deprecates
expired memories and flags contradicting ones.
"""

from __future__ import annotations

import re
from datetime import datetime
from datetime import timezone as dt_timezone
from typing import Any

from django.db import transaction
from django.utils import timezone
from django.utils.dateparse import parse_datetime

from core.models import (
    MemoryChangeRequest,
    MemoryItem,
    MemorySourceReference,
    MessageEvent,
    WorkspaceConversation,
)
from core.util import logs

log = logs.get_logger("memory-pipeline")

# Each pattern captures the statement's object in the named group ``value``;
# extract_memory_candidates() reads it via match.group("value").
_LIKE_RE = re.compile(
    r"\b(?:i (?:like|love|prefer)|my favorite)\s+(?P<value>[^.!?]{2,120})",
    re.IGNORECASE,
)
_DISLIKE_RE = re.compile(
    r"\b(?:i (?:dislike|hate|avoid)|i don't like)\s+(?P<value>[^.!?]{2,120})",
    re.IGNORECASE,
)
_STYLE_RE = re.compile(
    r"\b(?:please|pls)\s+(?P<value>[^.!?]{3,120})",
    re.IGNORECASE,
)


def _clean_value(value: str) -> str:
    """Return *value* stripped, with every whitespace run collapsed to one space."""
    return " ".join(str(value or "").strip().split())


def extract_memory_candidates(text: str) -> list[dict[str, Any]]:
    """Extract like/dislike/style statements from *text*.

    Returns a list of dicts with the keys ``memory_kind``, ``field``, ``text``
    and ``confidence_score``. Matches whose cleaned value is shorter than
    three characters are dropped. Empty or falsy input yields an empty list.
    """
    source = str(text or "").strip()
    if not source:
        return []
    candidates: list[dict[str, Any]] = []
    for regex, field, kind, confidence in (
        (_LIKE_RE, "likes", "fact", 0.68),
        (_DISLIKE_RE, "dislikes", "fact", 0.68),
        (_STYLE_RE, "communication_style", "state", 0.52),
    ):
        for match in regex.finditer(source):
            value = _clean_value(match.group("value"))
            if len(value) < 3:
                continue
            candidates.append(
                {
                    "memory_kind": kind,
                    "field": field,
                    "text": value,
                    "confidence_score": confidence,
                }
            )
    return candidates


def _existing_fingerprints(user_id: int) -> set[tuple[str, str, str, str]]:
    """Build dedup fingerprints for every ``MemoryItem`` owned by *user_id*.

    A fingerprint is ``(memory_kind, conversation_id, person_id,
    "field:text")`` with kind, field and text lower-cased, matching the tuple
    built in suggest_memories_from_recent_messages().
    """
    items = MemoryItem.objects.filter(user_id=int(user_id)).only(
        "memory_kind",
        "conversation_id",
        "person_id",
        "content",
    )
    fingerprints: set[tuple[str, str, str, str]] = set()
    for item in items:
        content = item.content or {}
        field = str(content.get("field") or "").strip().lower()
        text = _clean_value(str(content.get("text") or "")).lower()
        fingerprints.add(
            (
                str(item.memory_kind or "").strip().lower(),
                str(item.conversation_id or "").strip(),
                str(item.person_id or "").strip(),
                f"{field}:{text}",
            )
        )
    return fingerprints


def _infer_single_person_id(conversation: WorkspaceConversation) -> str:
    """Return the sole participant's id as a string, or ``""`` otherwise.

    Only two ids are fetched: that is enough to tell "exactly one participant"
    apart from "none" or "several".
    """
    participant_ids = list(conversation.participants.values_list("id", flat=True)[:2])
    if len(participant_ids) != 1:
        return ""
    return str(participant_ids[0] or "")


@transaction.atomic
def suggest_memories_from_recent_messages(
    *,
    user_id: int,
    limit_messages: int = 300,
    max_items: int = 30,
) -> dict[str, int]:
    """Queue proposed memories found in the user's most recent inbound messages.

    For every new candidate a ``MemoryItem`` (status ``proposed``), a
    ``MemorySourceReference`` and a pending ``create`` ``MemoryChangeRequest``
    are created. Candidates whose fingerprint already exists are skipped.

    Args:
        user_id: Owner of the messages and of the created rows.
        limit_messages: Number of newest messages to scan, clamped to 1..2000.
        max_items: Maximum number of memories to queue, clamped to 1..500.

    Returns:
        ``{"scanned": <messages visited>, "queued": <memories created>}``.
    """
    safe_limit_messages = max(1, min(2000, int(limit_messages or 300)))
    safe_max_items = max(1, min(500, int(max_items or 30)))
    existing = _existing_fingerprints(int(user_id))
    # Participant lookups cost one query each; cache them per conversation so
    # a busy conversation is only queried once per run.
    person_id_by_conversation: dict[Any, str] = {}
    scanned = 0
    queued = 0
    rows = (
        MessageEvent.objects.filter(user_id=int(user_id), direction="in")
        .select_related("conversation")
        .order_by("-ts")[:safe_limit_messages]
    )
    for event in rows:
        scanned += 1
        if event.conversation_id is None:
            # Without a conversation there are no participants to inspect and
            # nothing to attach the memory to.
            continue
        candidates = extract_memory_candidates(event.text or "")
        if not candidates:
            continue
        if event.conversation_id not in person_id_by_conversation:
            person_id_by_conversation[event.conversation_id] = _infer_single_person_id(
                event.conversation
            )
        person_id = person_id_by_conversation[event.conversation_id]
        for candidate in candidates:
            field = str(candidate.get("field") or "").strip().lower()
            text = _clean_value(str(candidate.get("text") or ""))
            if not text:
                continue
            fingerprint = (
                str(candidate.get("memory_kind") or "fact").strip().lower(),
                str(event.conversation_id or "").strip(),
                person_id,
                f"{field}:{text.lower()}",
            )
            if fingerprint in existing:
                continue
            item = MemoryItem.objects.create(
                user_id=int(user_id),
                conversation=event.conversation,
                person_id=person_id or None,
                memory_kind=str(candidate.get("memory_kind") or "fact"),
                status="proposed",
                content={"field": field, "text": text},
                provenance={
                    "pipeline": "message_regex",
                    "message_event_id": str(event.id),
                },
                confidence_score=float(candidate.get("confidence_score") or 0.5),
            )
            MemorySourceReference.objects.create(
                memory=item,
                message_event=event,
                source_label="message_event",
            )
            MemoryChangeRequest.objects.create(
                user_id=int(user_id),
                memory=item,
                conversation=event.conversation,
                person_id=person_id or None,
                action="create",
                status="pending",
                proposed_memory_kind=item.memory_kind,
                proposed_content=item.content,
                proposed_confidence_score=item.confidence_score,
                reason="Auto-suggested from recent inbound messages.",
                requested_by_identifier="memory-pipeline",
            )
            # Record the fingerprint so a repeat later in this run is skipped.
            existing.add(fingerprint)
            queued += 1
            if queued >= safe_max_items:
                return {"scanned": scanned, "queued": queued}
    return {"scanned": scanned, "queued": queued}


def _coerce_expires_at(value: Any) -> datetime | None:
    """Parse *value* into an aware datetime, or return ``None`` when empty.

    Naive datetimes are interpreted as UTC.

    Raises:
        ValueError: If *value* is non-empty but not an ISO datetime.
    """
    raw = str(value or "").strip()
    if not raw:
        return None
    parsed = parse_datetime(raw)
    if parsed is None:
        raise ValueError("expires_at must be an ISO datetime")
    if parsed.tzinfo is None:
        return timezone.make_aware(parsed, dt_timezone.utc)
    return parsed


@transaction.atomic
def create_memory_change_request(
    *,
    user_id: int,
    action: str,
    conversation_id: str = "",
    person_id: str = "",
    memory_id: str = "",
    memory_kind: str = "",
    content: dict[str, Any] | None = None,
    confidence_score: float | None = None,
    expires_at: str = "",
    reason: str = "",
    requested_by_identifier: str = "",
) -> MemoryChangeRequest:
    """Create a pending ``MemoryChangeRequest`` for *user_id*.

    ``create`` requires ``conversation_id``; ``update`` and ``delete`` require
    ``memory_id``. Conversation, person, kind and confidence fall back to the
    referenced memory's values when not supplied.

    Raises:
        ValueError: On an unknown action, an unknown ``memory_id`` or
            ``conversation_id``, a missing required id, or an ``expires_at``
            that is not an ISO datetime.
    """
    normalized_action = str(action or "").strip().lower()
    if normalized_action not in {"create", "update", "delete"}:
        raise ValueError("action must be create/update/delete")

    memory = None
    if memory_id:
        memory = MemoryItem.objects.filter(user_id=int(user_id), id=memory_id).first()
        if memory is None:
            raise ValueError("memory_id not found")

    conversation = None
    if conversation_id:
        conversation = WorkspaceConversation.objects.filter(
            user_id=int(user_id),
            id=conversation_id,
        ).first()
        if conversation is None:
            raise ValueError("conversation_id not found")

    if normalized_action == "create" and conversation is None:
        raise ValueError("conversation_id is required for create")
    if normalized_action in {"update", "delete"} and memory is None:
        raise ValueError("memory_id is required for update/delete")

    if confidence_score is not None:
        proposed_confidence = float(confidence_score)
    elif memory is not None and memory.confidence_score is not None:
        # Guarded: float(None) would raise TypeError for a memory that has no
        # stored confidence.
        proposed_confidence = float(memory.confidence_score)
    else:
        proposed_confidence = None

    return MemoryChangeRequest.objects.create(
        user_id=int(user_id),
        memory=memory,
        conversation=conversation or (memory.conversation if memory else None),
        person_id=person_id or (str(memory.person_id or "") if memory else "") or None,
        action=normalized_action,
        status="pending",
        proposed_memory_kind=str(memory_kind or (memory.memory_kind if memory else "")).strip(),
        proposed_content=dict(content or {}),
        proposed_confidence_score=proposed_confidence,
        proposed_expires_at=_coerce_expires_at(expires_at),
        reason=str(reason or "").strip(),
        requested_by_identifier=str(requested_by_identifier or "").strip(),
    )


@transaction.atomic
def review_memory_change_request(
    *,
    user_id: int,
    request_id: str,
    decision: str,
    reviewer_identifier: str = "",
    note: str = "",
) -> MemoryChangeRequest:
    """Approve or reject a pending change request and apply it when approved.

    A rejected request is saved with status ``rejected``. An approved request
    is saved as ``approved``, its action is applied to the memory, and it is
    then saved as ``applied``.

    Raises:
        MemoryChangeRequest.DoesNotExist: If no request matches
            ``request_id`` for ``user_id``.
        ValueError: If the request is not pending, the decision is not
            ``approve``/``reject``, or the approved request lacks the memory
            or conversation its action needs.
    """
    req = MemoryChangeRequest.objects.select_related("memory", "conversation").get(
        id=request_id,
        user_id=int(user_id),
    )
    if req.status != "pending":
        raise ValueError("request is not pending")
    now = timezone.now()
    normalized_decision = str(decision or "").strip().lower()
    if normalized_decision not in {"approve", "reject"}:
        raise ValueError("decision must be approve/reject")

    req.reviewed_by_identifier = str(reviewer_identifier or "").strip()
    req.reviewed_at = now
    if note:
        req.reason = f"{req.reason}\n\nReview note: {str(note).strip()}".strip()

    review_fields = [
        "status",
        "reviewed_by_identifier",
        "reviewed_at",
        "reason",
        "updated_at",
    ]
    if normalized_decision == "reject":
        req.status = "rejected"
        req.save(update_fields=review_fields)
        return req

    req.status = "approved"
    req.save(update_fields=review_fields)

    memory = req.memory
    if req.action == "create":
        if memory is None:
            if req.conversation is None:
                raise ValueError("approved create request missing conversation")
            memory = MemoryItem.objects.create(
                user_id=int(user_id),
                conversation=req.conversation,
                person_id=req.person_id,
                memory_kind=req.proposed_memory_kind or "fact",
                status="active",
                content=req.proposed_content or {},
                confidence_score=float(req.proposed_confidence_score or 0.5),
                expires_at=req.proposed_expires_at,
                last_verified_at=now,
                provenance={"approved_request_id": str(req.id)},
            )
            req.memory = memory
        else:
            # The memory already exists (e.g. it was created as "proposed"),
            # so approval only activates it.
            memory.status = "active"
            memory.last_verified_at = now
            memory.save(update_fields=["status", "last_verified_at", "updated_at"])
    elif req.action == "update":
        if memory is None:
            raise ValueError("approved update request missing memory")
        if req.proposed_memory_kind:
            memory.memory_kind = req.proposed_memory_kind
        if req.proposed_content:
            memory.content = req.proposed_content
        if req.proposed_confidence_score is not None:
            memory.confidence_score = float(req.proposed_confidence_score)
        # NOTE: unlike the fields above, expires_at is always overwritten, so
        # a request without a proposed expiry clears the existing one.
        memory.expires_at = req.proposed_expires_at
        memory.last_verified_at = now
        memory.status = "active"
        memory.save()
    else:
        if memory is None:
            raise ValueError("approved delete request missing memory")
        # Deletion is soft: the row is kept and marked deprecated.
        memory.status = "deprecated"
        memory.last_verified_at = now
        memory.save(update_fields=["status", "last_verified_at", "updated_at"])

    req.status = "applied"
    req.save(update_fields=["status", "memory", "updated_at"])
    return req


@transaction.atomic
def run_memory_hygiene(*, user_id: int | None = None, dry_run: bool = False) -> dict[str, int]:
    """Deprecate expired memories and queue reviews for contradicting ones.

    Active memories whose ``expires_at`` has passed are marked ``deprecated``.
    The remaining active memories are grouped by (user, person, conversation,
    kind, field); a group holding more than one distinct text is treated as a
    contradiction and each of its items gets a pending ``update`` request
    unless one is already pending.

    Args:
        user_id: Restrict the run to one user; ``None`` covers all users.
        dry_run: When true, only count; no rows are modified or created.

    Returns:
        ``{"expired": ..., "contradictions": ..., "queued_requests": ...}``.
    """
    now = timezone.now()
    queryset = MemoryItem.objects.filter(status="active")
    if user_id is not None:
        queryset = queryset.filter(user_id=int(user_id))

    expired_ids = list(
        queryset.filter(expires_at__isnull=False, expires_at__lte=now).values_list(
            "id",
            flat=True,
        )
    )
    expired = len(expired_ids)
    if expired and not dry_run:
        # QuerySet.update() bypasses save(), so updated_at is set explicitly.
        MemoryItem.objects.filter(id__in=expired_ids).update(
            status="deprecated",
            updated_at=now,
        )

    # Expired items are skipped below in both modes so a dry run reports the
    # same contradiction count as a real run (where they are already
    # deprecated and therefore no longer match the "active" filter).
    expired_id_set = set(expired_ids)

    contradictions = 0
    queued = 0
    grouped: dict[tuple[int, str, str, str, str], dict[str, list[MemoryItem]]] = {}
    for item in queryset.select_related("conversation", "person"):
        if item.id in expired_id_set:
            continue
        content = item.content or {}
        field = str(content.get("field") or content.get("key") or "").strip().lower()
        text = _clean_value(str(content.get("text") or content.get("value") or "")).lower()
        if not field or not text:
            continue
        scope = (
            int(item.user_id),
            str(item.person_id or ""),
            str(item.conversation_id or ""),
            str(item.memory_kind or ""),
            field,
        )
        grouped.setdefault(scope, {}).setdefault(text, []).append(item)

    for values in grouped.values():
        if len(values) <= 1:
            continue
        flat = [item for subset in values.values() for item in subset]
        contradictions += len(flat)
        if dry_run:
            continue
        for item in flat:
            already_pending = MemoryChangeRequest.objects.filter(
                user_id=item.user_id,
                memory=item,
                action="update",
                status="pending",
                reason__icontains="contradiction",
            ).exists()
            if already_pending:
                continue
            MemoryChangeRequest.objects.create(
                user_id=item.user_id,
                memory=item,
                conversation=item.conversation,
                person=item.person,
                action="update",
                status="pending",
                proposed_memory_kind=item.memory_kind,
                proposed_content=item.content,
                proposed_confidence_score=item.confidence_score,
                proposed_expires_at=item.expires_at,
                reason="Contradiction detected by hygiene job.",
                requested_by_identifier="memory-hygiene",
            )
            queued += 1

    log.info(
        "memory hygiene user=%s dry_run=%s expired=%s contradictions=%s queued=%s",
        user_id if user_id is not None else "-",
        dry_run,
        expired,
        contradictions,
        queued,
    )
    return {
        "expired": expired,
        "contradictions": contradictions,
        "queued_requests": queued,
    }