Add memory pipeline, prompt retrieval helpers, and Manticore memory search indexing
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
from .search_backend import get_memory_search_backend
|
||||
from .retrieval import retrieve_memories_for_prompt
|
||||
|
||||
__all__ = ["get_memory_search_backend"]
|
||||
__all__ = ["get_memory_search_backend", "retrieve_memories_for_prompt"]
|
||||
|
||||
419
core/memory/pipeline.py
Normal file
419
core/memory/pipeline.py
Normal file
@@ -0,0 +1,419 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from datetime import timezone as dt_timezone
|
||||
from typing import Any
|
||||
|
||||
from django.db import transaction
|
||||
from django.utils import timezone
|
||||
from django.utils.dateparse import parse_datetime
|
||||
|
||||
from core.models import (
|
||||
MemoryChangeRequest,
|
||||
MemoryItem,
|
||||
MemorySourceReference,
|
||||
MessageEvent,
|
||||
WorkspaceConversation,
|
||||
)
|
||||
from core.util import logs
|
||||
|
||||
# Module-level logger for the memory suggestion/review pipeline.
log = logs.get_logger("memory-pipeline")

# Heuristic extraction patterns for first-person statements.  Each captures a
# short free-text <value> (bounded length, stopped at sentence punctuation)
# that becomes a proposed memory.  Matching is case-insensitive.
_LIKE_RE = re.compile(
    r"\b(?:i (?:like|love|prefer)|my favorite)\s+(?P<value>[^.!?]{2,120})",
    re.IGNORECASE,
)
_DISLIKE_RE = re.compile(
    r"\b(?:i (?:dislike|hate|avoid)|i don't like)\s+(?P<value>[^.!?]{2,120})",
    re.IGNORECASE,
)
# "please/pls <something>" is treated as a communication-style hint.
_STYLE_RE = re.compile(
    r"\b(?:please|pls)\s+(?P<value>[^.!?]{3,120})",
    re.IGNORECASE,
)
|
||||
|
||||
|
||||
def _clean_value(value: str) -> str:
|
||||
return " ".join(str(value or "").strip().split())
|
||||
|
||||
|
||||
def extract_memory_candidates(text: str) -> list[dict[str, Any]]:
    """Scan *text* for preference/style statements and return memory candidates.

    Each candidate dict carries ``memory_kind``, ``field``, ``text`` and a
    fixed heuristic ``confidence_score``.  Matches shorter than three
    characters after whitespace normalization are discarded.  Returns an
    empty list for blank input.
    """
    haystack = str(text or "").strip()
    if not haystack:
        return []

    # (pattern, target field, memory kind, heuristic confidence)
    rules = (
        (_LIKE_RE, "likes", "fact", 0.68),
        (_DISLIKE_RE, "dislikes", "fact", 0.68),
        (_STYLE_RE, "communication_style", "state", 0.52),
    )
    found: list[dict[str, Any]] = []
    for pattern, field_name, memory_kind, score in rules:
        for hit in pattern.finditer(haystack):
            cleaned = _clean_value(hit.group("value"))
            if len(cleaned) < 3:
                continue
            found.append(
                {
                    "memory_kind": memory_kind,
                    "field": field_name,
                    "text": cleaned,
                    "confidence_score": score,
                }
            )
    return found
|
||||
|
||||
|
||||
def _existing_fingerprints(user_id: int) -> set[tuple[str, str, str, str]]:
    """Build the dedupe fingerprint set over every memory item owned by *user_id*.

    A fingerprint is (kind, conversation_id, person_id, "field:text"), with
    kind/field/text lowercased, so new candidates can be compared cheaply.
    """
    fingerprints: set[tuple[str, str, str, str]] = set()
    rows = MemoryItem.objects.filter(user_id=int(user_id)).only(
        "memory_kind",
        "conversation_id",
        "person_id",
        "content",
    )
    for row in rows:
        payload = row.content or {}
        field_key = str(payload.get("field") or "").strip().lower()
        text_key = _clean_value(str(payload.get("text") or "")).lower()
        fingerprints.add(
            (
                str(row.memory_kind or "").strip().lower(),
                str(row.conversation_id or "").strip(),
                str(row.person_id or "").strip(),
                f"{field_key}:{text_key}",
            )
        )
    return fingerprints
|
||||
|
||||
|
||||
def _infer_single_person_id(conversation: WorkspaceConversation) -> str:
|
||||
participant_ids = list(conversation.participants.values_list("id", flat=True)[:2])
|
||||
if len(participant_ids) != 1:
|
||||
return ""
|
||||
return str(participant_ids[0] or "")
|
||||
|
||||
|
||||
@transaction.atomic
def suggest_memories_from_recent_messages(
    *,
    user_id: int,
    limit_messages: int = 300,
    max_items: int = 30,
) -> dict[str, int]:
    """Scan recent inbound messages and queue proposed memories for review.

    Runs regex extraction over up to ``limit_messages`` of the user's newest
    inbound MessageEvents.  For each new (non-duplicate) candidate it creates
    a ``proposed`` MemoryItem, a MemorySourceReference back to the message,
    and a pending ``create`` MemoryChangeRequest, stopping once ``max_items``
    suggestions have been queued.  The whole run is one DB transaction.

    Returns ``{"scanned": <messages read>, "queued": <suggestions created>}``.
    """
    # Clamp caller-supplied limits to sane bounds (also handles 0/None).
    safe_limit_messages = max(1, min(2000, int(limit_messages or 300)))
    safe_max_items = max(1, min(500, int(max_items or 30)))
    # Fingerprints of everything already stored, used to skip duplicates.
    existing = _existing_fingerprints(int(user_id))

    scanned = 0
    queued = 0
    rows = (
        MessageEvent.objects.filter(user_id=int(user_id), direction="in")
        .select_related("conversation")
        .order_by("-ts")[:safe_limit_messages]
    )
    for event in rows:
        scanned += 1
        # Attribute the memory to a person only when the conversation has
        # exactly one participant; otherwise person stays unset.
        person_id = _infer_single_person_id(event.conversation)
        for candidate in extract_memory_candidates(event.text or ""):
            field = str(candidate.get("field") or "").strip().lower()
            text = _clean_value(str(candidate.get("text") or ""))
            if not text:
                continue
            # Must mirror the fingerprint shape built by _existing_fingerprints.
            fingerprint = (
                str(candidate.get("memory_kind") or "fact").strip().lower(),
                str(event.conversation_id or "").strip(),
                person_id,
                f"{field}:{text.lower()}",
            )
            if fingerprint in existing:
                continue

            item = MemoryItem.objects.create(
                user_id=int(user_id),
                conversation=event.conversation,
                person_id=person_id or None,
                memory_kind=str(candidate.get("memory_kind") or "fact"),
                status="proposed",
                content={"field": field, "text": text},
                provenance={
                    "pipeline": "message_regex",
                    "message_event_id": str(event.id),
                },
                confidence_score=float(candidate.get("confidence_score") or 0.5),
            )
            # Provenance link from the proposed memory back to its message.
            MemorySourceReference.objects.create(
                memory=item,
                message_event=event,
                source_label="message_event",
            )
            # Pending change request so a human/reviewer approves before the
            # memory becomes active.
            MemoryChangeRequest.objects.create(
                user_id=int(user_id),
                memory=item,
                conversation=event.conversation,
                person_id=person_id or None,
                action="create",
                status="pending",
                proposed_memory_kind=item.memory_kind,
                proposed_content=item.content,
                proposed_confidence_score=item.confidence_score,
                reason="Auto-suggested from recent inbound messages.",
                requested_by_identifier="memory-pipeline",
            )
            # Track the fingerprint so the same run doesn't queue duplicates.
            existing.add(fingerprint)
            queued += 1
            if queued >= safe_max_items:
                return {"scanned": scanned, "queued": queued}
    return {"scanned": scanned, "queued": queued}
|
||||
|
||||
|
||||
def _coerce_expires_at(value: Any):
    """Parse *value* as an ISO datetime; blank input yields None.

    Naive datetimes are interpreted as UTC.  Raises ValueError when the
    string is non-empty but cannot be parsed.
    """
    text = str(value or "").strip()
    if not text:
        return None
    parsed = parse_datetime(text)
    if parsed is None:
        raise ValueError("expires_at must be an ISO datetime")
    if parsed.tzinfo is not None:
        return parsed
    return timezone.make_aware(parsed, dt_timezone.utc)
|
||||
|
||||
|
||||
@transaction.atomic
def create_memory_change_request(
    *,
    user_id: int,
    action: str,
    conversation_id: str = "",
    person_id: str = "",
    memory_id: str = "",
    memory_kind: str = "",
    content: dict[str, Any] | None = None,
    confidence_score: float | None = None,
    expires_at: str = "",
    reason: str = "",
    requested_by_identifier: str = "",
) -> MemoryChangeRequest:
    """Validate inputs and create one pending MemoryChangeRequest.

    ``action`` must be create/update/delete.  ``create`` requires a valid
    ``conversation_id``; update/delete require a valid ``memory_id`` (both
    are checked against the given ``user_id``).  Unspecified proposed fields
    fall back to the referenced memory's current values where one exists.

    Raises ValueError for an unknown action or unresolvable ids.
    """
    normalized_action = str(action or "").strip().lower()
    if normalized_action not in {"create", "update", "delete"}:
        raise ValueError("action must be create/update/delete")

    # Resolve the referenced memory, scoped to this user.
    memory = None
    if memory_id:
        memory = MemoryItem.objects.filter(user_id=int(user_id), id=memory_id).first()
        if memory is None:
            raise ValueError("memory_id not found")

    # Resolve the referenced conversation, scoped to this user.
    conversation = None
    if conversation_id:
        conversation = WorkspaceConversation.objects.filter(
            user_id=int(user_id),
            id=conversation_id,
        ).first()
        if conversation is None:
            raise ValueError("conversation_id not found")

    if normalized_action == "create" and conversation is None:
        raise ValueError("conversation_id is required for create")
    if normalized_action in {"update", "delete"} and memory is None:
        raise ValueError("memory_id is required for update/delete")

    return MemoryChangeRequest.objects.create(
        user_id=int(user_id),
        memory=memory,
        # Fall back to the memory's own conversation/person when the caller
        # did not supply one explicitly.
        conversation=conversation or (memory.conversation if memory else None),
        person_id=person_id or (str(memory.person_id or "") if memory else "") or None,
        action=normalized_action,
        status="pending",
        proposed_memory_kind=str(memory_kind or (memory.memory_kind if memory else "")).strip(),
        proposed_content=dict(content or {}),
        proposed_confidence_score=(
            float(confidence_score)
            if confidence_score is not None
            else (float(memory.confidence_score) if memory else None)
        ),
        # Raises ValueError if expires_at is non-empty but unparseable.
        proposed_expires_at=_coerce_expires_at(expires_at),
        reason=str(reason or "").strip(),
        requested_by_identifier=str(requested_by_identifier or "").strip(),
    )
|
||||
|
||||
|
||||
@transaction.atomic
def review_memory_change_request(
    *,
    user_id: int,
    request_id: str,
    decision: str,
    reviewer_identifier: str = "",
    note: str = "",
) -> MemoryChangeRequest:
    """Approve or reject a pending change request and apply it if approved.

    On ``reject`` the request is marked rejected and returned.  On
    ``approve`` the request is first marked approved, then its action is
    applied to the target MemoryItem (create / update / deprecate), and the
    request ends in status ``applied``.  Runs in a single DB transaction.

    Raises ValueError if the request is not pending, the decision is not
    approve/reject, or an approved request lacks its required target.
    """
    req = MemoryChangeRequest.objects.select_related("memory", "conversation").get(
        id=request_id,
        user_id=int(user_id),
    )
    if req.status != "pending":
        raise ValueError("request is not pending")

    now = timezone.now()
    normalized_decision = str(decision or "").strip().lower()
    if normalized_decision not in {"approve", "reject"}:
        raise ValueError("decision must be approve/reject")

    req.reviewed_by_identifier = str(reviewer_identifier or "").strip()
    req.reviewed_at = now
    # The review note is appended to the existing reason rather than stored
    # in its own field.
    if note:
        req.reason = f"{req.reason}\n\nReview note: {str(note).strip()}".strip()

    if normalized_decision == "reject":
        req.status = "rejected"
        req.save(
            update_fields=[
                "status",
                "reviewed_by_identifier",
                "reviewed_at",
                "reason",
                "updated_at",
            ]
        )
        return req

    # Record the approval before applying, so the audit trail shows the
    # decision even if applying below raises and rolls the transaction back.
    req.status = "approved"
    req.save(
        update_fields=[
            "status",
            "reviewed_by_identifier",
            "reviewed_at",
            "reason",
            "updated_at",
        ]
    )

    memory = req.memory
    if req.action == "create":
        if memory is None:
            if req.conversation is None:
                raise ValueError("approved create request missing conversation")
            memory = MemoryItem.objects.create(
                user_id=int(user_id),
                conversation=req.conversation,
                person_id=req.person_id,
                memory_kind=req.proposed_memory_kind or "fact",
                status="active",
                content=req.proposed_content or {},
                confidence_score=float(req.proposed_confidence_score or 0.5),
                expires_at=req.proposed_expires_at,
                last_verified_at=now,
                provenance={"approved_request_id": str(req.id)},
            )
            req.memory = memory
        else:
            # Create request already linked to a proposed item (the
            # suggestion pipeline path): just activate it.
            memory.status = "active"
            memory.last_verified_at = now
            memory.save(update_fields=["status", "last_verified_at", "updated_at"])
    elif req.action == "update":
        if memory is None:
            raise ValueError("approved update request missing memory")
        # Only overwrite fields the request actually proposes...
        if req.proposed_memory_kind:
            memory.memory_kind = req.proposed_memory_kind
        if req.proposed_content:
            memory.content = req.proposed_content
        if req.proposed_confidence_score is not None:
            memory.confidence_score = float(req.proposed_confidence_score)
        # ...except expires_at, which is always taken from the request
        # (a None here clears any existing expiry).
        memory.expires_at = req.proposed_expires_at
        memory.last_verified_at = now
        memory.status = "active"
        memory.save()
    else:
        # "delete" is a soft delete: the item is deprecated, not removed.
        if memory is None:
            raise ValueError("approved delete request missing memory")
        memory.status = "deprecated"
        memory.last_verified_at = now
        memory.save(update_fields=["status", "last_verified_at", "updated_at"])

    req.status = "applied"
    req.save(update_fields=["status", "memory", "updated_at"])
    return req
|
||||
|
||||
|
||||
@transaction.atomic
def run_memory_hygiene(*, user_id: int | None = None, dry_run: bool = False) -> dict[str, int]:
    """Expire stale memories and flag contradictory ones for review.

    Two passes over active MemoryItems (optionally scoped to one user):
    1. items whose ``expires_at`` has passed are marked ``deprecated``;
    2. items sharing the same (user, person, conversation, kind, field)
       scope but holding different text values are counted as contradictions
       and each gets a pending ``update`` MemoryChangeRequest (unless one
       tagged "contradiction" is already pending).

    With ``dry_run`` only the counts are computed; nothing is written.
    Returns ``{"expired", "contradictions", "queued_requests"}``.
    """
    now = timezone.now()
    queryset = MemoryItem.objects.filter(status="active")
    if user_id is not None:
        queryset = queryset.filter(user_id=int(user_id))

    # Pass 1: expiry.
    expired_ids = list(
        queryset.filter(expires_at__isnull=False, expires_at__lte=now).values_list(
            "id",
            flat=True,
        )
    )
    expired = len(expired_ids)
    if expired and not dry_run:
        MemoryItem.objects.filter(id__in=expired_ids).update(status="deprecated")

    # Pass 2: contradiction detection.  Group items by scope, then by their
    # normalized text value; a scope with >1 distinct value is contradictory.
    # NOTE(review): the queryset is lazy, so this iteration re-evaluates it
    # after the expiry update above — freshly deprecated items appear to be
    # excluded from the contradiction scan; confirm that is intended.
    contradictions = 0
    queued = 0
    grouped: dict[tuple[int, str, str, str, str], dict[str, list[MemoryItem]]] = {}
    for item in queryset.select_related("conversation", "person"):
        content = item.content or {}
        # Accept both field/text and legacy key/value content shapes.
        field = str(content.get("field") or content.get("key") or "").strip().lower()
        text = _clean_value(str(content.get("text") or content.get("value") or "")).lower()
        if not field or not text:
            continue
        scope = (
            int(item.user_id),
            str(item.person_id or ""),
            str(item.conversation_id or ""),
            str(item.memory_kind or ""),
            field,
        )
        grouped.setdefault(scope, {})
        grouped[scope].setdefault(text, [])
        grouped[scope][text].append(item)

    for values in grouped.values():
        if len(values.keys()) <= 1:
            continue
        # Every item in a contradictory scope is counted and flagged, not
        # just the minority value.
        flat = [item for subset in values.values() for item in subset]
        contradictions += len(flat)
        if dry_run:
            continue
        for item in flat:
            # Idempotence guard: don't stack duplicate contradiction
            # requests across repeated hygiene runs.
            already_pending = MemoryChangeRequest.objects.filter(
                user_id=item.user_id,
                memory=item,
                action="update",
                status="pending",
                reason__icontains="contradiction",
            ).exists()
            if already_pending:
                continue
            # The proposal mirrors the item's current values; a reviewer
            # edits/approves it to resolve the contradiction.
            MemoryChangeRequest.objects.create(
                user_id=item.user_id,
                memory=item,
                conversation=item.conversation,
                person=item.person,
                action="update",
                status="pending",
                proposed_memory_kind=item.memory_kind,
                proposed_content=item.content,
                proposed_confidence_score=item.confidence_score,
                proposed_expires_at=item.expires_at,
                reason="Contradiction detected by hygiene job.",
                requested_by_identifier="memory-hygiene",
            )
            queued += 1

    log.info(
        "memory hygiene user=%s dry_run=%s expired=%s contradictions=%s queued=%s",
        user_id if user_id is not None else "-",
        dry_run,
        expired,
        contradictions,
        queued,
    )
    return {
        "expired": expired,
        "contradictions": contradictions,
        "queued_requests": queued,
    }
|
||||
123
core/memory/retrieval.py
Normal file
123
core/memory/retrieval.py
Normal file
@@ -0,0 +1,123 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from django.db.models import Q
|
||||
from django.utils import timezone
|
||||
|
||||
from core.memory.search_backend import get_memory_search_backend
|
||||
from core.models import MemoryItem
|
||||
|
||||
|
||||
def _coerce_statuses(value: Any, default: tuple[str, ...]) -> tuple[str, ...]:
|
||||
if isinstance(value, (list, tuple, set)):
|
||||
items = [str(item or "").strip().lower() for item in value]
|
||||
else:
|
||||
items = [item.strip().lower() for item in str(value or "").split(",")]
|
||||
cleaned = tuple(item for item in items if item)
|
||||
return cleaned or default
|
||||
|
||||
|
||||
def _base_queryset(
    *,
    user_id: int,
    person_id: str = "",
    conversation_id: str = "",
    statuses: tuple[str, ...] = ("active",),
):
    """Return non-expired MemoryItem rows for *user_id*, optionally narrowed.

    Blank person_id/conversation_id mean "no filter"; an empty statuses
    tuple skips status filtering entirely.
    """
    current_time = timezone.now()
    rows = MemoryItem.objects.filter(user_id=int(user_id))
    if statuses:
        rows = rows.filter(status__in=list(statuses))
    # Keep items with no expiry, or an expiry still in the future.
    rows = rows.filter(Q(expires_at__isnull=True) | Q(expires_at__gt=current_time))
    if person_id:
        rows = rows.filter(person_id=person_id)
    if conversation_id:
        rows = rows.filter(conversation_id=conversation_id)
    return rows
|
||||
|
||||
|
||||
def retrieve_memories_for_prompt(
    *,
    user_id: int,
    query: str = "",
    person_id: str = "",
    conversation_id: str = "",
    statuses: tuple[str, ...] = ("active",),
    limit: int = 20,
) -> list[dict[str, Any]]:
    """Fetch memories for prompt assembly as plain serializable dicts.

    With a non-empty *query*, the configured search backend ranks results
    and only hits that also pass the scoped DB filters are returned, in
    backend order with their search score/summary.  Without a query, the
    most recently verified/updated items are returned with a zero score.
    *limit* is clamped to 1..200.
    """
    statuses = _coerce_statuses(statuses, ("active",))
    safe_limit = max(1, min(200, int(limit or 20)))
    search_text = str(query or "").strip()

    def _serialize(item, score, summary) -> dict[str, Any]:
        # Single serialization path so the search branch and the fallback
        # branch can never drift apart (previously duplicated inline).
        return {
            "id": str(item.id),
            "memory_kind": str(item.memory_kind or ""),
            "status": str(item.status or ""),
            "person_id": str(item.person_id or ""),
            "conversation_id": str(item.conversation_id or ""),
            "content": item.content or {},
            "provenance": item.provenance or {},
            "confidence_score": float(item.confidence_score or 0.0),
            "expires_at": item.expires_at.isoformat() if item.expires_at else "",
            "last_verified_at": (
                item.last_verified_at.isoformat() if item.last_verified_at else ""
            ),
            "updated_at": item.updated_at.isoformat() if item.updated_at else "",
            "search_score": float(score or 0.0),
            "search_summary": str(summary or ""),
        }

    if search_text:
        backend = get_memory_search_backend()
        hits = backend.search(
            user_id=int(user_id),
            query=search_text,
            conversation_id=conversation_id,
            limit=safe_limit,
            include_statuses=statuses,
        )
        ids = [str(hit.memory_id or "").strip() for hit in hits if str(hit.memory_id or "").strip()]
        # Re-filter hits through the scoped queryset so expired items or
        # ones outside the person/conversation scope never leak through.
        scoped = _base_queryset(
            user_id=int(user_id),
            person_id=person_id,
            conversation_id=conversation_id,
            statuses=statuses,
        ).filter(id__in=ids)
        by_id = {str(item.id): item for item in scoped}
        rows = []
        for hit in hits:
            item = by_id.get(str(hit.memory_id))
            if not item:
                continue
            rows.append(_serialize(item, hit.score, hit.summary))
        return rows

    # No query: recency-ordered fallback with neutral search metadata.
    queryset = _base_queryset(
        user_id=int(user_id),
        person_id=person_id,
        conversation_id=conversation_id,
        statuses=statuses,
    ).order_by("-last_verified_at", "-updated_at")
    return [_serialize(item, 0.0, "") for item in queryset[:safe_limit]]
|
||||
@@ -137,6 +137,8 @@ class DjangoMemorySearchBackend(BaseMemorySearchBackend):
|
||||
|
||||
class ManticoreMemorySearchBackend(BaseMemorySearchBackend):
|
||||
name = "manticore"
|
||||
_table_ready_cache: dict[str, float] = {}
|
||||
_table_ready_ttl_seconds = 30.0
|
||||
|
||||
def __init__(self):
|
||||
self.base_url = str(
|
||||
@@ -146,6 +148,7 @@ class ManticoreMemorySearchBackend(BaseMemorySearchBackend):
|
||||
getattr(settings, "MANTICORE_MEMORY_TABLE", "gia_memory_items")
|
||||
).strip() or "gia_memory_items"
|
||||
self.timeout_seconds = int(getattr(settings, "MANTICORE_HTTP_TIMEOUT", 5) or 5)
|
||||
self._table_cache_key = f"{self.base_url}|{self.table}"
|
||||
|
||||
def _sql(self, query: str) -> dict[str, Any]:
|
||||
response = requests.post(
|
||||
@@ -160,6 +163,9 @@ class ManticoreMemorySearchBackend(BaseMemorySearchBackend):
|
||||
return dict(payload or {})
|
||||
|
||||
def ensure_table(self) -> None:
|
||||
last_ready = float(self._table_ready_cache.get(self._table_cache_key, 0.0) or 0.0)
|
||||
if (time.time() - last_ready) <= float(self._table_ready_ttl_seconds):
|
||||
return
|
||||
self._sql(
|
||||
(
|
||||
f"CREATE TABLE IF NOT EXISTS {self.table} ("
|
||||
@@ -175,6 +181,7 @@ class ManticoreMemorySearchBackend(BaseMemorySearchBackend):
|
||||
")"
|
||||
)
|
||||
)
|
||||
self._table_ready_cache[self._table_cache_key] = time.time()
|
||||
|
||||
def _doc_id(self, memory_id: str) -> int:
|
||||
digest = hashlib.blake2b(
|
||||
@@ -206,11 +213,66 @@ class ManticoreMemorySearchBackend(BaseMemorySearchBackend):
|
||||
)
|
||||
self._sql(query)
|
||||
|
||||
def _build_upsert_values_clause(self, item: MemoryItem) -> str:
    """Render one VALUES(...) tuple for a REPLACE INTO row of *item*.

    Column order must match the column list callers use:
    (id, memory_uuid, user_id, conversation_id, memory_kind, status,
    updated_ts, summary, body).  String fields go through self._escape
    before being embedded in the SQL literal.
    """
    memory_id = str(item.id)
    doc_id = self._doc_id(memory_id)
    # Flatten the content once (was called twice for the same input);
    # summary is just a truncated copy of the body text.
    body = _flatten_to_text(item.content)
    summary = body[:280]
    # Millisecond precision keeps ordering stable for close updates.
    updated_ts = int(item.updated_at.timestamp() * 1000)
    return (
        f"({doc_id},'{self._escape(memory_id)}',{int(item.user_id)},"
        f"'{self._escape(item.conversation_id)}','{self._escape(item.memory_kind)}',"
        f"'{self._escape(item.status)}',{updated_ts},"
        f"'{self._escape(summary)}','{self._escape(body)}')"
    )
|
||||
|
||||
def delete(self, memory_id: str) -> None:
    """Remove the indexed document for *memory_id* from the Manticore table."""
    self.ensure_table()
    document_id = self._doc_id(memory_id)
    self._sql(f"DELETE FROM {self.table} WHERE id={document_id}")
|
||||
|
||||
def reindex(
    self,
    *,
    user_id: int | None = None,
    include_statuses: tuple[str, ...] = ("active",),
    limit: int = 2000,
) -> dict[str, int]:
    """Rebuild index rows for up to *limit* memory items, newest first.

    Optionally restricted to one user and to a set of statuses.  Items whose
    VALUES clause cannot be built are logged and skipped (best-effort).
    Returns ``{"scanned": <items read>, "indexed": <rows written>}``.
    """
    self.ensure_table()
    queryset = MemoryItem.objects.all().order_by("-updated_at")
    if user_id is not None:
        queryset = queryset.filter(user_id=int(user_id))
    if include_statuses:
        queryset = queryset.filter(status__in=list(include_statuses))

    scanned = 0
    indexed = 0
    batch_size = 100
    values: list[str] = []

    def _flush() -> int:
        # One REPLACE per batch keeps round-trips to Manticore low; this
        # was previously duplicated inline in the loop body and the tail.
        if not values:
            return 0
        self._sql(
            f"REPLACE INTO {self.table} "
            "(id,memory_uuid,user_id,conversation_id,memory_kind,status,updated_ts,summary,body) "
            f"VALUES {','.join(values)}"
        )
        flushed = len(values)
        values.clear()
        return flushed

    for item in queryset[: max(1, int(limit))]:
        scanned += 1
        try:
            values.append(self._build_upsert_values_clause(item))
        except Exception as exc:
            # Best-effort: one malformed item must not abort the reindex.
            log.warning("memory-search upsert build failed id=%s err=%s", item.id, exc)
            continue
        if len(values) >= batch_size:
            indexed += _flush()
    indexed += _flush()
    return {"scanned": scanned, "indexed": indexed}
|
||||
|
||||
def search(
|
||||
self,
|
||||
*,
|
||||
|
||||
Reference in New Issue
Block a user