# GIA/core/assist/repeat_answer.py

from __future__ import annotations

import hashlib
import re
from dataclasses import dataclass
from datetime import timedelta

from asgiref.sync import sync_to_async
from django.utils import timezone

from core.models import AnswerMemory, AnswerSuggestionEvent, Message

# Matches runs of characters other than the letters a-z (case-insensitively),
# the digits 0-9, and whitespace. _normalize_question replaces each such run
# with a single space.
_WORD_RE = re.compile(r"[^a-z0-9\s]+", re.IGNORECASE)


@dataclass(slots=True)
class RepeatAnswerSuggestion:
    """A stored answer proposed in response to a repeated question.

    Returned by ``find_repeat_answer`` when an ``AnswerMemory`` row matches
    the fingerprint of the incoming question.
    """

    # Primary key of the matching AnswerMemory row, rendered as a string.
    answer_memory_id: str
    # The row's answer text with surrounding whitespace stripped.
    answer_text: str
    # Score recorded on the accompanying AnswerSuggestionEvent.
    score: float


def _normalize_question(text: str) -> str:
    """Return *text* in canonical form for fingerprinting.

    The text is stripped and lowercased, every run of characters matched by
    ``_WORD_RE`` becomes a space, and whitespace runs are collapsed to single
    spaces with no leading or trailing padding. Falsy input yields ``""``.
    """
    lowered = str(text or "").strip().lower()
    # split() with no arguments breaks on whitespace runs and discards empty
    # edge pieces, so re-joining collapses and trims in a single step.
    return " ".join(_WORD_RE.sub(" ", lowered).split())


def _fingerprint(text: str) -> str:
    """Return the SHA-1 hex digest of the normalized *text*.

    An empty string is returned when the text normalizes to nothing, so
    callers can treat a falsy fingerprint as "no usable question".
    """
    canonical = _normalize_question(text)
    return hashlib.sha1(canonical.encode("utf-8")).hexdigest() if canonical else ""


def _is_question(text: str) -> bool:
    body = str(text or "").strip()
    if not body:
        return False
    low = body.lower()
    return body.endswith("?") or low.startswith(
        (
            "what",
            "why",
            "how",
            "when",
            "where",
            "who",
            "can ",
            "do ",
            "did ",
            "is ",
            "are ",
        )
    )


def _is_group_channel(message: Message) -> bool:
    channel = str(getattr(message, "source_chat_id", "") or "").strip().lower()
    if channel.endswith("@g.us"):
        return True
    return (
        str(getattr(message, "source_service", "") or "").strip().lower() == "xmpp"
        and "conference." in channel
    )


async def learn_from_message(message: Message) -> None:
    """Record a question/answer pair when *message* replies to a question.

    Nothing is stored when the message is ``None``, has no text, carries an
    ``origin_tag`` entry in its metadata, is not a reply, or replies to text
    that ``_is_question`` rejects or that yields an empty fingerprint.

    Args:
        message: The incoming message, treated as a candidate answer.
    """
    if message is None:
        return
    answer_text = str(message.text or "").strip()
    if not answer_text:
        return
    if dict(message.message_meta or {}).get("origin_tag"):
        return
    # Cheap exit for non-replies: no thread hop and no database access.
    if not message.reply_to_id:
        return

    def _store_reply_pair() -> None:
        # ``reply_to`` and ``user`` appear to be related objects that Django
        # may load lazily. Resolving them inside this synchronous helper keeps
        # any such query out of the coroutine, where Django would raise
        # SynchronousOnlyOperation.
        parent = message.reply_to
        if not parent:
            return
        question_text = str(parent.text or "").strip()
        if not _is_question(question_text):
            return
        fingerprint = _fingerprint(question_text)
        if not fingerprint:
            return
        # Build memory by linking obvious reply answers to prior questions.
        AnswerMemory.objects.create(
            user=message.user,
            service=message.source_service or "web",
            channel_identifier=message.source_chat_id or "",
            question_fingerprint=fingerprint,
            question_text=question_text,
            answer_message=message,
            answer_text=answer_text,
            confidence_meta={"source": "reply_pair"},
        )

    await sync_to_async(_store_reply_pair)()


async def find_repeat_answer(user, message: Message) -> RepeatAnswerSuggestion | None:
    """Suggest a stored answer when *message* repeats a known question.

    A suggestion is produced only when the message comes from a group
    channel, has no ``origin_tag`` in its metadata, looks like a question,
    and an ``AnswerMemory`` row exists for the same user, service, channel
    and question fingerprint. The newest matching row is used.

    Suggestions are rate-limited per channel: if a ``"suggested"``
    ``AnswerSuggestionEvent`` for this user, service and chat was created in
    the last three minutes, nothing is suggested.

    Args:
        user: Owner of the answer memory and of the suggestion events.
        message: The incoming message to match against stored questions.

    Returns:
        The suggestion, or ``None`` when any of the conditions above fails
        or the stored answer text is blank. Returning a suggestion also
        creates an ``AnswerSuggestionEvent`` with status ``"suggested"``.
    """
    if message is None:
        return None
    if not _is_group_channel(message):
        return None
    if dict(message.message_meta or {}).get("origin_tag"):
        return None
    text = str(message.text or "").strip()
    if not _is_question(text):
        return None

    fp = _fingerprint(text)
    if not fp:
        return None

    # Channel cooldown for repeated suggestions in short windows. timedelta is
    # taken from the standard library rather than via django.utils.timezone,
    # which only exposes it as a side effect of its own imports.
    cooldown_cutoff = timezone.now() - timedelta(minutes=3)

    def _recently_suggested() -> bool:
        return AnswerSuggestionEvent.objects.filter(
            user=user,
            message__source_service=message.source_service,
            message__source_chat_id=message.source_chat_id,
            status="suggested",
            created_at__gte=cooldown_cutoff,
        ).exists()

    if await sync_to_async(_recently_suggested)():
        return None

    def _latest_memory():
        return (
            AnswerMemory.objects.filter(
                user=user,
                service=message.source_service or "web",
                channel_identifier=message.source_chat_id or "",
                question_fingerprint=fp,
            )
            .order_by("-created_at")
            .first()
        )

    memory = await sync_to_async(_latest_memory)()
    if not memory:
        return None

    answer = str(memory.answer_text or "").strip()
    if not answer:
        return None

    # The lookup is an exact fingerprint match, so the score is a fixed value.
    score = 0.99
    await sync_to_async(AnswerSuggestionEvent.objects.create)(
        user=user,
        message=message,
        status="suggested",
        candidate_answer=memory,
        score=score,
    )
    return RepeatAnswerSuggestion(
        answer_memory_id=str(memory.id),
        answer_text=answer,
        score=score,
    )