Improve search

This commit is contained in:
2026-03-02 02:26:25 +00:00
parent a9f5f3f75d
commit b94219fc5b
20 changed files with 1626 additions and 314 deletions

View File

@@ -1,7 +1,7 @@
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import date, datetime
from datetime import date, datetime, timezone as dt_timezone
from decimal import Decimal, InvalidOperation
from typing import Any, Callable
from urllib.parse import urlencode
@@ -14,10 +14,11 @@ from django.db.models import Q
from django.http import HttpResponseBadRequest
from django.shortcuts import render
from django.urls import reverse
from django.utils import timezone
from django.views import View
from mixins.views import ObjectList
from core.models import Group, Manipulation, Person, Persona
from core.models import Group, Manipulation, Message, Person, PersonIdentifier, Persona
def _context_type(request_type: str) -> str:
@@ -389,13 +390,130 @@ OSINT_SCOPES: dict[str, OsintScopeConfig] = {
"persona__alias__icontains",
),
),
"identifiers": OsintScopeConfig(
key="identifiers",
title="Identifiers",
model=PersonIdentifier,
list_url_name="identifiers",
update_url_name="identifier_update",
delete_url_name="identifier_delete",
default_sort="identifier",
columns=(
OsintColumn(
key="id",
label="ID",
accessor=lambda item: item.id,
sort_field="id",
search_lookup="id__icontains",
kind="id_copy",
),
OsintColumn(
key="person",
label="Person",
accessor=lambda item: item.person.name if item.person_id else "",
sort_field="person__name",
search_lookup="person__name__icontains",
),
OsintColumn(
key="service",
label="Service",
accessor=lambda item: item.service,
sort_field="service",
search_lookup="service__icontains",
),
OsintColumn(
key="identifier",
label="Identifier",
accessor=lambda item: item.identifier,
sort_field="identifier",
search_lookup="identifier__icontains",
),
),
select_related=("person",),
delete_label=lambda item: item.identifier,
search_lookups=(
"id__icontains",
"person__name__icontains",
"service__icontains",
"identifier__icontains",
),
),
"messages": OsintScopeConfig(
key="messages",
title="Messages",
model=Message,
list_url_name="sessions",
update_url_name="session_update",
delete_url_name="session_delete",
default_sort="ts",
columns=(
OsintColumn(
key="id",
label="ID",
accessor=lambda item: item.id,
sort_field="id",
search_lookup="id__icontains",
kind="id_copy",
),
OsintColumn(
key="service",
label="Service",
accessor=lambda item: item.source_service or "",
sort_field="source_service",
search_lookup="source_service__icontains",
),
OsintColumn(
key="chat",
label="Chat",
accessor=lambda item: {
"display": "",
"copy": item.source_chat_id or "",
},
sort_field="source_chat_id",
search_lookup="source_chat_id__icontains",
kind="chat_ref",
),
OsintColumn(
key="sender",
label="Sender",
accessor=lambda item: item.custom_author or item.sender_uuid or "",
sort_field="sender_uuid",
search_lookup="sender_uuid__icontains",
),
OsintColumn(
key="text",
label="Text",
accessor=lambda item: item.text or "",
search_lookup="text__icontains",
),
OsintColumn(
key="ts",
label="Timestamp",
accessor=lambda item: datetime.fromtimestamp(item.ts / 1000.0),
sort_field="ts",
kind="datetime",
),
),
search_lookups=(
"id__icontains",
"text__icontains",
"source_service__icontains",
"source_chat_id__icontains",
"sender_uuid__icontains",
"custom_author__icontains",
"source_message_id__icontains",
),
),
}
# Icon CSS class string for each selectable search scope, keyed by scope key.
# "all" is the combined scope; the remaining keys match OSINT_SCOPES entries.
OSINT_SCOPE_ICONS: dict[str, str] = {
    # Combined search across scopes.
    "all": "fa-solid fa-globe",
    # Per-model scopes.
    "people": "fa-solid fa-user-group",
    "groups": "fa-solid fa-users",
    "personas": "fa-solid fa-masks-theater",
    "manipulations": "fa-solid fa-sliders",
    "identifiers": "fa-solid fa-id-card",
    "messages": "fa-solid fa-message",
}
@@ -692,6 +810,124 @@ class OSINTSearch(LoginRequiredMixin, View):
# Template path for the results table. It is not referenced in the visible
# part of this file; presumably used when rendering results (confirm).
result_template = "partials/results_table.html"
# Not referenced in the visible code; presumably the page size used when the
# request does not supply one (confirm against the page-size helper).
per_page_default = 20
# Hard cap on the page size: requested values are clamped with
# min(value, self.per_page_max).
per_page_max = 100
# Scopes that _search_all_rows merges when the "all" scope is selected.
all_scope_keys = ("people", "identifiers", "messages")
@dataclass(frozen=True)
class SearchPlan:
    """
    Immutable snapshot of the search controls parsed from one request.

    Field order follows the SIQTSRSS/ADR mnemonic: Size, Index, Query, Tags,
    Source, Ranges, Sort, Sentiment, then Annotate, Dedup, Reverse. Range and
    sentiment values are kept as the raw strings from the query string; they
    are parsed where they are applied.
    """

    # S - page size.
    size: int
    # I - scope key to search.
    index: str
    # Q - free-text query, stripped.
    query: str
    # T - values of the "tag:" tokens found in the query.
    tags: tuple[str, ...]
    # S - service filter; "all" means no filter.
    source: str
    # R - date range bounds as raw strings ("" when absent).
    date_from: str
    date_to: str
    # S - sort mode name.
    sort_mode: str
    # S - sentiment bounds as raw strings ("" when absent).
    sentiment_min: str
    sentiment_max: str
    # A / D / R - annotate snippets, drop duplicates, flip the final order.
    annotate: bool
    dedup: bool
    reverse: bool
def _prepare_siqtsrss_adr(self, request) -> "OSINTSearch.SearchPlan":
"""
Parse search controls following the Neptune-style SIQTSRSS/ADR flow.
S - Size, I - Index, Q - Query, T - Tags, S - Source, R - Ranges,
S - Sort, S - Sentiment, A - Annotate, D - Dedup, R - Reverse.
"""
query = str(request.GET.get("q") or "").strip()
tags = tuple(
token[4:].strip()
for token in query.split()
if token.lower().startswith("tag:")
)
return self.SearchPlan(
size=self._per_page(request.GET.get("per_page")),
index=self._scope_key(request.GET.get("scope")),
query=query,
tags=tags,
source=str(request.GET.get("source") or "all").strip().lower() or "all",
date_from=str(request.GET.get("date_from") or "").strip(),
date_to=str(request.GET.get("date_to") or "").strip(),
sort_mode=str(request.GET.get("sort_mode") or "relevance").strip().lower(),
sentiment_min=str(request.GET.get("sentiment_min") or "").strip(),
sentiment_max=str(request.GET.get("sentiment_max") or "").strip(),
annotate=str(request.GET.get("annotate") or "1").strip() not in {"0", "false", "off"},
dedup=str(request.GET.get("dedup") or "").strip() in {"1", "true", "on"},
reverse=str(request.GET.get("reverse") or "").strip() in {"1", "true", "on"},
)
def _parse_date_boundaries(self, plan: "OSINTSearch.SearchPlan") -> tuple[datetime | None, datetime | None]:
parsed_from = None
parsed_to = None
if plan.date_from:
try:
parsed_from = datetime.fromisoformat(plan.date_from)
except ValueError:
parsed_from = None
if plan.date_to:
try:
parsed_to = datetime.fromisoformat(plan.date_to)
except ValueError:
parsed_to = None
if parsed_to is not None:
parsed_to = parsed_to.replace(hour=23, minute=59, second=59, microsecond=999999)
return parsed_from, parsed_to
def _score_hit(self, query: str, primary: str, secondary: str) -> int:
if not query:
return 0
needle = query.lower()
return primary.lower().count(needle) * 3 + secondary.lower().count(needle)
def _identifier_exact_boost(self, query: str, identifier: str) -> int:
needle = str(query or "").strip().lower()
hay = str(identifier or "").strip().lower()
if not needle or not hay:
return 0
if hay == needle:
return 120
if hay.startswith(needle):
return 45
if needle in hay:
return 15
return 0
def _message_recency_boost(self, stamp: datetime | None) -> int:
if stamp is None:
return 0
now_ts = timezone.now()
if timezone.is_naive(stamp):
stamp = timezone.make_aware(stamp, dt_timezone.utc)
age = now_ts - stamp
if age.days < 1:
return 40
if age.days < 7:
return 25
if age.days < 30:
return 12
if age.days < 90:
return 6
return 0
def _snippet(self, text: str, query: str, max_len: int = 180) -> str:
value = str(text or "").strip()
if not value:
return ""
if not query:
return value[:max_len]
lower = value.lower()
needle = query.lower()
idx = lower.find(needle)
if idx < 0:
return value[:max_len]
start = max(idx - 40, 0)
end = min(idx + len(needle) + 90, len(value))
snippet = value[start:end]
if start > 0:
snippet = "" + snippet
if end < len(value):
snippet = snippet + ""
return snippet
def _field_options(self, model_cls: type[models.Model]) -> list[dict[str, str]]:
options = []
@@ -826,13 +1062,209 @@ class OSINTSearch(LoginRequiredMixin, View):
return min(value, self.per_page_max)
def _scope_key(self, raw_scope: str | None) -> str:
if raw_scope == "all":
return "all"
if raw_scope in OSINT_SCOPES:
return raw_scope
return "people"
return "all"
def _query_state(self, request) -> dict[str, Any]:
return {k: v for k, v in request.GET.items() if v not in {None, ""}}
def _apply_common_filters(
self,
queryset: models.QuerySet,
scope_key: str,
plan: "OSINTSearch.SearchPlan",
) -> models.QuerySet:
# Narrow ``queryset`` with the source, date-range and sentiment controls
# carried by ``plan`` and return the narrowed queryset. ``scope_key`` selects
# which model field each control maps to; scopes without a matching branch
# are left untouched by that control.
#
# NOTE(review): indentation is not visible in this view, so the nesting of
# the branches below cannot be confirmed here. In particular, verify whether
# the sentiment filters belong to the "people" branch only or run for every
# scope; if they run for every scope, each model passed in needs a
# ``sentiment`` field.
date_from, date_to = self._parse_date_boundaries(plan)
# Source filter: "all" (or an empty value) means no restriction.
if plan.source and plan.source != "all":
if scope_key == "messages":
queryset = queryset.filter(source_service=plan.source)
elif scope_key == "identifiers":
queryset = queryset.filter(service=plan.source)
elif scope_key == "people":
# Filters through the related identifiers; distinct() is presumably there
# because that join can return the same person more than once (confirm).
queryset = queryset.filter(personidentifier__service=plan.source).distinct()
# Date range: messages compare against ``ts`` as integer milliseconds,
# people compare against ``last_interaction`` directly.
# NOTE(review): datetime.timestamp() reads a naive value as system-local
# time, so bounds without an offset depend on the server time zone.
if scope_key == "messages":
if date_from is not None:
queryset = queryset.filter(ts__gte=int(date_from.timestamp() * 1000))
if date_to is not None:
queryset = queryset.filter(ts__lte=int(date_to.timestamp() * 1000))
elif scope_key == "people":
if date_from is not None:
queryset = queryset.filter(last_interaction__gte=date_from)
if date_to is not None:
queryset = queryset.filter(last_interaction__lte=date_to)
# Sentiment bounds arrive as raw strings; a value that is not a valid float
# is ignored rather than rejected.
if plan.sentiment_min:
try:
queryset = queryset.filter(sentiment__gte=float(plan.sentiment_min))
except ValueError:
pass
if plan.sentiment_max:
try:
queryset = queryset.filter(sentiment__lte=float(plan.sentiment_max))
except ValueError:
pass
return queryset
def _search_all_rows(
self,
request,
plan: "OSINTSearch.SearchPlan",
) -> list[dict[str, Any]]:
rows: list[dict[str, Any]] = []
query = plan.query
date_from, date_to = self._parse_date_boundaries(plan)
per_scope_limit = max(40, min(plan.size * 3, 250))
allowed_scopes = set(self.all_scope_keys)
tag_scopes = {
tag.strip().lower()
for tag in plan.tags
if tag.strip().lower() in allowed_scopes
}
if tag_scopes:
allowed_scopes = tag_scopes
if "people" in allowed_scopes:
people_qs = self._apply_common_filters(
Person.objects.filter(user=request.user),
"people",
plan,
)
if query:
people_qs = people_qs.filter(
Q(name__icontains=query)
| Q(summary__icontains=query)
| Q(profile__icontains=query)
| Q(revealed__icontains=query)
| Q(likes__icontains=query)
| Q(dislikes__icontains=query)
)
for item in people_qs.order_by("-last_interaction", "name")[:per_scope_limit]:
secondary = self._snippet(
f"{item.summary or ''} {item.profile or ''}".strip(),
query if plan.annotate else "",
)
rows.append(
{
"id": f"person:{item.id}",
"scope": "Contact",
"primary": item.name,
"secondary": secondary or (item.timezone or ""),
"service": "-",
"when": item.last_interaction,
"score": self._score_hit(query, item.name or "", secondary or ""),
}
)
if "identifiers" in allowed_scopes:
identifiers_qs = self._apply_common_filters(
PersonIdentifier.objects.filter(user=request.user).select_related("person"),
"identifiers",
plan,
)
if query:
identifiers_qs = identifiers_qs.filter(
Q(identifier__icontains=query)
| Q(person__name__icontains=query)
| Q(service__icontains=query)
)
for item in identifiers_qs.order_by("person__name", "identifier")[:per_scope_limit]:
primary = item.person.name if item.person_id else item.identifier
secondary = item.identifier if item.person_id else ""
base_score = self._score_hit(query, primary or "", secondary or "")
exact_boost = self._identifier_exact_boost(query, item.identifier)
rows.append(
{
"id": f"identifier:{item.id}",
"scope": "Identifier",
"primary": primary,
"secondary": secondary,
"service": item.service or "",
"when": None,
"score": base_score + exact_boost,
}
)
if "messages" in allowed_scopes:
messages_qs = self._apply_common_filters(
Message.objects.filter(user=request.user),
"messages",
plan,
)
if query:
messages_qs = messages_qs.filter(
Q(text__icontains=query)
| Q(custom_author__icontains=query)
| Q(sender_uuid__icontains=query)
| Q(source_chat_id__icontains=query)
| Q(source_message_id__icontains=query)
)
for item in messages_qs.order_by("-ts")[:per_scope_limit]:
when_dt = datetime.fromtimestamp(item.ts / 1000.0) if item.ts else None
if date_from and when_dt and when_dt < date_from:
continue
if date_to and when_dt and when_dt > date_to:
continue
primary = item.custom_author or item.sender_uuid or (item.source_chat_id or "Message")
secondary = self._snippet(item.text or "", query if plan.annotate else "")
base_score = self._score_hit(query, primary or "", item.text or "")
recency_boost = self._message_recency_boost(when_dt)
rows.append(
{
"id": f"message:{item.id}",
"scope": "Message",
"primary": primary,
"secondary": secondary,
"service": item.source_service or "-",
"when": when_dt,
"score": base_score + recency_boost,
}
)
if plan.dedup:
seen = set()
deduped = []
for row in rows:
key = (
row["scope"],
str(row["primary"]).strip().lower(),
str(row["secondary"]).strip().lower(),
str(row["service"]).strip().lower(),
)
if key in seen:
continue
seen.add(key)
deduped.append(row)
rows = deduped
def row_time_key(row: dict[str, Any]) -> float:
stamp = row.get("when")
if stamp is None:
return 0.0
if timezone.is_aware(stamp):
return float(stamp.timestamp())
return float(stamp.replace(tzinfo=dt_timezone.utc).timestamp())
if plan.sort_mode == "oldest":
rows.sort(key=row_time_key)
elif plan.sort_mode == "recent":
rows.sort(key=row_time_key, reverse=True)
else:
rows.sort(
key=lambda row: (
row["score"],
row_time_key(row),
),
reverse=True,
)
if plan.reverse:
rows.reverse()
return rows
def _active_sort(self, scope: OsintScopeConfig) -> tuple[str, str]:
direction = self.request.GET.get("dir", "asc").lower()
if direction not in {"asc", "desc"}:
@@ -958,69 +1390,111 @@ class OSINTSearch(LoginRequiredMixin, View):
if type not in self.allowed_types:
return HttpResponseBadRequest("Invalid type specified.")
scope_key = self._scope_key(request.GET.get("scope"))
scope = OSINT_SCOPES[scope_key]
field_options = self._field_options(scope.model)
query = request.GET.get("q", "").strip()
field_name = request.GET.get("field", "__all__")
if field_name != "__all__":
allowed_fields = {option["value"] for option in field_options}
if field_name not in allowed_fields:
field_name = "__all__"
queryset = scope.model.objects.filter(user=request.user)
if scope.select_related:
queryset = queryset.select_related(*scope.select_related)
if scope.prefetch_related:
queryset = queryset.prefetch_related(*scope.prefetch_related)
queryset = self._search_queryset(
queryset,
scope.model,
query,
field_name,
field_options,
)
sort_field = request.GET.get("sort", scope.default_sort)
direction = request.GET.get("dir", "asc").lower()
allowed_sort_fields = {
column.sort_field for column in scope.columns if column.sort_field
}
if sort_field not in allowed_sort_fields:
sort_field = scope.default_sort
if direction not in {"asc", "desc"}:
direction = "asc"
if sort_field:
order_by = sort_field if direction == "asc" else f"-{sort_field}"
queryset = queryset.order_by(order_by)
per_page = self._per_page(request.GET.get("per_page"))
paginator = Paginator(queryset, per_page)
page_obj = paginator.get_page(request.GET.get("page"))
object_list = list(page_obj.object_list)
plan = self._prepare_siqtsrss_adr(request)
scope_key = plan.index
query = plan.query
list_url = reverse("osint_search", kwargs={"type": type})
query_state = self._query_state(request)
column_context = self._build_column_context(
scope,
list_url,
query_state,
)
rows = self._build_rows(
scope,
object_list,
type,
)
pagination = self._build_pagination(
page_obj,
list_url,
query_state,
)
field_name = request.GET.get("field", "__all__")
if scope_key == "all":
rows_raw = self._search_all_rows(request, plan)
paginator = Paginator(rows_raw, plan.size)
page_obj = paginator.get_page(request.GET.get("page"))
column_context = [
{"key": "scope", "field_name": "scope", "label": "Type", "sortable": False, "kind": "text"},
{"key": "primary", "field_name": "primary", "label": "Primary", "sortable": False, "kind": "text"},
{"key": "secondary", "field_name": "secondary", "label": "Details", "sortable": False, "kind": "text"},
{"key": "service", "field_name": "service", "label": "Service", "sortable": False, "kind": "text"},
{"key": "when", "field_name": "when", "label": "When", "sortable": False, "kind": "datetime"},
]
rows = []
for item in list(page_obj.object_list):
rows.append(
{
"id": item["id"],
"cells": [
{"kind": "text", "value": item.get("scope")},
{"kind": "text", "value": item.get("primary")},
{"kind": "text", "value": item.get("secondary")},
{"kind": "text", "value": item.get("service")},
{"kind": "datetime", "value": item.get("when")},
],
"actions": [],
}
)
pagination = self._build_pagination(page_obj, list_url, query_state)
field_options: list[dict[str, str]] = []
selected_scope_key = "all"
osint_title = "Search Everything"
result_count = paginator.count
else:
scope = OSINT_SCOPES[scope_key]
field_options = self._field_options(scope.model)
if field_name != "__all__":
allowed_fields = {option["value"] for option in field_options}
if field_name not in allowed_fields:
field_name = "__all__"
queryset = scope.model.objects.filter(user=request.user)
if scope.select_related:
queryset = queryset.select_related(*scope.select_related)
if scope.prefetch_related:
queryset = queryset.prefetch_related(*scope.prefetch_related)
queryset = self._apply_common_filters(queryset, scope.key, plan)
queryset = self._search_queryset(
queryset,
scope.model,
query,
field_name,
field_options,
)
if plan.dedup:
queryset = queryset.distinct()
sort_field = request.GET.get("sort", scope.default_sort)
direction = request.GET.get("dir", "asc").lower()
allowed_sort_fields = {
column.sort_field for column in scope.columns if column.sort_field
}
if sort_field not in allowed_sort_fields:
sort_field = scope.default_sort
if direction not in {"asc", "desc"}:
direction = "asc"
if sort_field:
order_by = sort_field if direction == "asc" else f"-{sort_field}"
queryset = queryset.order_by(order_by)
if plan.reverse:
queryset = queryset.reverse()
paginator = Paginator(queryset, plan.size)
page_obj = paginator.get_page(request.GET.get("page"))
object_list = list(page_obj.object_list)
column_context = self._build_column_context(
scope,
list_url,
query_state,
)
rows = self._build_rows(
scope,
object_list,
type,
)
pagination = self._build_pagination(
page_obj,
list_url,
query_state,
)
selected_scope_key = scope.key
osint_title = f"Search {scope.title}"
result_count = paginator.count
context = {
"osint_scope": scope.key,
"osint_title": f"Search {scope.title}",
"osint_scope": selected_scope_key,
"osint_title": osint_title,
"osint_table_id": "osint-search-table",
"osint_event_name": "",
"osint_refresh_url": _url_with_query(list_url, query_state),
@@ -1029,17 +1503,30 @@ class OSINTSearch(LoginRequiredMixin, View):
"osint_pagination": pagination,
"osint_show_search": False,
"osint_show_actions": False,
"osint_result_count": paginator.count,
"osint_shell_borderless": True,
"osint_result_count": result_count,
"osint_search_url": list_url,
"scope_options": [
{"value": key, "label": conf.title}
for key, conf in OSINT_SCOPES.items()
{"value": "all", "label": "All (Contacts + Messages)"},
*[
{"value": key, "label": conf.title}
for key, conf in OSINT_SCOPES.items()
],
],
"field_options": field_options,
"selected_scope": scope.key,
"selected_scope": selected_scope_key,
"selected_field": field_name,
"search_query": query,
"selected_per_page": per_page,
"selected_per_page": plan.size,
"selected_source": plan.source,
"selected_date_from": plan.date_from,
"selected_date_to": plan.date_to,
"selected_sort_mode": plan.sort_mode,
"selected_sentiment_min": plan.sentiment_min,
"selected_sentiment_max": plan.sentiment_max,
"selected_annotate": plan.annotate,
"selected_dedup": plan.dedup,
"selected_reverse": plan.reverse,
"search_page_url": reverse("osint_search", kwargs={"type": "page"}),
"search_widget_url": reverse("osint_search", kwargs={"type": "widget"}),
}