Deimplement OSINT+ search

This commit is contained in:
2026-03-12 22:28:33 +00:00
parent 6ceff63b71
commit 12e02ac7ec
13 changed files with 97 additions and 1082 deletions

View File

@@ -2,21 +2,15 @@ from __future__ import annotations
import re
from dataclasses import dataclass, field
from datetime import date, datetime
from datetime import timezone as dt_timezone
from decimal import Decimal, InvalidOperation
from datetime import datetime
from typing import Any, Callable
from urllib.parse import urlencode
from django.contrib.auth.mixins import LoginRequiredMixin
from django.core.exceptions import FieldDoesNotExist
from django.core.paginator import Paginator
from django.db import models
from django.db.models import Q
from django.http import HttpResponseBadRequest
from django.shortcuts import render
from django.urls import reverse
from django.utils import timezone
from django.views import View
from core.models import Group, Manipulation, Message, Person, Persona, PersonIdentifier
@@ -43,11 +37,6 @@ def _safe_page_number(value: Any) -> int:
return max(1, page_value)
def _safe_query_param(request, key: str, default: str = "") -> str:
raw = request.GET.get(key, default)
return str(raw or default).strip()
def _sanitize_query_state(raw: dict[str, Any]) -> dict[str, str]:
cleaned: dict[str, str] = {}
for key, value in (raw or {}).items():
@@ -72,27 +61,6 @@ def _context_type(request_type: str) -> str:
return "modal" if request_type == "page" else request_type
def _parse_bool(raw: str) -> bool | None:
lowered = raw.strip().lower()
truthy = {"1", "true", "yes", "y", "on", "enabled"}
falsy = {"0", "false", "no", "n", "off", "disabled"}
if lowered in truthy:
return True
if lowered in falsy:
return False
return None
def _preferred_related_text_field(model: type[models.Model]) -> str | None:
    """Pick the text field used to search a related model by value.

    Candidates are tried in a fixed priority order; the first one that
    exists on ``model`` as a ``CharField`` or ``TextField`` wins.
    Returns ``None`` when the model has none of them.
    """
    text_types = (models.CharField, models.TextField)
    fields_by_name = {f.name: f for f in model._meta.get_fields()}
    return next(
        (
            name
            for name in ("name", "alias", "title", "identifier", "model")
            if isinstance(fields_by_name.get(name), text_types)
        ),
        None,
    )
def _column_field_name(column: "OsintColumn") -> str:
if column.search_lookup:
return str(column.search_lookup).split("__", 1)[0]
@@ -853,814 +821,6 @@ class OSINTListBase(ObjectList):
return context
# Login-protected view that serves the OSINT search UI, either as a full page
# or as a window-manager widget, and answers HTMX refreshes of the result table.
class OSINTSearch(LoginRequiredMixin, View):
# Values accepted for the ``type`` URL argument; ``get`` rejects anything else.
allowed_types = {"page", "widget"}
# Template rendered for a full-page request.
page_template = "pages/osint-search.html"
# Wrapper template rendered for a widget request.
widget_template = "mixins/wm/widget.html"
# Passed to the widget wrapper as its ``window_content``.
panel_template = "partials/osint/search-panel.html"
# Rendered on its own when an HTMX request targets the results area.
result_template = "partials/results_table.html"
# Page size used when ``per_page`` is missing, unparseable or below 1.
per_page_default = 20
# Upper bound applied to any requested page size.
per_page_max = 100
# Scopes that the combined "all" search draws rows from.
all_scope_keys = ("people", "identifiers", "messages")
# Immutable bundle of the parsed search controls for one request.
# Built by ``_prepare_siqtsrss_adr``; date and sentiment bounds are kept as the
# raw strings from the query string and parsed where they are applied.
@dataclass(frozen=True)
class SearchPlan:
# Page size, already clamped by ``_per_page``.
size: int
# Scope key to search, or "all" for the combined search.
index: str
# Sanitized free-text query.
query: str
# Values of ``tag:<value>`` tokens found in the query.
tags: tuple[str, ...]
# Lower-cased service/source filter; "all" disables it.
source: str
# Raw ISO date strings; empty when not supplied.
date_from: str
date_to: str
# Lower-cased sort mode ("relevance" when not supplied).
sort_mode: str
# Raw sentiment bounds; empty when not supplied.
sentiment_min: str
sentiment_max: str
# Whether snippets are centred on the query match.
annotate: bool
# Whether duplicate rows are collapsed.
dedup: bool
# Whether the final ordering is reversed.
reverse: bool
def _prepare_siqtsrss_adr(self, request) -> "OSINTSearch.SearchPlan":
    """
    Parse search controls following the Neptune-style SIQTSRSS/ADR flow.

    S - Size, I - Index, Q - Query, T - Tags, S - Source, R - Ranges,
    S - Sort, S - Sentiment, A - Annotate, D - Dedup, R - Reverse.

    All values are read from ``request.GET``. Date and sentiment bounds are
    passed through as raw strings. The three boolean switches are matched
    case-insensitively, like ``source`` and ``sort_mode``, so ``annotate=False``
    or ``dedup=True`` behave the same as their lower-case spellings.

    Returns:
        A frozen ``SearchPlan`` describing the request.
    """

    def _flag(name: str, default: str = "") -> str:
        # Normalise case once so the membership tests below are reliable.
        return _safe_query_param(request, name, default).lower()

    query = _sanitize_search_query(_safe_query_param(request, "q", ""))
    # ``tag:<value>`` tokens inside the query double as scope selectors.
    tags = tuple(
        token[4:].strip()
        for token in query.split()
        if token.lower().startswith("tag:")
    )
    return self.SearchPlan(
        size=self._per_page(request.GET.get("per_page")),
        index=self._scope_key(request.GET.get("scope")),
        query=query,
        tags=tags,
        source=_safe_query_param(request, "source", "all").lower() or "all",
        date_from=_safe_query_param(request, "date_from", ""),
        date_to=_safe_query_param(request, "date_to", ""),
        sort_mode=_safe_query_param(request, "sort_mode", "relevance").lower(),
        sentiment_min=_safe_query_param(request, "sentiment_min", ""),
        sentiment_max=_safe_query_param(request, "sentiment_max", ""),
        # Annotation is opt-out; dedup and reverse are opt-in.
        annotate=_flag("annotate", "1") not in {"0", "false", "off"},
        dedup=_flag("dedup") in {"1", "true", "on"},
        reverse=_flag("reverse") in {"1", "true", "on"},
    )
def _parse_date_boundaries(
self, plan: "OSINTSearch.SearchPlan"
) -> tuple[datetime | None, datetime | None]:
parsed_from = None
parsed_to = None
if plan.date_from:
try:
parsed_from = datetime.fromisoformat(plan.date_from)
except ValueError:
parsed_from = None
if plan.date_to:
try:
parsed_to = datetime.fromisoformat(plan.date_to)
except ValueError:
parsed_to = None
if parsed_to is not None:
parsed_to = parsed_to.replace(
hour=23, minute=59, second=59, microsecond=999999
)
return parsed_from, parsed_to
def _score_hit(self, query: str, primary: str, secondary: str) -> int:
if not query:
return 0
needle = query.lower()
return primary.lower().count(needle) * 3 + secondary.lower().count(needle)
def _identifier_exact_boost(self, query: str, identifier: str) -> int:
needle = str(query or "").strip().lower()
hay = str(identifier or "").strip().lower()
if not needle or not hay:
return 0
if hay == needle:
return 120
if hay.startswith(needle):
return 45
if needle in hay:
return 15
return 0
def _message_recency_boost(self, stamp: datetime | None) -> int:
if stamp is None:
return 0
now_ts = timezone.now()
if timezone.is_naive(stamp):
stamp = timezone.make_aware(stamp, dt_timezone.utc)
age = now_ts - stamp
if age.days < 1:
return 40
if age.days < 7:
return 25
if age.days < 30:
return 12
if age.days < 90:
return 6
return 0
def _snippet(self, text: str, query: str, max_len: int = 180) -> str:
value = str(text or "").strip()
if not value:
return ""
if not query:
return value[:max_len]
lower = value.lower()
needle = query.lower()
idx = lower.find(needle)
if idx < 0:
return value[:max_len]
start = max(idx - 40, 0)
end = min(idx + len(needle) + 90, len(value))
snippet = value[start:end]
if start > 0:
snippet = "" + snippet
if end < len(value):
snippet = snippet + ""
return snippet
def _field_options(self, model_cls: type[models.Model]) -> list[dict[str, str]]:
options = []
for model_field in model_cls._meta.get_fields():
# Skip reverse/accessor relations (e.g. ManyToManyRel) that are not
# directly searchable as user-facing fields in this selector.
if model_field.auto_created and not model_field.concrete:
continue
if model_field.name == "user":
continue
label = getattr(
model_field,
"verbose_name",
str(model_field.name).replace("_", " "),
)
options.append(
{
"value": model_field.name,
"label": str(label).title(),
}
)
options.sort(key=lambda item: item["label"])
return options
def _field_q(
    self,
    model_cls: type[models.Model],
    field_name: str,
    query: str,
) -> tuple[Q | None, bool]:
    """Build the filter for searching one model field with a text query.

    Args:
        model_cls: Model that owns the field.
        field_name: Name of the field to search.
        query: Raw query text, parsed according to the field's type.

    Returns:
        ``(condition, needs_distinct)``. ``condition`` is ``None`` when the
        field does not exist, its type is unsupported, or ``query`` cannot
        be parsed for that type. ``needs_distinct`` is ``True`` only for
        many-to-many lookups, whose joins can duplicate rows.
    """
    try:
        model_field = model_cls._meta.get_field(field_name)
    except FieldDoesNotExist:
        return None, False

    if isinstance(
        model_field, (models.CharField, models.TextField, models.UUIDField)
    ):
        return Q(**{f"{field_name}__icontains": query}), False

    if isinstance(model_field, models.BooleanField):
        parsed = _parse_bool(query)
        if parsed is None:
            return None, False
        return Q(**{field_name: parsed}), False

    if isinstance(model_field, models.IntegerField):
        try:
            return Q(**{field_name: int(query)}), False
        except ValueError:
            return None, False

    if isinstance(model_field, (models.FloatField, models.DecimalField)):
        try:
            value = Decimal(query)
        except InvalidOperation:
            return None, False
        return Q(**{field_name: value}), False

    # DateTimeField subclasses DateField, so it has to be tested first;
    # otherwise this branch (and its date-only fallback) is never reached.
    if isinstance(model_field, models.DateTimeField):
        try:
            parsed_dt = datetime.fromisoformat(query)
        except ValueError:
            # Not a full timestamp: accept a bare date and match the whole day.
            try:
                parsed_date = date.fromisoformat(query)
            except ValueError:
                return None, False
            return Q(**{f"{field_name}__date": parsed_date}), False
        return Q(**{field_name: parsed_dt}), False

    if isinstance(model_field, models.DateField):
        try:
            parsed_date = date.fromisoformat(query)
        except ValueError:
            return None, False
        return Q(**{field_name: parsed_date}), False

    if isinstance(model_field, (models.ForeignKey, models.ManyToManyField)):
        needs_distinct = isinstance(model_field, models.ManyToManyField)
        # Search a readable text column on the related model when it has
        # one, otherwise fall back to matching on its id.
        related_text_field = _preferred_related_text_field(
            model_field.related_model
        )
        lookup = f"{field_name}__{related_text_field or 'id'}__icontains"
        return Q(**{lookup: query}), needs_distinct

    return None, False
def _search_queryset(
self,
queryset: models.QuerySet,
model_cls: type[models.Model],
query: str,
field_name: str,
field_options: list[dict[str, str]],
) -> models.QuerySet:
if not query:
return queryset
if field_name != "__all__":
field_q, use_distinct = self._field_q(model_cls, field_name, query)
if field_q is None:
return queryset.none()
queryset = queryset.filter(field_q)
return queryset.distinct() if use_distinct else queryset
condition = Q()
use_distinct = False
for option in field_options:
field_q, field_distinct = self._field_q(
model_cls,
option["value"],
query,
)
if field_q is None:
continue
condition |= field_q
use_distinct = use_distinct or field_distinct
if not condition.children:
return queryset.none()
queryset = queryset.filter(condition)
return queryset.distinct() if use_distinct else queryset
def _per_page(self, raw_value: str | None) -> int:
if not raw_value:
return self.per_page_default
try:
value = int(raw_value)
except ValueError:
return self.per_page_default
if value < 1:
return self.per_page_default
return min(value, self.per_page_max)
def _scope_key(self, raw_scope: str | None) -> str:
if raw_scope == "all":
return "all"
if raw_scope in OSINT_SCOPES:
return raw_scope
return "all"
def _query_state(self, request) -> dict[str, Any]:
    """Return the sanitized GET parameters, skipping ``None`` and empty values."""
    supplied = {
        key: value
        for key, value in request.GET.items()
        if value is not None and value != ""
    }
    return _sanitize_query_state(supplied)
# Apply the plan's source, date-range and sentiment filters to ``queryset``.
# ``scope_key`` selects which model fields the filters map onto; unknown scope
# keys skip the source and date filters. Returns the filtered queryset.
def _apply_common_filters(
self,
queryset: models.QuerySet,
scope_key: str,
plan: "OSINTSearch.SearchPlan",
) -> models.QuerySet:
# Unparseable or missing bounds come back as None and are skipped below.
date_from, date_to = self._parse_date_boundaries(plan)
# Source filter: each scope names its service column differently.
if plan.source and plan.source != "all":
if scope_key == "messages":
queryset = queryset.filter(source_service=plan.source)
elif scope_key == "identifiers":
queryset = queryset.filter(service=plan.source)
elif scope_key == "people":
# Filtering through the identifier relation can repeat a person once per
# matching identifier, hence the distinct().
queryset = queryset.filter(
personidentifier__service=plan.source
).distinct()
# Date range: messages compare ``ts`` against epoch milliseconds, people
# compare ``last_interaction`` against the datetimes directly. Identifiers
# have no date filter here.
if scope_key == "messages":
if date_from is not None:
queryset = queryset.filter(ts__gte=int(date_from.timestamp() * 1000))
if date_to is not None:
queryset = queryset.filter(ts__lte=int(date_to.timestamp() * 1000))
elif scope_key == "people":
if date_from is not None:
queryset = queryset.filter(last_interaction__gte=date_from)
if date_to is not None:
queryset = queryset.filter(last_interaction__lte=date_to)
# Sentiment bounds; a value that is not a valid float is ignored rather than
# rejected.
# NOTE(review): indentation is not visible in this view, so it is unclear
# whether these two filters apply to every scope or only inside the "people"
# branch above - confirm every scope's model has a ``sentiment`` field if they
# are unconditional.
if plan.sentiment_min:
try:
queryset = queryset.filter(sentiment__gte=float(plan.sentiment_min))
except ValueError:
pass
if plan.sentiment_max:
try:
queryset = queryset.filter(sentiment__lte=float(plan.sentiment_max))
except ValueError:
pass
return queryset
def _search_all_rows(
    self,
    request,
    plan: "OSINTSearch.SearchPlan",
) -> list[dict[str, Any]]:
    """Run the combined search across people, identifiers and messages.

    Each scope is filtered via ``_apply_common_filters`` and the text query,
    capped at a per-scope limit, and flattened into uniform row dicts with
    the keys ``id``, ``scope``, ``primary``, ``secondary``, ``service``,
    ``when`` and ``score``. ``tag:<scope>`` tokens in the query restrict
    which scopes are searched.

    Message timestamps are converted to UTC-aware datetimes so the recency
    boost and the sort key agree on what instant they describe. The date
    range is enforced by the database filter only; it is not re-checked in
    Python, which previously raised ``TypeError`` when the user supplied an
    offset-aware ISO date.

    Args:
        request: Current request; rows are limited to ``request.user``.
        plan: Parsed search controls.

    Returns:
        The rows, optionally de-duplicated, ordered by ``plan.sort_mode``
        ("oldest", "recent", otherwise score then time) and finally reversed
        when ``plan.reverse`` is set.
    """
    rows: list[dict[str, Any]] = []
    query = plan.query
    # Snippets are only centred on the match when annotation is enabled.
    snippet_query = query if plan.annotate else ""
    per_scope_limit = max(40, min(plan.size * 3, 250))

    allowed_scopes = set(self.all_scope_keys)
    tag_scopes = {tag.strip().lower() for tag in plan.tags} & allowed_scopes
    if tag_scopes:
        allowed_scopes = tag_scopes

    if "people" in allowed_scopes:
        people_qs = self._apply_common_filters(
            Person.objects.filter(user=request.user),
            "people",
            plan,
        )
        if query:
            people_qs = people_qs.filter(
                Q(name__icontains=query)
                | Q(summary__icontains=query)
                | Q(profile__icontains=query)
                | Q(revealed__icontains=query)
                | Q(likes__icontains=query)
                | Q(dislikes__icontains=query)
            )
        for item in people_qs.order_by("-last_interaction", "name")[
            :per_scope_limit
        ]:
            secondary = self._snippet(
                f"{item.summary or ''} {item.profile or ''}".strip(),
                snippet_query,
            )
            rows.append(
                {
                    "id": f"person:{item.id}",
                    "scope": "Contact",
                    "primary": item.name,
                    "secondary": secondary or (item.timezone or ""),
                    "service": "-",
                    "when": item.last_interaction,
                    "score": self._score_hit(
                        query, item.name or "", secondary or ""
                    ),
                }
            )

    if "identifiers" in allowed_scopes:
        identifiers_qs = self._apply_common_filters(
            PersonIdentifier.objects.filter(user=request.user).select_related(
                "person"
            ),
            "identifiers",
            plan,
        )
        if query:
            identifiers_qs = identifiers_qs.filter(
                Q(identifier__icontains=query)
                | Q(person__name__icontains=query)
                | Q(service__icontains=query)
            )
        for item in identifiers_qs.order_by("person__name", "identifier")[
            :per_scope_limit
        ]:
            primary = item.person.name if item.person_id else item.identifier
            secondary = item.identifier if item.person_id else ""
            base_score = self._score_hit(query, primary or "", secondary or "")
            exact_boost = self._identifier_exact_boost(query, item.identifier)
            rows.append(
                {
                    "id": f"identifier:{item.id}",
                    "scope": "Identifier",
                    "primary": primary,
                    "secondary": secondary,
                    "service": item.service or "",
                    "when": None,
                    "score": base_score + exact_boost,
                }
            )

    if "messages" in allowed_scopes:
        messages_qs = self._apply_common_filters(
            Message.objects.filter(user=request.user),
            "messages",
            plan,
        )
        if query:
            messages_qs = messages_qs.filter(
                Q(text__icontains=query)
                | Q(custom_author__icontains=query)
                | Q(sender_uuid__icontains=query)
                | Q(source_chat_id__icontains=query)
                | Q(source_message_id__icontains=query)
            )
        for item in messages_qs.order_by("-ts")[:per_scope_limit]:
            # ``ts`` is epoch milliseconds. Build an aware UTC datetime: a
            # naive local one would be misread as UTC by the recency boost
            # and the sort key below.
            when_dt = (
                datetime.fromtimestamp(item.ts / 1000.0, tz=dt_timezone.utc)
                if item.ts
                else None
            )
            primary = (
                item.custom_author
                or item.sender_uuid
                or (item.source_chat_id or "Message")
            )
            secondary = self._snippet(item.text or "", snippet_query)
            base_score = self._score_hit(query, primary or "", item.text or "")
            recency_boost = self._message_recency_boost(when_dt)
            rows.append(
                {
                    "id": f"message:{item.id}",
                    "scope": "Message",
                    "primary": primary,
                    "secondary": secondary,
                    "service": item.source_service or "-",
                    "when": when_dt,
                    "score": base_score + recency_boost,
                }
            )

    if plan.dedup:
        # Rows count as duplicates when scope, both text columns and service
        # match case-insensitively; the first occurrence is kept.
        seen = set()
        deduped = []
        for row in rows:
            key = (
                row["scope"],
                str(row["primary"]).strip().lower(),
                str(row["secondary"]).strip().lower(),
                str(row["service"]).strip().lower(),
            )
            if key in seen:
                continue
            seen.add(key)
            deduped.append(row)
        rows = deduped

    def row_time_key(row: dict[str, Any]) -> float:
        # Rows without a timestamp sort as the epoch; naive stamps are
        # treated as UTC.
        stamp = row.get("when")
        if stamp is None:
            return 0.0
        if timezone.is_aware(stamp):
            return float(stamp.timestamp())
        return float(stamp.replace(tzinfo=dt_timezone.utc).timestamp())

    if plan.sort_mode == "oldest":
        rows.sort(key=row_time_key)
    elif plan.sort_mode == "recent":
        rows.sort(key=row_time_key, reverse=True)
    else:
        # Default ("relevance"): best score first, newest first within a score.
        rows.sort(
            key=lambda row: (row["score"], row_time_key(row)),
            reverse=True,
        )
    if plan.reverse:
        rows.reverse()
    return rows
def _active_sort(self, scope: OsintScopeConfig) -> tuple[str, str]:
direction = self.request.GET.get("dir", "asc").lower()
if direction not in {"asc", "desc"}:
direction = "asc"
allowed = {col.sort_field for col in scope.columns if col.sort_field}
sort_field = self.request.GET.get("sort")
if sort_field not in allowed:
sort_field = scope.default_sort
return sort_field, direction
def _build_column_context(
    self,
    scope: OsintScopeConfig,
    list_url: str,
    query_state: dict[str, Any],
) -> list[dict[str, Any]]:
    """Describe the scope's table columns for the results template.

    Every column gets ``key``, ``field_name``, ``label``, ``sortable`` and
    ``kind``. Sortable columns additionally carry ``is_sorted``, ``is_desc``
    and a ``sort_url`` that toggles the direction (ascending first, then
    descending once the column is the active ascending sort) and resets
    paging to page 1.
    """
    active_field, active_direction = self._active_sort(scope)
    described = []
    for column in scope.columns:
        entry = {
            "key": column.key,
            "field_name": _column_field_name(column),
            "label": column.label,
            "sortable": bool(column.sort_field),
            "kind": column.kind,
        }
        if column.sort_field:
            is_active = active_field == column.sort_field
            toggled = "desc" if is_active and active_direction == "asc" else "asc"
            toggle_query = _merge_query(
                query_state,
                sort=column.sort_field,
                dir=toggled,
                page=1,
            )
            entry["is_sorted"] = is_active
            entry["is_desc"] = is_active and active_direction == "desc"
            entry["sort_url"] = _url_with_query(list_url, toggle_query)
        described.append(entry)
    return described
def _build_rows(
self,
scope: OsintScopeConfig,
object_list: list[Any],
request_type: str,
) -> list[dict[str, Any]]:
rows = []
for item in object_list:
row = {"id": str(item.pk), "cells": [], "actions": []}
for column in scope.columns:
row["cells"].append(
{
"kind": column.kind,
"value": column.accessor(item),
}
)
rows.append(row)
return rows
def _build_pagination(
self,
page_obj: Any,
list_url: str,
query_state: dict[str, Any],
) -> dict[str, Any]:
if page_obj is None:
return {"enabled": False}
pagination = {
"enabled": page_obj.paginator.num_pages > 1,
"count": page_obj.paginator.count,
"current": page_obj.number,
"total": page_obj.paginator.num_pages,
"has_previous": page_obj.has_previous(),
"has_next": page_obj.has_next(),
"previous_url": None,
"next_url": None,
"pages": [],
}
if page_obj.has_previous():
previous_page = _safe_page_number(page_obj.previous_page_number())
pagination["previous_url"] = _url_with_query(
list_url,
{"page": previous_page},
)
if page_obj.has_next():
next_page = _safe_page_number(page_obj.next_page_number())
pagination["next_url"] = _url_with_query(
list_url,
{"page": next_page},
)
for entry in page_obj.paginator.get_elided_page_range(page_obj.number):
if entry == "":
pagination["pages"].append({"ellipsis": True})
continue
pagination["pages"].append(
{
"ellipsis": False,
"number": entry,
"current": entry == page_obj.number,
"url": _url_with_query(
list_url,
{"page": _safe_page_number(entry)},
),
}
)
return pagination
def get(self, request, type):
    """Handle a search request and render the page, widget or result table.

    ``type`` is the URL argument naming the presentation ("page" or
    "widget"); any other value yields a 400 response. The combined "all"
    scope builds rows through ``_search_all_rows``; a specific scope
    filters, orders and paginates that scope's model for the current user.
    HTMX requests targeting the results area get only the result table.
    """
    if type not in self.allowed_types:
        return HttpResponseBadRequest("Invalid type specified.")

    plan = self._prepare_siqtsrss_adr(request)
    query = plan.query
    list_url = reverse("osint_search", kwargs={"type": type})
    query_state = self._query_state(request)
    field_name = request.GET.get("field", "__all__")

    if plan.index == "all":
        paginator = Paginator(self._search_all_rows(request, plan), plan.size)
        page_obj = paginator.get_page(request.GET.get("page"))
        # (row key, column label, cell kind) for the fixed combined layout.
        layout = (
            ("scope", "Type", "text"),
            ("primary", "Primary", "text"),
            ("secondary", "Details", "text"),
            ("service", "Service", "text"),
            ("when", "When", "datetime"),
        )
        column_context = [
            {
                "key": key,
                "field_name": key,
                "label": label,
                "sortable": False,
                "kind": kind,
            }
            for key, label, kind in layout
        ]
        rows = [
            {
                "id": hit["id"],
                "cells": [
                    {"kind": kind, "value": hit.get(key)}
                    for key, _label, kind in layout
                ],
                "actions": [],
            }
            for hit in page_obj.object_list
        ]
        pagination = self._build_pagination(page_obj, list_url, query_state)
        field_options: list[dict[str, str]] = []
        selected_scope_key = "all"
        osint_title = "Search Everything"
    else:
        scope = OSINT_SCOPES[plan.index]
        field_options = self._field_options(scope.model)
        # An unknown field selection silently widens to all fields.
        if field_name != "__all__" and field_name not in {
            option["value"] for option in field_options
        }:
            field_name = "__all__"

        queryset = scope.model.objects.filter(user=request.user)
        if scope.select_related:
            queryset = queryset.select_related(*scope.select_related)
        if scope.prefetch_related:
            queryset = queryset.prefetch_related(*scope.prefetch_related)
        queryset = self._apply_common_filters(queryset, scope.key, plan)
        queryset = self._search_queryset(
            queryset,
            scope.model,
            query,
            field_name,
            field_options,
        )
        if plan.dedup:
            queryset = queryset.distinct()

        sortable_fields = {
            column.sort_field for column in scope.columns if column.sort_field
        }
        sort_field = request.GET.get("sort", scope.default_sort)
        if sort_field not in sortable_fields:
            sort_field = scope.default_sort
        direction = request.GET.get("dir", "asc").lower()
        if direction not in {"asc", "desc"}:
            direction = "asc"
        if sort_field:
            prefix = "" if direction == "asc" else "-"
            queryset = queryset.order_by(f"{prefix}{sort_field}")
        if plan.reverse:
            queryset = queryset.reverse()

        paginator = Paginator(queryset, plan.size)
        page_obj = paginator.get_page(request.GET.get("page"))
        object_list = list(page_obj.object_list)
        column_context = self._build_column_context(scope, list_url, query_state)
        rows = self._build_rows(scope, object_list, type)
        pagination = self._build_pagination(page_obj, list_url, query_state)
        selected_scope_key = scope.key
        osint_title = f"Search {scope.title}"

    result_count = paginator.count

    scope_options = [{"value": "all", "label": "All (Contacts + Messages)"}]
    scope_options.extend(
        {"value": key, "label": conf.title} for key, conf in OSINT_SCOPES.items()
    )

    context = {
        "osint_scope": selected_scope_key,
        "osint_title": osint_title,
        "osint_table_id": "osint-search-table",
        "osint_event_name": "",
        "osint_refresh_url": _url_with_query(list_url, query_state),
        "osint_columns": column_context,
        "osint_rows": rows,
        "osint_pagination": pagination,
        "osint_show_search": False,
        "osint_show_actions": False,
        "osint_shell_borderless": True,
        "osint_result_count": result_count,
        "osint_search_url": list_url,
        "scope_options": scope_options,
        "field_options": field_options,
        "selected_scope": selected_scope_key,
        "selected_field": field_name,
        "search_query": query,
        "selected_per_page": plan.size,
        "selected_source": plan.source,
        "selected_date_from": plan.date_from,
        "selected_date_to": plan.date_to,
        "selected_sort_mode": plan.sort_mode,
        "selected_sentiment_min": plan.sentiment_min,
        "selected_sentiment_max": plan.sentiment_max,
        "selected_annotate": plan.annotate,
        "selected_dedup": plan.dedup,
        "selected_reverse": plan.reverse,
        "search_page_url": reverse("osint_search", kwargs={"type": "page"}),
        "search_widget_url": reverse("osint_search", kwargs={"type": "widget"}),
    }

    # Partial refresh: only the table is re-rendered, and for the full page
    # the browser URL is updated to reflect the current query.
    hx_target = request.headers.get("HX-Target")
    if request.htmx and hx_target in {"osint-search-results", "osint-search-table"}:
        response = render(request, self.result_template, context)
        if type == "page":
            response["HX-Replace-Url"] = _url_with_query(list_url, query_state)
        return response

    if type != "widget":
        return render(request, self.page_template, context)

    widget_context = {
        "title": "Search",
        "unique": "osint-search-widget",
        "window_content": self.panel_template,
        "widget_options": 'gs-w="8" gs-h="14" gs-x="0" gs-y="0" gs-min-w="5"',
        "widget_icon": _safe_icon_class(
            request.GET.get("widget_icon"),
            "fa-solid fa-magnifying-glass",
        ),
        **context,
    }
    return render(request, self.widget_template, widget_context)
class OSINTWorkspace(LoginRequiredMixin, View):
template_name = "pages/osint-workspace.html"