Implement attachment view

This commit is contained in:
2026-02-15 18:58:58 +00:00
parent e7aac36ef9
commit 4cf75b9923
8 changed files with 914 additions and 69 deletions

View File

@@ -0,0 +1,162 @@
from django.core.management.base import BaseCommand
from django.db.models import Q
from core.models import Message, MessageEvent
# Message.text values (after stripping whitespace) that count as "no body".
EMPTY_TEXT_VALUES = {"", "[No Body]", "[no body]"}
def _normalize_url(value):
if not value:
return ""
text = str(value).strip().rstrip(".,);:!?\"'")
if text.startswith("http://") or text.startswith("https://"):
return text
return ""
def _extract_urls_from_attachment_blob(blob):
urls = []
if isinstance(blob, str):
normalized = _normalize_url(blob)
if normalized:
urls.append(normalized)
return urls
if isinstance(blob, dict):
for key in ("url", "source_url", "download_url", "proxy_url", "href"):
normalized = _normalize_url(blob.get(key))
if normalized:
urls.append(normalized)
nested = blob.get("attachments")
if isinstance(nested, list):
for row in nested:
urls.extend(_extract_urls_from_attachment_blob(row))
return urls
if isinstance(blob, list):
for row in blob:
urls.extend(_extract_urls_from_attachment_blob(row))
return urls
def _uniq(values):
seen = set()
output = []
for value in values:
if value in seen:
continue
seen.add(value)
output.append(value)
return output
class Command(BaseCommand):
    """Backfill empty XMPP ``Message.text`` rows with attachment URLs.

    For every message whose ``sender_uuid`` is "xmpp" and whose text is NULL,
    empty or the "[No Body]" placeholder, the command searches related
    ``MessageEvent`` rows for attachment URLs. With ``--apply`` the URLs are
    stored, newline-joined, as the message text; without it the command only
    reports what it would change.
    """

    help = (
        "Backfill empty Message.text rows originating from XMPP by recovering "
        "attachment URLs from MessageEvent metadata."
    )

    def add_arguments(self, parser):
        """Register the ``--apply``, ``--user-id`` and ``--limit`` options."""
        parser.add_argument(
            "--apply",
            action="store_true",
            help="Persist updates. Without this flag, runs as dry-run.",
        )
        parser.add_argument(
            "--user-id",
            type=int,
            default=None,
            help="Limit processing to one user ID.",
        )
        parser.add_argument(
            "--limit",
            type=int,
            default=0,
            help="Maximum number of candidate rows to inspect (0 = no limit).",
        )

    def _candidate_events(self, message):
        """Return the ``MessageEvent`` queryset to search for *message*.

        Events of the same user whose ``raw_payload_ref.legacy_message_id``
        equals the message id are preferred. If none exist, fall back to the
        same user's XMPP events that have attachments and whose ``ts`` lies
        within ``window`` of the message's ``ts``. A message without ``ts``
        gets an empty queryset in the fallback case.
        """
        linked = MessageEvent.objects.filter(
            user=message.user,
            raw_payload_ref__legacy_message_id=str(message.id),
        )
        if linked.exists():
            return linked

        # Fallback heuristic for older rows with missing legacy refs.
        if message.ts is None:
            # No timestamp to anchor the window on; int(None) would otherwise
            # raise and abort the whole run.
            return MessageEvent.objects.none()
        # Same unit as ``ts`` -- presumably milliseconds; TODO confirm.
        window = 2000
        anchor = int(message.ts)
        return MessageEvent.objects.filter(
            user=message.user,
            source_system="xmpp",
            ts__gte=anchor - window,
            ts__lte=anchor + window,
        ).exclude(attachments=[])

    def handle(self, *args, **options):
        """Run the backfill (or dry-run) and print a summary line.

        Options read: ``apply`` (persist changes), ``user_id`` (restrict to one
        user) and ``limit`` (maximum rows to inspect; values <= 0 mean no
        limit). Messages are processed in ``ts``, ``id`` order. For each
        message, candidate events are scanned until one yields URLs; those
        URLs are de-duplicated and newline-joined to form the new text.
        """
        apply_changes = bool(options.get("apply"))
        user_id = options.get("user_id")
        limit = int(options.get("limit") or 0)

        queryset = Message.objects.filter(
            sender_uuid__iexact="xmpp",
        ).filter(
            Q(text__isnull=True) | Q(text__exact="") | Q(text__iexact="[No Body]")
        )
        # ``is not None`` so an explicit ``--user-id 0`` still restricts the
        # run instead of silently widening it to every user.
        if user_id is not None:
            queryset = queryset.filter(user_id=user_id)
        queryset = queryset.order_by("ts", "id")
        if limit > 0:
            queryset = queryset[:limit]

        # The query matches the placeholder case-insensitively, so the
        # in-Python guard must too; otherwise rows such as "[NO BODY]" are
        # selected but skipped without being counted anywhere.
        empty_values = {value.casefold() for value in EMPTY_TEXT_VALUES}

        inspected = 0
        recoverable = 0
        updated = 0
        unrecoverable = 0

        for message in queryset.iterator():
            inspected += 1
            current_text = str(message.text or "").strip()
            if current_text.casefold() not in empty_values:
                continue

            urls = []
            for event in self._candidate_events(message):
                urls.extend(_extract_urls_from_attachment_blob(event.attachments))
                urls.extend(
                    _extract_urls_from_attachment_blob(event.raw_payload_ref or {})
                )
                # Stop at the first event that yields anything.
                if urls:
                    break
            urls = _uniq(urls)

            if not urls:
                unrecoverable += 1
                continue

            recoverable += 1
            new_text = "\n".join(urls)
            if apply_changes:
                message.text = new_text
                message.save(update_fields=["text"])
                updated += 1
            else:
                self.stdout.write(
                    f"[dry-run] {message.id}: would set {len(urls)} URL(s)"
                )

        mode = "apply" if apply_changes else "dry-run"
        self.stdout.write(
            self.style.SUCCESS(
                "XMPP attachment URL backfill complete "
                f"({mode}): inspected={inspected}, "
                f"recoverable={recoverable}, "
                f"updated={updated}, "
                f"unrecoverable={unrecoverable}"
            )
        )