Fix XMPP auth

This commit is contained in:
2026-03-06 18:21:24 +00:00
parent 8c091b1e6d
commit 49aaed5dec
4 changed files with 1193 additions and 1 deletions

View File

@@ -0,0 +1,495 @@
"""
Integration tests for XMPP connectivity.
These tests require the GIA stack to be running (Prosody + gia app). They probe
both the XEP-0114 component port and the c2s (client-to-server) port on 5222,
mirroring exactly the flow a phone XMPP client uses:
TCP connect → STARTTLS → TLS upgrade (with cert check) → SASL PLAIN auth
Tests are skipped automatically when XMPP settings are absent (e.g. in CI
environments without a running stack).
"""
from __future__ import annotations
import base64
import hashlib
import http.client
import re
import socket
import ssl
import time
import unittest
import urllib.parse
import xml.etree.ElementTree as ET
from django.conf import settings
from django.test import SimpleTestCase
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _xmpp_configured() -> bool:
return bool(
getattr(settings, "XMPP_JID", None)
and getattr(settings, "XMPP_SECRET", None)
and getattr(settings, "XMPP_ADDRESS", None)
)
def _xmpp_address() -> str:
return str(settings.XMPP_ADDRESS)
def _xmpp_component_port() -> int:
return int(getattr(settings, "XMPP_PORT", None) or 8888)
def _xmpp_c2s_port() -> int:
"""Standard XMPP c2s port (same as a phone client would use)."""
return int(getattr(settings, "XMPP_C2S_PORT", None) or 5222)
def _xmpp_domain() -> str:
"""The VirtualHost domain (zm.is), derived from XMPP_JID or XMPP_DOMAIN."""
domain = getattr(settings, "XMPP_DOMAIN", None)
if domain:
return str(domain)
jid = str(settings.XMPP_JID)
# Component JID is like "jews.zm.is" → parent domain is "zm.is"
parts = jid.split(".")
if len(parts) > 2:
return ".".join(parts[1:])
return jid
def _prosody_auth_endpoint() -> str:
"""URL of the Django auth bridge that Prosody calls for c2s authentication."""
return str(getattr(settings, "PROSODY_AUTH_ENDPOINT", "http://127.0.0.1:8090/internal/prosody/auth/"))
def _recv_until(sock: socket.socket, patterns: list[bytes], timeout: float = 8.0, max_bytes: int = 16384) -> bytes:
"""Read from sock until one of the byte patterns appears or timeout/max_bytes hit."""
buf = b""
deadline = time.monotonic() + timeout
while time.monotonic() < deadline:
remaining = deadline - time.monotonic()
sock.settimeout(max(0.1, remaining))
try:
chunk = sock.recv(4096)
except socket.timeout:
break
if not chunk:
break
buf += chunk
if any(p in buf for p in patterns):
break
if len(buf) >= max_bytes:
break
return buf
def _component_handshake(address: str, port: int, jid: str, secret: str, timeout: float = 5.0) -> tuple[bool, str]:
"""
Attempt an XEP-0114 external component handshake.
Returns (success, message).
"""
stream_open = (
"<?xml version='1.0'?>"
f"<stream:stream xmlns='jabber:component:accept' "
f"xmlns:stream='http://etherx.jabber.org/streams' "
f"to='{jid}'>"
)
try:
with socket.create_connection((address, port), timeout=timeout) as sock:
sock.settimeout(timeout)
sock.sendall(stream_open.encode())
buf = _recv_until(sock, [b"id="], timeout=timeout)
header_text = buf.decode(errors="replace")
try:
root = ET.fromstring(header_text + "</stream:stream>")
stream_id = root.get("id", "")
except ET.ParseError:
m = re.search(r'\bid=["\']([^"\']+)["\']', header_text)
stream_id = m.group(1) if m else ""
if not stream_id:
return False, f"No stream id in header: {header_text[:200]}"
token = hashlib.sha1((stream_id + secret).encode()).hexdigest()
sock.sendall(f"<handshake>{token}</handshake>".encode())
response = _recv_until(sock, [b"<handshake", b"<stream:error"], timeout=timeout)
resp_text = response.decode(errors="replace")
if "<handshake/>" in resp_text or "<handshake />" in resp_text:
return True, "Handshake accepted"
if "conflict" in resp_text and "already connected" in resp_text:
return True, "Credentials valid (component already connected)"
if "not-authorized" in resp_text:
return False, "Handshake rejected: not-authorized"
if response:
return False, f"Unexpected response: {resp_text[:200]}"
return False, "No response received after handshake"
except socket.timeout:
return False, f"Timed out connecting to {address}:{port}"
except ConnectionRefusedError:
return False, f"Connection refused to {address}:{port}"
except OSError as exc:
return False, f"Socket error: {exc}"
class _C2SResult:
"""Return value from _c2s_sasl_auth."""
def __init__(self, success: bool, stage: str, detail: str):
self.success = success # True = SASL <success/>
self.stage = stage # where we got to: tcp/starttls/tls/features/auth
self.detail = detail # human-readable explanation
def __repr__(self):
return f"<C2SResult success={self.success} stage={self.stage!r} detail={self.detail!r}>"
def _c2s_sasl_auth(
address: str,
port: int,
domain: str,
username: str,
password: str,
verify_cert: bool = True,
timeout: float = 10.0,
) -> _C2SResult:
"""
Mirror the full c2s auth flow a phone XMPP client performs:
1. TCP connect to address:port
2. Open XMPP stream to domain
3. Receive <features> with <starttls>
4. Send <starttls/>, receive <proceed/>
5. ssl.wrap_socket with server_hostname=domain (cert validation if verify_cert)
6. Re-open XMPP stream
7. Receive post-TLS <features> with SASL mechanisms
8. Send SASL PLAIN base64(\x00username\x00password)
9. Return _C2SResult with (True, "auth") on <success/> or (False, "auth") on <failure/>
"""
NS_STREAM = "http://etherx.jabber.org/streams"
NS_TLS = "urn:ietf:params:xml:ns:xmpp-tls"
NS_SASL = "urn:ietf:params:xml:ns:xmpp-sasl"
def stream_open(to: str) -> bytes:
return (
"<?xml version='1.0'?>"
f"<stream:stream xmlns='jabber:client' "
f"xmlns:stream='{NS_STREAM}' "
f"to='{to}' version='1.0'>"
).encode()
try:
raw = socket.create_connection((address, port), timeout=timeout)
except ConnectionRefusedError:
return _C2SResult(False, "tcp", f"Connection refused to {address}:{port}")
except (socket.timeout, OSError) as exc:
return _C2SResult(False, "tcp", f"TCP connect failed: {exc}")
try:
raw.settimeout(timeout)
raw.sendall(stream_open(domain))
# --- Receive pre-TLS features (expect <starttls>) ---
buf = _recv_until(raw, [b"</stream:features>", b"<stream:error"], timeout=timeout)
text = buf.decode(errors="replace")
if "<stream:error" in text:
return _C2SResult(False, "starttls", f"Stream error before features: {text[:200]}")
if "starttls" not in text.lower():
return _C2SResult(False, "starttls", f"No <starttls> in pre-TLS features: {text[:300]}")
# --- Negotiate STARTTLS ---
raw.sendall(f"<starttls xmlns='{NS_TLS}'/>".encode())
buf2 = _recv_until(raw, [b"<proceed", b"<failure"], timeout=timeout)
text2 = buf2.decode(errors="replace")
if "<proceed" not in text2:
return _C2SResult(False, "starttls", f"No <proceed/> after STARTTLS request: {text2[:200]}")
# --- Upgrade to TLS ---
ctx = ssl.create_default_context()
if not verify_cert:
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
try:
tls = ctx.wrap_socket(raw, server_hostname=domain)
except ssl.SSLCertVerificationError as exc:
return _C2SResult(False, "tls", f"TLS cert verification failed for {domain!r}: {exc}")
except ssl.SSLError as exc:
return _C2SResult(False, "tls", f"TLS handshake error: {exc}")
tls.settimeout(timeout)
# --- Re-open stream over TLS ---
tls.sendall(stream_open(domain))
buf3 = _recv_until(tls, [b"</stream:features>", b"<stream:error"], timeout=timeout)
text3 = buf3.decode(errors="replace")
if "<stream:error" in text3:
return _C2SResult(False, "features", f"Stream error after TLS: {text3[:200]}")
mechanisms = re.findall(r"<mechanism>([^<]+)</mechanism>", text3, re.IGNORECASE)
if not mechanisms:
return _C2SResult(False, "features", f"No SASL mechanisms in post-TLS features: {text3[:300]}")
if "PLAIN" not in [m.upper() for m in mechanisms]:
return _C2SResult(False, "features", f"SASL PLAIN not offered; got: {mechanisms}")
# --- SASL PLAIN auth ---
credential = base64.b64encode(f"\x00{username}\x00{password}".encode()).decode()
tls.sendall(
f"<auth xmlns='{NS_SASL}' mechanism='PLAIN'>{credential}</auth>".encode()
)
buf4 = _recv_until(tls, [b"<success", b"<failure", b"<stream:error"], timeout=timeout)
text4 = buf4.decode(errors="replace")
if "<success" in text4:
return _C2SResult(True, "auth", "SASL PLAIN accepted")
if "<failure" in text4:
# Extract the failure condition element name (e.g. not-authorized)
m = re.search(r"<failure[^>]*>\s*<([a-z-]+)", text4)
condition = m.group(1) if m else "unknown"
return _C2SResult(False, "auth", f"SASL PLAIN rejected: {condition}{text4[:200]}")
if "<stream:error" in text4:
return _C2SResult(False, "auth", f"Stream error during auth: {text4[:200]}")
return _C2SResult(False, "auth", f"No auth response received: {text4[:200]}")
finally:
try:
raw.close()
except Exception:
pass
# ---------------------------------------------------------------------------
# Component tests (XEP-0114)
# ---------------------------------------------------------------------------
@unittest.skipUnless(_xmpp_configured(), "XMPP settings not configured")
class XMPPComponentTests(SimpleTestCase):
def test_component_port_reachable(self):
"""Prosody component port accepts TCP connections."""
addr = _xmpp_address()
port = _xmpp_component_port()
try:
with socket.create_connection((addr, port), timeout=5):
pass
except (ConnectionRefusedError, socket.timeout, OSError) as exc:
self.fail(f"Cannot reach XMPP component port {addr}:{port}: {exc}")
def test_component_handshake_succeeds(self):
"""Prosody accepts the component JID and secret (full XEP-0114 handshake)."""
ok, msg = _component_handshake(
address=_xmpp_address(),
port=_xmpp_component_port(),
jid=str(settings.XMPP_JID),
secret=str(settings.XMPP_SECRET),
)
self.assertTrue(ok, msg)
def test_wrong_secret_rejected(self):
"""Prosody rejects a component connection with an invalid secret."""
ok, msg = _component_handshake(
address=_xmpp_address(),
port=_xmpp_component_port(),
jid=str(settings.XMPP_JID),
secret="definitely-wrong-secret",
)
self.assertFalse(ok, f"Expected rejection but got: {msg}")
# ---------------------------------------------------------------------------
# Auth bridge tests (what Prosody calls to validate user passwords)
# ---------------------------------------------------------------------------
@unittest.skipUnless(_xmpp_configured(), "XMPP settings not configured")
class XMPPAuthBridgeTests(SimpleTestCase):
"""
Tests that probe the Django auth bridge endpoint that Prosody calls when a
phone client attempts to log in. If this endpoint is unreachable, ALL
c2s logins will silently fail with not-authorized regardless of password.
"""
def _parse_endpoint(self):
url = _prosody_auth_endpoint()
parsed = urllib.parse.urlparse(url)
return parsed.scheme, parsed.hostname, parsed.port or (443 if parsed.scheme == "https" else 80), parsed.path
def test_auth_endpoint_tcp_reachable(self):
"""Auth bridge port (8090) is listening inside the pod."""
_, host, port, _ = self._parse_endpoint()
try:
with socket.create_connection((host, port), timeout=5):
pass
except (ConnectionRefusedError, socket.timeout, OSError) as exc:
self.fail(
f"Cannot reach auth bridge at {host}:{port}: {exc}\n"
"This means uWSGI is not binding http-socket=127.0.0.1:8090 — "
"ALL c2s logins will fail with not-authorized."
)
def test_auth_endpoint_rejects_bad_secret(self):
"""Auth bridge returns 0 (or error) for a request with a wrong XMPP_SECRET."""
_, host, port, path = self._parse_endpoint()
# isuser command with wrong secret — should be rejected or return 0
query = "?command=isuser%3Anonexistent%3Azm.is&secret=wrongsecret"
try:
conn = http.client.HTTPConnection(host, port, timeout=5)
conn.request("GET", path + query)
resp = conn.getresponse()
body = resp.read().decode(errors="replace").strip()
conn.close()
except (ConnectionRefusedError, OSError) as exc:
self.fail(f"Could not connect to auth bridge: {exc}")
# Should not return "1" (success) with wrong secret
self.assertNotEqual(body, "1", f"Auth bridge accepted a request with wrong secret (body={body!r})")
def test_auth_endpoint_isuser_returns_zero_or_one(self):
"""Auth bridge responds with '0' or '1' for an isuser query (not an error page)."""
secret = getattr(settings, "XMPP_SECRET", "")
_, host, port, path = self._parse_endpoint()
query = f"?command=isuser%3Anonexistent%3Azm.is&secret={urllib.parse.quote(secret)}"
try:
conn = http.client.HTTPConnection(host, port, timeout=5)
conn.request("GET", path + query)
resp = conn.getresponse()
body = resp.read().decode(errors="replace").strip()
conn.close()
except (ConnectionRefusedError, OSError) as exc:
self.fail(f"Could not connect to auth bridge: {exc}")
self.assertIn(body, ("0", "1"), f"Unexpected auth bridge response {body!r} (expected '0' or '1')")
# ---------------------------------------------------------------------------
# c2s (client-to-server) tests — mirrors the phone's XMPP connection flow
# ---------------------------------------------------------------------------
@unittest.skipUnless(_xmpp_configured(), "XMPP settings not configured")
class XMPPClientAuthTests(SimpleTestCase):
"""
Full end-to-end XMPP c2s tests that reproduce exactly what a phone client
does when it connects. If the phone cannot authenticate, the
test_c2s_invalid_credentials_rejected test should confirm the error is
not-authorized (not a connection/TLS error), and test_c2s_valid_credentials_accepted
should fail for the same reason as the phone.
"""
def test_c2s_port_reachable(self):
"""Prosody c2s port 5222 accepts TCP connections."""
addr = _xmpp_address()
port = _xmpp_c2s_port()
try:
with socket.create_connection((addr, port), timeout=5):
pass
except (ConnectionRefusedError, socket.timeout, OSError) as exc:
self.fail(f"Cannot reach XMPP c2s port {addr}:{port}: {exc}")
def test_c2s_tls_cert_valid_for_domain(self):
"""
Prosody's TLS certificate is valid for the XMPP domain (what the phone checks).
Failure here means the phone will see a cert error before even trying to log in.
"""
addr = _xmpp_address()
port = _xmpp_c2s_port()
domain = _xmpp_domain()
result = _c2s_sasl_auth(
address=addr, port=port, domain=domain,
username="certcheck", password="certcheck",
verify_cert=True, timeout=10.0,
)
# We only care that we got past TLS — a SASL failure at stage "auth" is fine.
self.assertNotEqual(
result.stage, "tls",
f"TLS cert validation failed for domain {domain!r}: {result.detail}\n"
"Phone will see a certificate error — it cannot connect at all."
)
self.assertNotEqual(
result.stage, "tcp",
f"Could not reach c2s port at all: {result.detail}"
)
self.assertNotEqual(
result.stage, "starttls",
f"STARTTLS negotiation failed: {result.detail}"
)
def test_c2s_sasl_plain_offered(self):
"""Prosody offers SASL PLAIN after STARTTLS (required for password auth)."""
addr = _xmpp_address()
port = _xmpp_c2s_port()
domain = _xmpp_domain()
result = _c2s_sasl_auth(
address=addr, port=port, domain=domain,
username="saslcheck", password="saslcheck",
verify_cert=False, timeout=10.0,
)
# We should reach the "auth" stage (SASL PLAIN was offered and we tried it).
# Reaching any earlier stage means SASL PLAIN wasn't offered or something broke.
self.assertIn(
result.stage, ("auth",),
f"Did not reach SASL auth stage — stopped at {result.stage!r}: {result.detail}\n"
"Check that allow_unencrypted_plain_auth = true in prosody config."
)
def test_c2s_invalid_credentials_rejected(self):
"""
Prosody returns not-authorized for bad credentials — not a connection error.
This is the minimum bar: if this test fails with a connection error
(stage != 'auth'), it means the auth path itself is broken (e.g. the
Django auth bridge endpoint is unreachable). In that case, even valid
credentials would fail, which is exactly what the phone experiences.
"""
addr = _xmpp_address()
port = _xmpp_c2s_port()
domain = _xmpp_domain()
result = _c2s_sasl_auth(
address=addr, port=port, domain=domain,
username="nobody_special",
password="definitely-wrong-password-xyz",
verify_cert=False, timeout=10.0,
)
self.assertFalse(result.success, f"Expected auth failure for invalid creds but got success: {result}")
self.assertEqual(
result.stage, "auth",
f"Auth failed at stage {result.stage!r} (expected 'auth' / not-authorized).\n"
f"Detail: {result.detail}\n"
"This means Prosody cannot reach the Django auth bridge — "
"valid credentials would also fail. "
"Check that uWSGI has http-socket=127.0.0.1:8090 and the container is running."
)
self.assertIn(
"not-authorized", result.detail,
f"Expected 'not-authorized' failure, got: {result.detail}"
)
@unittest.skipUnless(
bool(__import__("os").environ.get("XMPP_TEST_USER")),
"Set XMPP_TEST_USER and XMPP_TEST_PASSWORD env vars to run live credential test",
)
def test_c2s_valid_credentials_accepted(self):
"""
Real credentials (XMPP_TEST_USER / XMPP_TEST_PASSWORD) are accepted.
Skipped unless env vars are set — run manually to verify end-to-end login.
"""
import os
addr = _xmpp_address()
port = _xmpp_c2s_port()
domain = _xmpp_domain()
username = os.environ["XMPP_TEST_USER"]
password = os.environ.get("XMPP_TEST_PASSWORD", "")
result = _c2s_sasl_auth(
address=addr, port=port, domain=domain,
username=username, password=password,
verify_cert=True, timeout=10.0,
)
self.assertTrue(
result.success,
f"Login with XMPP_TEST_USER={username!r} failed at stage {result.stage!r}: {result.detail}"
)

View File

@@ -8,6 +8,9 @@ pidfile=/tmp/project-master.pid
socket=/var/run/uwsgi-gia.sock
# socket 777
chmod-socket=777
# Internal HTTP listener for in-pod services (e.g. Prosody auth bridge).
# Binds loopback only; external traffic still goes through the Unix socket.
http-socket = 127.0.0.1:8090
harakiri=200
#max-requests=100000
# Set a lower value for max-requests to prevent memory leaks from building up over time

View File

@@ -32,7 +32,7 @@ PROSODY_CONTAINER="$(name_with_stack "prosody_gia")"
REDIS_DATA_DIR="${QUADLET_REDIS_DATA_DIR:-$ROOT_DIR/.podman/gia_redis_data}"
WHATSAPP_DATA_DIR="${QUADLET_WHATSAPP_DATA_DIR:-$ROOT_DIR/.podman/gia_whatsapp_data}"
SQLITE_DATA_DIR="${QUADLET_SQLITE_DATA_DIR:-$ROOT_DIR/.podman/gia_sqlite_data}"
PROSODY_CONFIG_FILE="${QUADLET_PROSODY_CONFIG_FILE:-$ROOT_DIR/utilities/prosody/prosody.cfg.lua}"
PROSODY_CONFIG_FILE="" # resolved after load_env in start_stack
PROSODY_CERTS_DIR="${QUADLET_PROSODY_CERTS_DIR:-$ROOT_DIR/.podman/gia_prosody_certs}"
PROSODY_DATA_DIR="${QUADLET_PROSODY_DATA_DIR:-$ROOT_DIR/.podman/gia_prosody_data}"
PROSODY_LOGS_DIR="${QUADLET_PROSODY_LOGS_DIR:-$ROOT_DIR/.podman/gia_prosody_logs}"
@@ -178,6 +178,7 @@ down_stack() {
start_stack() {
require_podman
load_env
PROSODY_CONFIG_FILE="${QUADLET_PROSODY_CONFIG_FILE:-$ROOT_DIR/utilities/prosody/prosody.cfg.lua}"
REPO_DIR="$(resolve_path "$REPO_DIR")"
APP_DATABASE_FILE="$(resolve_path "$APP_DATABASE_FILE")"
APP_DATABASE_BASENAME="$(basename "$APP_DATABASE_FILE")"
@@ -191,6 +192,9 @@ start_stack() {
touch "$HOST_DATABASE_FILE"
chmod 0666 "$HOST_DATABASE_FILE" 2>/dev/null || true
down_stack
# Brief pause so the OS releases port bindings from the previous Prosody
# process before new containers try to bind them.
sleep 2
local port_offset="${GIA_STACK_PORT_OFFSET:-}"
if [[ -z "$port_offset" && -n "$STACK_ID" ]]; then

View File

@@ -0,0 +1,690 @@
-- XEP-0198: Stream Management for Prosody IM
--
-- Copyright (C) 2010-2015 Matthew Wild
-- Copyright (C) 2010 Waqas Hussain
-- Copyright (C) 2012-2021 Kim Alvefur
-- Copyright (C) 2012 Thijs Alkemade
-- Copyright (C) 2014 Florian Zeitz
-- Copyright (C) 2016-2020 Thilo Molitor
--
-- This project is MIT/X11 licensed. Please see the
-- COPYING file in the source package for more information.
--
local st = require "util.stanza";
local dep = require "util.dependencies";
local cache = dep.softreq("util.cache"); -- only available in prosody 0.10+
local uuid_generate = require "util.uuid".generate;
local jid = require "util.jid";
local t_remove = table.remove;
local math_min = math.min;
local math_max = math.max;
local os_time = os.time;
local tonumber, tostring = tonumber, tostring;
local add_filter = require "util.filters".add_filter;
local timer = require "util.timer";
local datetime = require "util.datetime";
local xmlns_mam2 = "urn:xmpp:mam:2";
local xmlns_sm2 = "urn:xmpp:sm:2";
local xmlns_sm3 = "urn:xmpp:sm:3";
local xmlns_errors = "urn:ietf:params:xml:ns:xmpp-stanzas";
local xmlns_delay = "urn:xmpp:delay";
local sm2_attr = { xmlns = xmlns_sm2 };
local sm3_attr = { xmlns = xmlns_sm3 };
local resume_timeout = module:get_option_number("smacks_hibernation_time", 600);
local s2s_smacks = module:get_option_boolean("smacks_enabled_s2s", true);
local s2s_resend = module:get_option_boolean("smacks_s2s_resend", false);
local max_unacked_stanzas = module:get_option_number("smacks_max_unacked_stanzas", 0);
local max_inactive_unacked_stanzas = module:get_option_number("smacks_max_inactive_unacked_stanzas", 256);
local delayed_ack_timeout = module:get_option_number("smacks_max_ack_delay", 30);
local max_hibernated_sessions = module:get_option_number("smacks_max_hibernated_sessions", 10);
local max_old_sessions = module:get_option_number("smacks_max_old_sessions", 10);
local core_process_stanza = prosody.core_process_stanza;
local sessionmanager = require"core.sessionmanager";
assert(max_hibernated_sessions > 0, "smacks_max_hibernated_sessions must be greater than 0");
assert(max_old_sessions > 0, "smacks_max_old_sessions must be greater than 0");
local c2s_sessions = module:shared("/*/c2s/sessions");
local function init_session_cache(max_entries, evict_callback)
-- old prosody version < 0.10 (no limiting at all!)
if not cache then
local store = {};
return {
get = function(user, key)
if not user then return nil; end
if not key then return nil; end
return store[key];
end;
set = function(user, key, value)
if not user then return nil; end
if not key then return nil; end
store[key] = value;
end;
};
end
-- use per user limited cache for prosody >= 0.10
local stores = {};
return {
get = function(user, key)
if not user then return nil; end
if not key then return nil; end
if not stores[user] then
stores[user] = cache.new(max_entries, evict_callback);
end
return stores[user]:get(key);
end;
set = function(user, key, value)
if not user then return nil; end
if not key then return nil; end
if not stores[user] then stores[user] = cache.new(max_entries, evict_callback); end
stores[user]:set(key, value);
-- remove empty caches completely
if not stores[user]:count() then stores[user] = nil; end
end;
};
end
local old_session_registry = init_session_cache(max_old_sessions, nil);
local session_registry = init_session_cache(max_hibernated_sessions, function(resumption_token, session)
if session.destroyed then return true; end -- destroyed session can always be removed from cache
session.log("warn", "User has too much hibernated sessions, removing oldest session (token: %s)", resumption_token);
-- store old session's h values on force delete
-- save only actual h value and username/host (for security)
old_session_registry.set(session.username, resumption_token, {
h = session.handled_stanza_count,
username = session.username,
host = session.host
});
return true; -- allow session to be removed from full cache to make room for new one
end);
local function stoppable_timer(delay, callback)
local stopped = false;
local timer = module:add_timer(delay, function (t)
if stopped then return; end
return callback(t);
end);
if timer and timer.stop then return timer; end -- new prosody api includes stop() function
return {
stop = function(self) stopped = true end;
timer;
};
end
local function delayed_ack_function(session, stanza)
-- fire event only if configured to do so and our session is not already hibernated or destroyed
if delayed_ack_timeout > 0 and session.awaiting_ack
and not session.hibernating and not session.destroyed then
session.log("debug", "Firing event 'smacks-ack-delayed', queue = %d",
session.outgoing_stanza_queue and #session.outgoing_stanza_queue or 0);
module:fire_event("smacks-ack-delayed", {origin = session, queue = session.outgoing_stanza_queue, stanza = stanza});
end
session.delayed_ack_timer = nil;
end
local function can_do_smacks(session, advertise_only)
if session.smacks then return false, "unexpected-request", "Stream management is already enabled"; end
local session_type = session.type;
if session.username then
if not(advertise_only) and not(session.resource) then -- Fail unless we're only advertising sm
return false, "unexpected-request", "Client must bind a resource before enabling stream management";
end
return true;
elseif s2s_smacks and (session_type == "s2sin" or session_type == "s2sout") then
return true;
end
return false, "service-unavailable", "Stream management is not available for this stream";
end
module:hook("stream-features",
function (event)
if can_do_smacks(event.origin, true) then
event.features:tag("sm", sm2_attr):tag("optional"):up():up();
event.features:tag("sm", sm3_attr):tag("optional"):up():up();
end
end);
module:hook("s2s-stream-features",
function (event)
if can_do_smacks(event.origin, true) then
event.features:tag("sm", sm2_attr):tag("optional"):up():up();
event.features:tag("sm", sm3_attr):tag("optional"):up():up();
end
end);
local function request_ack_if_needed(session, force, reason, stanza)
local queue = session.outgoing_stanza_queue;
local expected_h = session.last_acknowledged_stanza + #queue;
-- session.log("debug", "*** SMACKS(1) ***: awaiting_ack=%s, hibernating=%s", tostring(session.awaiting_ack), tostring(session.hibernating));
local max_unacked = max_unacked_stanzas;
if session.state == "inactive" then
max_unacked = max_inactive_unacked_stanzas;
end
if session.awaiting_ack == nil and not session.hibernating then
-- this check of last_requested_h prevents ack-loops if missbehaving clients report wrong
-- stanza counts. it is set when an <r> is really sent (e.g. inside timer), preventing any
-- further requests until a higher h-value would be expected.
-- session.log("debug", "*** SMACKS(2) ***: #queue=%s, max_unacked_stanzas=%s, expected_h=%s, last_requested_h=%s", tostring(#queue), tostring(max_unacked_stanzas), tostring(expected_h), tostring(session.last_requested_h));
if (#queue > max_unacked and expected_h ~= session.last_requested_h) or force then
session.log("debug", "Queuing <r> (in a moment) from %s - #queue=%d", reason, #queue);
session.awaiting_ack = false;
session.awaiting_ack_timer = stoppable_timer(1e-06, function ()
-- session.log("debug", "*** SMACKS(3) ***: awaiting_ack=%s, hibernating=%s", tostring(session.awaiting_ack), tostring(session.hibernating));
-- only request ack if needed and our session is not already hibernated or destroyed
if not session.awaiting_ack and not session.hibernating and not session.destroyed then
session.log("debug", "Sending <r> (inside timer, before send) from %s - #queue=%d", reason, #queue);
(session.sends2s or session.send)(st.stanza("r", { xmlns = session.smacks }))
if session.destroyed then return end -- sending something can trigger destruction
session.awaiting_ack = true;
-- expected_h could be lower than this expression e.g. more stanzas added to the queue meanwhile)
session.last_requested_h = session.last_acknowledged_stanza + #queue;
session.log("debug", "Sending <r> (inside timer, after send) from %s - #queue=%d", reason, #queue);
if not session.delayed_ack_timer then
session.delayed_ack_timer = stoppable_timer(delayed_ack_timeout, function()
delayed_ack_function(session, nil); -- we don't know if this is the only new stanza in the queue
end);
end
end
end);
end
end
-- Trigger "smacks-ack-delayed"-event if we added new (ackable) stanzas to the outgoing queue
-- and there isn't already a timer for this event running.
-- If we wouldn't do this, stanzas added to the queue after the first "smacks-ack-delayed"-event
-- would not trigger this event (again).
if #queue > max_unacked and session.awaiting_ack and session.delayed_ack_timer == nil then
session.log("debug", "Calling delayed_ack_function directly (still waiting for ack)");
delayed_ack_function(session, stanza); -- this is the only new stanza in the queue --> provide it to other modules
end
end
local function outgoing_stanza_filter(stanza, session)
-- XXX: Normally you wouldn't have to check the xmlns for a stanza as it's
-- supposed to be nil.
-- However, when using mod_smacks with mod_websocket, then mod_websocket's
-- stanzas/out filter can get called before this one and adds the xmlns.
local is_stanza = stanza.attr and
(not stanza.attr.xmlns or stanza.attr.xmlns == 'jabber:client')
and not stanza.name:find":";
if is_stanza and not stanza._cached then
local queue = session.outgoing_stanza_queue;
local cached_stanza = st.clone(stanza);
cached_stanza._cached = true;
if cached_stanza and cached_stanza.name ~= "iq" and cached_stanza:get_child("delay", xmlns_delay) == nil then
cached_stanza = cached_stanza:tag("delay", {
xmlns = xmlns_delay,
from = jid.bare(session.full_jid or session.host),
stamp = datetime.datetime()
});
end
queue[#queue+1] = cached_stanza;
if session.hibernating then
session.log("debug", "hibernating since %s, stanza queued", datetime.datetime(session.hibernating));
module:fire_event("smacks-hibernation-stanza-queued", {origin = session, queue = queue, stanza = cached_stanza});
return nil;
end
request_ack_if_needed(session, false, "outgoing_stanza_filter", stanza);
end
return stanza;
end
local function count_incoming_stanzas(stanza, session)
if not stanza.attr.xmlns then
session.handled_stanza_count = session.handled_stanza_count + 1;
session.log("debug", "Handled %d incoming stanzas", session.handled_stanza_count);
end
return stanza;
end
local function wrap_session_out(session, resume)
if not resume then
session.outgoing_stanza_queue = {};
session.last_acknowledged_stanza = 0;
end
add_filter(session, "stanzas/out", outgoing_stanza_filter, -999);
local session_close = session.close;
function session.close(...)
if session.resumption_token then
session_registry.set(session.username, session.resumption_token, nil);
old_session_registry.set(session.username, session.resumption_token, nil);
session.resumption_token = nil;
end
-- send out last ack as per revision 1.5.2 of XEP-0198
if session.smacks and session.conn and session.handled_stanza_count then
(session.sends2s or session.send)(st.stanza("a", { xmlns = session.smacks, h = string.format("%d", session.handled_stanza_count) }));
end
return session_close(...);
end
return session;
end
local function wrap_session_in(session, resume)
if not resume then
session.handled_stanza_count = 0;
end
add_filter(session, "stanzas/in", count_incoming_stanzas, 999);
return session;
end
local function wrap_session(session, resume)
wrap_session_out(session, resume);
wrap_session_in(session, resume);
return session;
end
function handle_enable(session, stanza, xmlns_sm)
local ok, err, err_text = can_do_smacks(session);
if not ok then
session.log("warn", "Failed to enable smacks: %s", err_text); -- TODO: XEP doesn't say we can send error text, should it?
(session.sends2s or session.send)(st.stanza("failed", { xmlns = xmlns_sm }):tag(err, { xmlns = xmlns_errors}));
return true;
end
module:log("debug", "Enabling stream management");
session.smacks = xmlns_sm;
wrap_session(session, false);
local resume_token;
local resume = stanza.attr.resume;
if resume == "true" or resume == "1" then
resume_token = uuid_generate();
session_registry.set(session.username, resume_token, session);
session.resumption_token = resume_token;
end
(session.sends2s or session.send)(st.stanza("enabled", { xmlns = xmlns_sm, id = resume_token, resume = resume, max = tostring(resume_timeout) }));
return true;
end
module:hook_stanza(xmlns_sm2, "enable", function (session, stanza) return handle_enable(session, stanza, xmlns_sm2); end, 100);
module:hook_stanza(xmlns_sm3, "enable", function (session, stanza) return handle_enable(session, stanza, xmlns_sm3); end, 100);
module:hook_stanza("http://etherx.jabber.org/streams", "features",
function (session, stanza)
stoppable_timer(1e-6, function ()
if can_do_smacks(session) then
if stanza:get_child("sm", xmlns_sm3) then
session.sends2s(st.stanza("enable", sm3_attr));
session.smacks = xmlns_sm3;
elseif stanza:get_child("sm", xmlns_sm2) then
session.sends2s(st.stanza("enable", sm2_attr));
session.smacks = xmlns_sm2;
else
return;
end
wrap_session_out(session, false);
end
end);
end);
function handle_enabled(session, stanza, xmlns_sm)
module:log("debug", "Enabling stream management");
session.smacks = xmlns_sm;
wrap_session_in(session, false);
-- FIXME Resume?
return true;
end
module:hook_stanza(xmlns_sm2, "enabled", function (session, stanza) return handle_enabled(session, stanza, xmlns_sm2); end, 100);
module:hook_stanza(xmlns_sm3, "enabled", function (session, stanza) return handle_enabled(session, stanza, xmlns_sm3); end, 100);
function handle_r(origin, stanza, xmlns_sm)
if not origin.smacks then
module:log("debug", "Received ack request from non-smack-enabled session");
return;
end
module:log("debug", "Received ack request, acking for %d", origin.handled_stanza_count);
-- Reply with <a>
(origin.sends2s or origin.send)(st.stanza("a", { xmlns = xmlns_sm, h = string.format("%d", origin.handled_stanza_count) }));
-- piggyback our own ack request if needed (see request_ack_if_needed() for explanation of last_requested_h)
local expected_h = origin.last_acknowledged_stanza + #origin.outgoing_stanza_queue;
if #origin.outgoing_stanza_queue > 0 and expected_h ~= origin.last_requested_h then
request_ack_if_needed(origin, true, "piggybacked by handle_r", nil);
end
return true;
end
module:hook_stanza(xmlns_sm2, "r", function (origin, stanza) return handle_r(origin, stanza, xmlns_sm2); end);
module:hook_stanza(xmlns_sm3, "r", function (origin, stanza) return handle_r(origin, stanza, xmlns_sm3); end);
function handle_a(origin, stanza)
if not origin.smacks then return; end
origin.awaiting_ack = nil;
if origin.awaiting_ack_timer then
origin.awaiting_ack_timer:stop();
end
if origin.delayed_ack_timer then
origin.delayed_ack_timer:stop();
origin.delayed_ack_timer = nil;
end
-- Remove handled stanzas from outgoing_stanza_queue
-- origin.log("debug", "ACK: h=%s, last=%s", stanza.attr.h or "", origin.last_acknowledged_stanza or "");
local h = tonumber(stanza.attr.h);
if not h then
origin:close{ condition = "invalid-xml"; text = "Missing or invalid 'h' attribute"; };
return;
end
local handled_stanza_count = h-origin.last_acknowledged_stanza;
local queue = origin.outgoing_stanza_queue;
if handled_stanza_count > #queue then
origin.log("warn", "The client says it handled %d new stanzas, but we only sent %d :)",
handled_stanza_count, #queue);
origin.log("debug", "Client h: %d, our h: %d", tonumber(stanza.attr.h), origin.last_acknowledged_stanza);
for i=1,#queue do
origin.log("debug", "Q item %d: %s", i, tostring(queue[i]));
end
origin:close{ condition = "undefined-condition"; text = "Client acknowledged more stanzas than sent by server"; };
return;
end
for i=1,math_min(handled_stanza_count,#queue) do
local handled_stanza = t_remove(origin.outgoing_stanza_queue, 1);
module:fire_event("delivery/success", { session = origin, stanza = handled_stanza });
end
origin.log("debug", "#queue = %d", #queue);
origin.last_acknowledged_stanza = origin.last_acknowledged_stanza + handled_stanza_count;
request_ack_if_needed(origin, false, "handle_a", nil)
return true;
end
module:hook_stanza(xmlns_sm2, "a", handle_a);
module:hook_stanza(xmlns_sm3, "a", handle_a);
--TODO: Optimise... incoming stanzas should be handled by a per-session
-- function that has a counter as an upvalue (no table indexing for increments,
-- and won't slow non-198 sessions). We can also then remove the .handled flag
-- on stanzas
local function handle_unacked_stanzas(session)
local queue = session.outgoing_stanza_queue;
local error_attr = { type = "cancel" };
if #queue > 0 then
session.outgoing_stanza_queue = {};
for i=1,#queue do
if not module:fire_event("delivery/failure", { session = session, stanza = queue[i] }) then
if queue[i].attr.type ~= "error" then
local reply = st.reply(queue[i]);
if reply.attr.to ~= session.full_jid then
reply.attr.type = "error";
reply:tag("error", error_attr)
:tag("recipient-unavailable", {xmlns = "urn:ietf:params:xml:ns:xmpp-stanzas"});
core_process_stanza(session, reply);
end
end
end
end
end
end
-- don't send delivery errors for messages which will be delivered by mam later on
-- check if stanza was archived --> this will allow us to send back errors for stanzas not archived
-- because the user configured the server to do so ("no-archive"-setting for one special contact for example)
local function get_stanza_id(stanza, by_jid)
for tag in stanza:childtags("stanza-id", "urn:xmpp:sid:0") do
if tag.attr.by == by_jid then
return tag.attr.id;
end
end
return nil;
end
module:hook("delivery/failure", function(event)
local session, stanza = event.session, event.stanza;
-- Only deal with authenticated (c2s) sessions
if session.username then
if stanza.name == "message" and stanza.attr.xmlns == nil and
( stanza.attr.type == "chat" or ( stanza.attr.type or "normal" ) == "normal" ) then
-- don't store messages in offline store if they are mam results
local mam_result = stanza:get_child("result", xmlns_mam2);
if mam_result ~= nil then
return true; -- stanza already "handled", don't send an error and don't add it to offline storage
end
-- do nothing here for normal messages and don't send out "message delivery errors",
-- because messages are already in MAM at this point (no need to frighten users)
local stanza_id = get_stanza_id(stanza, jid.bare(session.full_jid));
if session.mam_requested and stanza_id ~= nil then
session.log("debug", "mod_smacks delivery/failure returning true for mam-handled stanza: mam-archive-id=%s", tostring(stanza_id));
return true; -- stanza handled, don't send an error
end
-- store message in offline store, if this client does not use mam *and* was the last client online
local sessions = prosody.hosts[module.host].sessions[session.username] and
prosody.hosts[module.host].sessions[session.username].sessions or nil;
if sessions and next(sessions) == session.resource and next(sessions, session.resource) == nil then
local ok = module:fire_event("message/offline/handle", { origin = session, username = session.username, stanza = stanza });
session.log("debug", "mod_smacks delivery/failuere returning %s for offline-handled stanza", tostring(ok));
return ok; -- if stanza was handled, don't send an error
end
end
end
end);
module:hook("pre-resource-unbind", function (event)
local session, err = event.session, event.error;
if session.smacks then
if not session.resumption_token then
local queue = session.outgoing_stanza_queue;
if #queue > 0 then
session.log("debug", "Destroying session with %d unacked stanzas", #queue);
handle_unacked_stanzas(session);
end
else
session.log("debug", "mod_smacks hibernating session for up to %d seconds", resume_timeout);
local hibernate_time = os_time(); -- Track the time we went into hibernation
session.hibernating = hibernate_time;
local resumption_token = session.resumption_token;
module:fire_event("smacks-hibernation-start", {origin = session, queue = session.outgoing_stanza_queue});
timer.add_task(resume_timeout, function ()
session.log("debug", "mod_smacks hibernation timeout reached...");
-- We need to check the current resumption token for this resource
-- matches the smacks session this timer is for in case it changed
-- (for example, the client may have bound a new resource and
-- started a new smacks session, or not be using smacks)
local curr_session = full_sessions[session.full_jid];
if session.destroyed then
session.log("debug", "The session has already been destroyed");
elseif curr_session and curr_session.resumption_token == resumption_token
-- Check the hibernate time still matches what we think it is,
-- otherwise the session resumed and re-hibernated.
and session.hibernating == hibernate_time then
-- wait longer if the timeout isn't reached because push was enabled for this session
-- session.first_hibernated_push is the starting point for hibernation timeouts of those push enabled clients
-- wait for an additional resume_timeout seconds if no push occurred since hibernation at all
local current_time = os_time();
local timeout_start = math_max(session.hibernating, session.first_hibernated_push or session.hibernating);
if session.push_identifier ~= nil and not session.first_hibernated_push then
session.log("debug", "No push happened since hibernation started, hibernating session for up to %d extra seconds", resume_timeout);
return resume_timeout;
end
if session.push_identifier ~= nil and current_time-timeout_start < resume_timeout then
session.log("debug", "A push happened since hibernation started, hibernating session for up to %d extra seconds", resume_timeout-(current_time-timeout_start));
return resume_timeout-(current_time-timeout_start); -- time left to wait
end
session.log("debug", "Destroying session for hibernating too long");
session_registry.set(session.username, session.resumption_token, nil);
-- save only actual h value and username/host (for security)
old_session_registry.set(session.username, session.resumption_token, {
h = session.handled_stanza_count,
username = session.username,
host = session.host
});
session.resumption_token = nil;
sessionmanager.destroy_session(session);
else
session.log("debug", "Session resumed before hibernation timeout, all is well")
end
end);
return true; -- Postpone destruction for now
end
end
end);
local function handle_s2s_destroyed(event)
local session = event.session;
local queue = session.outgoing_stanza_queue;
if queue and #queue > 0 then
session.log("warn", "Destroying session with %d unacked stanzas", #queue);
if s2s_resend then
for i = 1, #queue do
module:send(queue[i]);
end
session.outgoing_stanza_queue = nil;
else
handle_unacked_stanzas(session);
end
end
end
module:hook("s2sout-destroyed", handle_s2s_destroyed);
module:hook("s2sin-destroyed", handle_s2s_destroyed);
local function get_session_id(session)
return session.id or (tostring(session):match("[a-f0-9]+$"));
end
function handle_resume(session, stanza, xmlns_sm)
if session.full_jid then
session.log("warn", "Tried to resume after resource binding");
session.send(st.stanza("failed", { xmlns = xmlns_sm })
:tag("unexpected-request", { xmlns = xmlns_errors })
);
return true;
end
local id = stanza.attr.previd;
local original_session = session_registry.get(session.username, id);
if not original_session then
session.log("debug", "Tried to resume non-existent session with id %s", id);
local old_session = old_session_registry.get(session.username, id);
if old_session and session.username == old_session.username
and session.host == old_session.host
and old_session.h then
session.send(st.stanza("failed", { xmlns = xmlns_sm, h = string.format("%d", old_session.h) })
:tag("item-not-found", { xmlns = xmlns_errors })
);
else
session.send(st.stanza("failed", { xmlns = xmlns_sm })
:tag("item-not-found", { xmlns = xmlns_errors })
);
end;
elseif session.username == original_session.username
and session.host == original_session.host then
session.log("debug", "mod_smacks resuming existing session %s...", get_session_id(original_session));
original_session.log("debug", "mod_smacks session resumed from %s...", get_session_id(session));
-- TODO: All this should move to sessionmanager (e.g. session:replace(new_session))
if original_session.conn then
original_session.log("debug", "mod_smacks closing an old connection for this session");
local conn = original_session.conn;
c2s_sessions[conn] = nil;
conn:close();
end
local migrated_session_log = session.log;
original_session.ip = session.ip;
original_session.conn = session.conn;
original_session.send = session.send;
original_session.close = session.close;
original_session.filter = session.filter;
original_session.filter.session = original_session;
original_session.filters = session.filters;
original_session.stream = session.stream;
original_session.secure = session.secure;
original_session.hibernating = nil;
session.log = original_session.log;
session.type = original_session.type;
wrap_session(original_session, true);
-- Inform xmppstream of the new session (passed to its callbacks)
original_session.stream:set_session(original_session);
-- Similar for connlisteners
c2s_sessions[session.conn] = original_session;
original_session.send(st.stanza("resumed", { xmlns = xmlns_sm,
h = string.format("%d", original_session.handled_stanza_count), previd = id }));
-- Fake an <a> with the h of the <resume/> from the client
original_session:dispatch_stanza(st.stanza("a", { xmlns = xmlns_sm,
h = stanza.attr.h }));
-- Ok, we need to re-send any stanzas that the client didn't see
-- ...they are what is now left in the outgoing stanza queue
-- We have to use the send of "session" because we don't want to add our resent stanzas
-- to the outgoing queue again
local queue = original_session.outgoing_stanza_queue;
session.log("debug", "resending all unacked stanzas that are still queued after resume, #queue = %d", #queue);
for i=1,#queue do
session.send(queue[i]);
end
session.log("debug", "all stanzas resent, now disabling send() in this migrated session, #queue = %d", #queue);
function session.send(stanza)
migrated_session_log("error", "Tried to send stanza on old session migrated by smacks resume (maybe there is a bug?): %s", tostring(stanza));
return false;
end
module:fire_event("smacks-hibernation-end", {origin = session, resumed = original_session, queue = queue});
request_ack_if_needed(original_session, true, "handle_resume", nil);
else
module:log("warn", "Client %s@%s[%s] tried to resume stream for %s@%s[%s]",
session.username or "?", session.host or "?", session.type,
original_session.username or "?", original_session.host or "?", original_session.type);
session.send(st.stanza("failed", { xmlns = xmlns_sm })
:tag("not-authorized", { xmlns = xmlns_errors }));
end
return true;
end
module:hook_stanza(xmlns_sm2, "resume", function (session, stanza) return handle_resume(session, stanza, xmlns_sm2); end);
module:hook_stanza(xmlns_sm3, "resume", function (session, stanza) return handle_resume(session, stanza, xmlns_sm3); end);
module:hook("csi-client-active", function (event)
if event.origin.smacks then
request_ack_if_needed(event.origin, true, "csi-active", nil);
end
end);
module:hook("csi-flushing", function(event)
local session = event.session;
if session.smacks then
if not session.awaiting_ack and not session.hibernating and not session.destroyed then
session.log("debug", "Sending <r> (csi-flushing)");
session.awaiting_ack = true; -- The send() call may invoke this event again, so set this first
(session.sends2s or session.send)(st.stanza("r", { xmlns = session.smacks }))
end
end
end);
local function handle_read_timeout(event)
local session = event.session;
if session.smacks then
if session.awaiting_ack then
if session.awaiting_ack_timer then
session.awaiting_ack_timer:stop();
end
if session.delayed_ack_timer then
session.delayed_ack_timer:stop();
session.delayed_ack_timer = nil;
end
return false; -- Kick the session
end
session.log("debug", "Sending <r> (read timeout)");
(session.sends2s or session.send)(st.stanza("r", { xmlns = session.smacks }));
session.awaiting_ack = true;
if not session.delayed_ack_timer then
session.delayed_ack_timer = stoppable_timer(delayed_ack_timeout, function()
delayed_ack_function(session, nil);
end);
end
return true;
end
end
module:hook("s2s-read-timeout", handle_read_timeout);
module:hook("c2s-read-timeout", handle_read_timeout);