Ingest notification matches to ES

This commit is contained in:
2023-02-02 20:04:55 +00:00
parent 79b4512546
commit df1e82c5f2
5 changed files with 80 additions and 9 deletions

View File

@@ -20,6 +20,24 @@ from core.lib.parsing import (
parse_source,
)
# Identifier fields arrive sometimes as numbers and sometimes as strings.
# If Elasticsearch sees a numeric value first it maps the field as "long",
# and every later message carrying a string in that field fails to index.
# Giving these fields an explicit string mapping up front avoids that.
# NOTE(review): despite the name, the fields are mapped as "text" rather than
# "keyword" -- confirm which one is intended before changing it, since an
# existing index cannot have a field's type altered in place.
keyword_fields = ["nick_id", "user_id", "net_id"]

# Explicit index mapping: date formats for the timestamp fields, followed by
# a string mapping for each identifier field listed in ``keyword_fields``.
# NOTE(review): "file_tim" may be a truncated "file_time" -- verify against
# the documents actually being indexed.
mapping = {
    "mappings": {
        "properties": {
            "ts": {"type": "date", "format": "epoch_second"},
            "match_ts": {"type": "date", "format": "iso8601"},
            "file_tim": {"type": "date", "format": "epoch_millis"},
            **{name: {"type": "text"} for name in keyword_fields},
        }
    }
}
class ElasticsearchBackend(StorageBackend):
def __init__(self):
@@ -41,12 +59,24 @@ class ElasticsearchBackend(StorageBackend):
"""
Initialise the Elasticsearch API endpoint in async mode.
"""
global mapping
auth = (settings.ELASTICSEARCH_USERNAME, settings.ELASTICSEARCH_PASSWORD)
client = AsyncElasticsearch(
settings.ELASTICSEARCH_URL, http_auth=auth, verify_certs=False
)
self.async_client = client
# Create the rule storage indices
if await client.indices.exists(index=settings.INDEX_RULE_STORAGE):
await client.indices.put_mapping(
index=settings.INDEX_RULE_STORAGE,
properties=mapping["mappings"]["properties"],
)
else:
await client.indices.create(
index=settings.INDEX_RULE_STORAGE, mappings=mapping["mappings"]
)
def construct_context_query(
self, index, net, channel, src, num, size, type=None, nicks=None
):
@@ -232,6 +262,23 @@ class ElasticsearchBackend(StorageBackend):
return err
return response
async def async_store_matches(self, matches):
    """
    Store a list of matches in Elasticsearch.

    Each match is indexed as its own document in the rule storage index
    (``settings.INDEX_RULE_STORAGE``). A match whose response does not
    report ``"created"`` is logged as an error and the remaining matches
    are still attempted.

    :param matches: A list of matches to store.
    """
    # Initialise the async client on first use.
    if self.async_client is None:
        await self.async_initialise()

    indexed = 0
    for match in matches:
        # Routed through the logger rather than printed to stdout.
        self.log.debug(f"Indexing match: {match}")
        result = await self.async_client.index(
            index=settings.INDEX_RULE_STORAGE, body=match
        )
        if result["result"] != "created":
            self.log.error(f"Indexing failed: {result}")
            continue
        indexed += 1

    # Report how many documents were actually created, not just attempted.
    self.log.debug(f"Indexed {indexed} of {len(matches)} matches in ES")
async def schedule_query_results(self, rule_object):
"""
Helper to run a scheduled query with reduced functionality and async.