From f7392cf31d681fedf4496661595c38dfa567235f Mon Sep 17 00:00:00 2001 From: Thomas Rusiecki Date: Fri, 22 Nov 2024 17:46:33 -0300 Subject: [PATCH 1/3] changed xapian index to include hid --- evidence/xapian.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/evidence/xapian.py b/evidence/xapian.py index 98da706..2bfc92b 100644 --- a/evidence/xapian.py +++ b/evidence/xapian.py @@ -1,4 +1,5 @@ import xapian +from datetime import datetime # database = xapian.WritableDatabase("db", xapian.DB_CREATE_OR_OPEN) @@ -28,11 +29,17 @@ def search(institution, qs, offset=0, limit=10): ) enquire = xapian.Enquire(database) enquire.set_query(final_query) + + enquire.set_sort_by_value_then_relevance(0, True) + + # collapse key is device_id + enquire.set_collapse_key(1) + matches = enquire.get_mset(offset, limit) return matches -def index(institution, device_id, uuid, timestamp, snap): uuid = 'uuid:"{}"'.format(uuid) matches = search(institution, uuid, limit=1) if matches and matches.size() > 0: @@ -50,6 +57,18 @@ def index(institution, uuid, snap): indexer.index_text(snap) indexer.index_text(uuid, 10, "uuid") # indexer.index_text(snap, 1, "snapshot") + + # store device_id, uuid and timestamp + doc.add_value(1, device_id) + doc.add_value(2, str(uuid)) + + try: + timestamp_dt = datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S.%f') + timestamp_unix = timestamp_dt.timestamp() + doc.add_value(0, xapian.sortable_serialise(timestamp_unix)) + except ValueError as e: + print(f"Error parsing timestamp: {e}") + institution_term = "U{}".format(institution.id) doc.add_term(institution_term) -- 2.30.2 From 0a9368922a597a888816ef9f3be2b4a8e2d23b73 Mon Sep 17 00:00:00 2001 From: Thomas Rusiecki Date: Fri, 22 Nov 2024 17:47:01 -0300 Subject: [PATCH 2/3] snapshot correct indexing --- dashboard/views.py | 8 +------- evidence/parse.py | 4 +++- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/dashboard/views.py 
b/dashboard/views.py index 8d91732..8d96ddd 100644 --- a/dashboard/views.py +++ b/dashboard/views.py @@ -70,14 +70,8 @@ class SearchView(InventaryMixin): return self.search_hids(query, offset, limit) devices = [] - dev_id = [] - for x in matches: - # devices.append(self.get_annotations(x)) - dev = self.get_annotations(x) - if dev.id not in dev_id: - devices.append(dev) - dev_id.append(dev.id) + devices.append(self.get_annotations(x)) count = matches.size() # TODO fix of pagination, the count is not correct diff --git a/evidence/parse.py b/evidence/parse.py index fd68e06..26faec0 100644 --- a/evidence/parse.py +++ b/evidence/parse.py @@ -49,8 +49,10 @@ class Build: self.create_annotations() def index(self): + timestamp = self.json['timestamp'] snap = json.dumps(self.json) - index(self.user.institution, self.uuid, snap) + + index(self.user.institution, self.get_hid(self.json) , self.uuid, timestamp, snap) def generate_chids(self): self.algorithms = { -- 2.30.2 From dfdc2150d0a4ecb521c30b0b4596c9043a2124d7 Mon Sep 17 00:00:00 2001 From: Thomas Rusiecki Date: Fri, 22 Nov 2024 17:52:30 -0300 Subject: [PATCH 3/3] added placeholder id for xapian indexing --- utils/device.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/utils/device.py b/utils/device.py index beaa4b8..76bc56f 100644 --- a/utils/device.py +++ b/utils/device.py @@ -101,6 +101,8 @@ def create_index(doc, user): if not doc or not doc.get('uuid'): return [] + _timestamp = doc['endTime'] _uuid = doc['uuid'] + _device_id = doc['CUSTOMER_ID'] ev = json.dumps(doc) - index(user.institution, _uuid, ev) + index(user.institution, _device_id, _uuid, _timestamp, ev) -- 2.30.2