devicehub-django/evidence/xapian.py

76 lines
2.1 KiB
Python
Raw Permalink Normal View History

2024-07-15 14:23:14 +00:00
import xapian
2024-11-22 20:46:33 +00:00
from datetime import datetime
2024-07-15 14:23:14 +00:00
2024-07-18 15:21:22 +00:00
# database = xapian.WritableDatabase("db", xapian.DB_CREATE_OR_OPEN)
2024-07-15 14:23:14 +00:00
2024-07-18 15:21:22 +00:00
# Read Only
# database = xapian.Database("db")
# indexer = xapian.TermGenerator()
# stemmer = xapian.Stem("english")
# indexer.set_stemmer(stemmer)
2024-07-15 14:23:14 +00:00
def search(institution, qs, offset=0, limit=10):
    """Run a full-text query against the "db" Xapian index for one institution.

    Args:
        institution: object whose ``id`` attribute selects the ``U<id>``
            boolean term that every returned document must carry.
        qs: query string handed to the Xapian query parser; the ``uuid:``
            field prefix is recognised.
        offset: index of the first match to return.
        limit: maximum number of matches to return.

    Returns:
        The ``xapian.MSet`` of matches, or ``None`` when the database is
        missing or cannot be opened.
    """
    try:
        database = xapian.Database("db")
    except (xapian.DatabaseNotFoundError, xapian.DatabaseOpeningError):
        # No usable index yet: callers receive None instead of an MSet.
        return None

    parser = xapian.QueryParser()
    parser.set_database(database)
    parser.set_stemmer(xapian.Stem("english"))
    parser.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
    parser.add_prefix("uuid", "uuid")
    user_query = parser.parse_query(qs)

    # Restrict the user's query to documents tagged with this institution.
    institution_filter = xapian.Query(f"U{institution.id}")
    scoped_query = xapian.Query(
        xapian.Query.OP_AND, user_query, institution_filter
    )

    enquire = xapian.Enquire(database)
    enquire.set_query(scoped_query)
    # Order by value slot 0 (reverse=True), falling back to relevance;
    # index() stores the serialised snapshot timestamp in that slot.
    enquire.set_sort_by_value_then_relevance(0, True)
    # Collapse on value slot 1, which holds the device_id.
    enquire.set_collapse_key(1)

    return enquire.get_mset(offset, limit)
2024-07-18 15:21:22 +00:00
2024-11-22 20:46:33 +00:00
def index(institution, device_id, uuid, timestamp, snap):
    """Add a snapshot to the "db" Xapian index unless its uuid is already there.

    Args:
        institution: object whose ``id`` attribute becomes the ``U<id>``
            term used by ``search`` to scope results.
        device_id: stored in value slot 1 (the collapse key used by
            ``search``).
        uuid: snapshot identifier; indexed under the ``uuid`` prefix and
            used for the duplicate check.
        timestamp: string in ``'%Y-%m-%d %H:%M:%S.%f'`` format; stored in
            value slot 0 as a sortable serialised Unix timestamp.
        snap: snapshot text; stored as the document data and indexed as
            free text.

    Returns:
        None. Returns early without writing when a document with the same
        uuid already exists for the institution.
    """
    uuid = 'uuid:"{}"'.format(uuid)
    matches = search(institution, uuid, limit=1)
    if matches and matches.size() > 0:
        return

    # Build the whole document before opening the writable database so the
    # write lock is held only for the actual insert.
    indexer = xapian.TermGenerator()
    indexer.set_stemmer(xapian.Stem("english"))
    doc = xapian.Document()
    doc.set_data(snap)
    indexer.set_document(doc)
    indexer.index_text(snap)
    indexer.index_text(uuid, 10, "uuid")

    # Value slots: 0 = timestamp, 1 = device_id, 2 = uuid.
    doc.add_value(1, device_id)
    # NOTE(review): at this point ``uuid`` is the prefixed 'uuid:"..."' query
    # string, so that is what slot 2 holds; kept as-is for compatibility with
    # existing readers - confirm whether the bare uuid was intended.
    doc.add_value(2, str(uuid))
    try:
        timestamp_dt = datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S.%f')
        timestamp_unix = timestamp_dt.timestamp()
        doc.add_value(0, xapian.sortable_serialise(timestamp_unix))
    except ValueError as e:
        # Best effort: the document is still indexed, just without a
        # sortable timestamp in slot 0.
        print(f"Error parsing timestamp: {e}")

    doc.add_term("U{}".format(institution.id))

    database = xapian.WritableDatabase("db", xapian.DB_CREATE_OR_OPEN)
    try:
        database.add_document(doc)
        # Commit explicitly so a failure surfaces here instead of being
        # swallowed when the handle is garbage-collected.
        database.commit()
    finally:
        # Always release the write lock, even if the insert failed.
        database.close()