-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsearch_service.py
53 lines (39 loc) · 1.11 KB
/
search_service.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
:Mod: search_service
:Synopsis: Search the cached page index for user-supplied terms and format the hits.
:Author:
servilla
:Created:
2/13/22
"""
import pickle
from urllib.parse import urlparse
import aiohttp
import daiquiri
import starlette.status as status
import bugle.index.load as load
from bugle.index.index import Index
from bugle.index.webpage import WebPage
from config import Config
# Module-level logger, named after this module.
logger = daiquiri.getLogger(__name__)
def search_on_terms(terms: str) -> list:
    """
    Search the cached index for ``terms`` and describe every hit.

    The pickled index is read from ``{Config.CACHE}/index.pkl`` and queried
    with ``index.search(terms, rank=True)``. The first element of each hit is
    treated as the matching web page.

    :param terms: Whitespace-separated search terms.
    :return: A list with one dict per hit, in the order returned by the
        index. Each dict has the keys:

        * ``title`` - the page title
        * ``url`` - the page URL
        * ``path`` - the path component of ``url``
        * ``terms`` - the search terms that occur (case-insensitive
          substring match) in the page's full text, joined with ``,``
        * ``tfrag`` - the same terms rendered as ``text=<term>`` and joined
          with ``&`` (presumably for a URL text fragment; the terms are not
          percent-encoded here)
    :raises OSError: if the cached index file cannot be opened.
    """
    # NOTE(review): pickle.load can execute arbitrary code contained in the
    # file; this is only safe while the cache file is written by a trusted
    # process.
    with open(f"{Config.CACHE}/index.pkl", "rb") as f:
        index = pickle.load(f)

    hits = index.search(terms, rank=True)

    # Lower-case every term once up front instead of once per hit; keep the
    # original spelling because that is what gets reported back.
    term_pairs = [(term, term.lower()) for term in terms.split()]

    pages = []
    for hit in hits:
        webpage: WebPage = hit[0]
        # Lower-case the page text once per hit rather than once per term.
        fulltext = webpage.fulltext.lower()
        # A single filter pass feeds both the "terms" and "tfrag" fields.
        matched = [term for term, lowered in term_pairs if lowered in fulltext]
        pages.append(
            {
                "title": webpage.title,
                "url": webpage.url,
                "path": urlparse(webpage.url).path,
                "terms": ",".join(matched),
                "tfrag": "&".join([f"text={term}" for term in matched]),
            }
        )
    return pages