forked from adulau/cve-search
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsearch_fulltext.py
executable file
·112 lines (100 loc) · 4.28 KB
/
search_fulltext.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Search the CVE full text database
#
# Software is free software released under the "Modified BSD license"
#
# Copyright (c) 2012-2015 Alexandre Dulaunoy - [email protected]
# Copyright (c) 2015 Pieter-Jan Moreels - [email protected]
import os
from whoosh import index, qparser
from whoosh.fields import Schema, TEXT, ID
from whoosh.qparser import QueryParser
import sys
import argparse
import json
from bson import json_util
runPath = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(runPath, ".."))
from lib.Config import Configuration
indexpath = Configuration.getIndexdir()
#basepath = os.path.join(os.sep, *os.path.dirname(os.path.realpath(__file__)).rsplit('/')[:-1])
#print (os.path.split(os.path.join(basepath,indexpath)))
schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT)
ix = index.open_dir(indexpath)
argParser = argparse.ArgumentParser(description='Full text search for cve-search')
argParser.add_argument('-q', action='append', help='query to lookup (one or more)')
argParser.add_argument('-o', action='store_true', help='OR of the query to lookup (default is AND')
argParser.add_argument('-t', action='store_true', help='output title of the match CVE(s)')
argParser.add_argument('-f', action='store_true', help='output matching CVE(s) in JSON')
argParser.add_argument('-m', type=int, default=False, help='most frequent terms in CVE description (m is top-m values)')
argParser.add_argument('-l', action='store_true', default=False, help='dump all terms encountered in CVE description')
argParser.add_argument('-g', action='store_true', default=False, help='graph of most frequent terms with each matching CVE (JSON output)')
argParser.add_argument('-s', action='store_true', default=False, help='enable stemming on graph JSON output (default is False)')
argParser.add_argument('-n', action='store_true', help='lookup complete cpe (Common Platform Enumeration) name for vulnerable configuration')
argParser.add_argument('-r', action='store_true', help='lookup ranking of vulnerable configuration')
args = argParser.parse_args()
if not args.q and not args.l and not args.g and not args.m:
argParser.print_help()
exit(1)
if args.f or args.t:
from lib import CVEs
cves = CVEs.last(rankinglookup=args.r, namelookup=args.n)
if args.q:
with ix.searcher() as searcher:
if not args.o:
query = QueryParser("content", ix.schema).parse(" ".join(args.q))
else:
query = QueryParser("content", schema=ix.schema, group=qparser.OrGroup).parse(" ".join(args.q))
results = searcher.search(query, limit=None)
for x in results:
if not args.f:
print (x['path'])
else:
print(json.dumps(cves.getcve(x['path']), sort_keys=True, default=json_util.default))
if args.t and not args.f:
print (" -- " + x['title'])
elif args.m:
xr = ix.searcher().reader()
for x in xr.most_frequent_terms("content", number=args.m):
sys.stdout.write(str(int(x[0])))
sys.stdout.write(",")
sys.stdout.write(x[1].decode('utf-8'))
sys.stdout.write("\n")
elif args.l and not args.g:
xr = ix.searcher().reader()
for x in xr.lexicon("content"):
print (x)
elif args.g:
import json
if args.s:
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import stopwords
lmtzr = WordNetLemmatizer()
xr = ix.searcher().reader()
s = {"name": 'cve-search', "children": []}
d = {}
for x in xr.most_frequent_terms("content", 3000):
query = QueryParser("content", ix.schema).parse(x[1])
if args.s:
term = lmtzr.lemmatize(x[1].decode('utf-8'), 'v')
if term in stopwords.words('english'):
continue
else:
term = x[1]
term = term.decode('utf-8')
if term in d:
d[term]['size'] = d[term]['size'] + int(x[0])
else:
d[term] = {}
d[term]['size'] = int(x[0])
for k in sorted(d.keys(), key=lambda y: (d[y]['size']), reverse=True):
v = {}
v["name"] = k
v["size"] = d[k]['size']
s['children'].append(v)
print (json.dumps(s, indent=4))
else:
argParser.print_help()
exit(1)