forked from pgadmin-org/pgadmin4
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsql_keywords.py
106 lines (76 loc) · 3.06 KB
/
sql_keywords.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# -*- coding: utf-8 -*-
##########################################################################
#
# pgAdmin 4 - PostgreSQL Tools
#
# Copyright (C) 2013 - 2025, The pgAdmin Development Team
# This software is released under the PostgreSQL Licence
#
##########################################################################
# This utility extracts SQL keywords from the PostgreSQL documentation
# website and PL/pgSQL keywords from the source files listed in
# PG_CODES_URLS.
# Note that PG_CODES_URLS may need to be updated manually whenever the
# PostgreSQL version changes.
import re
import requests
import argparse
# Source files scanned for PG_KEYWORD(...) declarations.
PG_CODES_URLS = [
    "https://raw.githubusercontent.com/postgres/postgres/master/src/pl/"
    "plpgsql/src/pl_scanner.c",
]

# Captures the lowercase keyword name from a declaration such as
#   PG_KEYWORD("begin", K_BEGIN, RESERVED_KEYWORD)
PG_CODES_REGEX = r"PG_KEYWORD\(\"([a-z]*)\"[A-Z_, ]*\)"

# Documentation page that lists the SQL keywords.
PG_SQL_DOCS_URL = (
    "https://www.postgresql.org/docs/current/sql-keywords-appendix.html"
)

# Captures the run of uppercase letters/underscores that directly follows an
# opening tag, e.g. <code class="token">ABORT</code>.  The captured run may
# be empty when the tag is followed by something else.
PG_SQL_DOCS_REGEX = "<[a-z =\"]*>([A-Z_]*)"

# Page whose text carries the current PostgreSQL version number, and the
# pattern that captures that number.
PG_CURRENT_VERSION_URL = "https://www.postgresql.org/docs/current/index.html"
PG_CURRENT_VERSION_REGEX = "PostgreSQL ([0-9.]+) Documentation"
def apply_regex(text, regex):
    """Return all non-overlapping matches of ``regex`` found in ``text``.

    Args:
        text: The string to search.
        regex: The regular expression pattern to look for.

    Returns:
        The list produced by ``findall``: whole matches when the pattern has
        no capture group, the group's text when it has one group, and tuples
        when it has several.
    """
    pattern = re.compile(regex)
    return pattern.findall(text)
def get_file_from_url(url, timeout=30):
    """Download ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address of the resource to fetch.
        timeout: Seconds to wait for the server before giving up.  Without
            a timeout ``requests`` may wait indefinitely on an unresponsive
            server, which would hang this script.

    Returns:
        The decoded response body (``Response.text``).  The HTTP status code
        is not inspected, so the body of an error page is returned as-is.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure, timeout, ...).
    """
    response = requests.get(url, timeout=timeout)
    return response.text
def extract_keywords(text, regex):
    """Extract keywords from ``text`` and return them in lowercase.

    Args:
        text: The text to scan.
        regex: Pattern whose matches (or single capture group) are the
            keywords.

    Returns:
        A list with the lowercased matches, in the order they were found.
        Empty matches are skipped: a capture group such as ``([A-Z_]*)`` is
        allowed to match nothing, and the resulting ``""`` is not a keyword.
    """
    matches = apply_regex(text, regex)
    # Drop empty captures so they are neither listed nor counted as keywords.
    return [match.lower() for match in matches if match]
def get_release_tag(current_url=PG_CURRENT_VERSION_URL,
                    version_regex=PG_CURRENT_VERSION_REGEX):
    """Build a ``REL_<version>`` tag from the version found at ``current_url``.

    Args:
        current_url: Page whose text contains the version number.
        version_regex: Pattern with one capture group that extracts the
            version number from that page.

    Returns:
        ``"REL_"`` followed by the first captured version with every ``.``
        replaced by ``_`` (for example ``"17.5"`` becomes ``"REL_17_5"``).

    Raises:
        IndexError: If ``version_regex`` matches nothing in the page.  The
            exception type is the one the bare ``[0]`` lookup used to raise;
            only the message is new.
    """
    resp_text = get_file_from_url(current_url)
    matches = apply_regex(resp_text, version_regex)
    if not matches:
        # Fail with a message that names the page and pattern instead of an
        # opaque "list index out of range".
        raise IndexError(
            "No version number matching %r found at %s"
            % (version_regex, current_url)
        )
    version = matches[0]
    return "REL_" + version.replace(".", "_")
def get_keywords_pg_code(file_urls=PG_CODES_URLS,
                         keyword_regex=PG_CODES_REGEX):
    """Collect the keywords declared in the given source files.

    Args:
        file_urls: URLs of the source files to download and scan.
        keyword_regex: Pattern that captures the keyword name from each
            declaration.

    Returns:
        A list of lowercase keywords from all files, in the order found
        (duplicates are kept).
    """
    keywords = []
    # Resolved on first use: the release tag costs an extra HTTP request and
    # is only needed for URLs that take the ";hb=" parameter.
    rel_tag = None
    for file_url in file_urls:
        # ";hb=<ref>" is a query-string parameter (gitweb's "hash base"), so
        # it is only appended to URLs that already carry a query string.
        # Appending it to a plain path URL, such as a raw.githubusercontent.com
        # link, would alter the path and request a non-existent file.
        if "?" in file_url and "hb" not in file_url:
            if rel_tag is None:
                rel_tag = get_release_tag()
            file_url = file_url + ";hb=" + rel_tag
        resp_text = get_file_from_url(file_url)
        # Sample entry - PG_KEYWORD("begin", K_BEGIN, RESERVED_KEYWORD)
        keywords.extend(extract_keywords(resp_text, keyword_regex))
    return keywords
def get_keywords_pg_docs(docs_url=PG_SQL_DOCS_URL,
                         keyword_regex=PG_SQL_DOCS_REGEX):
    """Collect the keywords listed on the documentation page.

    Args:
        docs_url: URL of the page to download.
        keyword_regex: Pattern that captures each keyword from the page.

    Returns:
        The list of lowercase keywords extracted from the page.
    """
    page = get_file_from_url(docs_url)
    # Sample entry - <code class="token">ABORT</code>
    return extract_keywords(page, keyword_regex)
def get_all_keywords():
    """Merge the keywords from the source code and from the documentation.

    Returns:
        A ``(count, text)`` tuple: ``count`` is the number of distinct
        strings collected and ``text`` is those strings sorted, joined with
        single spaces and stripped of surrounding whitespace.
    """
    # Source-code keywords are fetched first, then the documentation ones.
    unique = set(get_keywords_pg_code()) | set(get_keywords_pg_docs())
    joined = " ".join(sorted(unique))
    return len(unique), joined.strip()
if __name__ == '__main__':
    # Command-line entry point: print the space-separated keyword list and,
    # with --total, the number of keywords as well.
    args_parser = argparse.ArgumentParser(description="SQL Keywords extractor")
    args_parser.add_argument(
        '--total',
        help="Print with total number of keywords",
        action="store_true"
    )
    args = args_parser.parse_args()

    total, keywords = get_all_keywords()

    if args.total:
        # The keyword list is already part of the format string; prefixing
        # it again would print every keyword twice.
        print("%s\n\n%d keywords extracted." % (keywords, total))
    else:
        print(keywords)