forked from aws/aws-cli
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtopictags.py
295 lines (251 loc) · 12.4 KB
/
topictags.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
# Copyright (c) 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
import os
import json
import docutils.core
class TopicTagDB(object):
    """This class acts like a database for the tags of all available topics.

    A tag is an element in a topic reStructured text file that contains
    information about a topic. Information can range from titles to even
    related CLI commands. Here are all of the currently supported tags:

    Tag                 Meaning                                 Required?
    ---                 -------                                 ---------
    :title:             The title of the topic                  Yes
    :description:       Sentence description of topic           Yes
    :category:          Category topic falls under              Yes
    :related topic:     A related topic                         No
    :related command:   A related command                       No

    To see examples of how to specify tags, look in the directory
    awscli/topics. Note that tags can have multiple values by delimiting
    values with commas. All tags must be on their own line in the file.

    This class can load a JSON index representing all topics and their tags,
    scan all of the topics and store the values of their tags, retrieve the
    tag value for a particular topic, query for all the topics with a specific
    tag and/or value, and save the loaded data back out to a JSON index.

    The structure of the database can be viewed as a python dictionary:

    {'topic-name-1': {
        'title': ['My First Topic Title'],
        'description': ['This describes my first topic'],
        'category': ['General Topics', 'S3'],
        'related command': ['aws s3'],
        'related topic': ['topic-name-2']
     },
     'topic-name-2': { .....
    }

    The keys of the dictionary are the CLI command names of the topics. These
    names are based off the name of the reStructured text file that
    corresponds to the topic. The value of these keys are dictionaries of
    tags, where the tags are keys and their value is a list of values for
    that tag. Note that all tag values for a specific tag of a specific topic
    are unique.
    """

    VALID_TAGS = ['category', 'description', 'title', 'related topic',
                  'related command']

    # The default directory to look for topics.
    TOPIC_DIR = os.path.join(
        os.path.dirname(
            os.path.abspath(__file__)), 'topics')

    # The default JSON index to load.
    JSON_INDEX = os.path.join(TOPIC_DIR, 'topic-tags.json')

    def __init__(self, tag_dictionary=None, index_file=JSON_INDEX,
                 topic_dir=TOPIC_DIR):
        """
        :param tag_dictionary: An existing dictionary of topics and their
            tags to use as the database. If nothing is specified, the
            database starts out as an empty dictionary.

        :param index_file: The path to a specific JSON index to load.
            If nothing is specified it will default to the default JSON
            index at ``JSON_INDEX``.

        :param topic_dir: The path to the directory where to retrieve
            the topic source files. Note that if you store your index
            in this directory, you must supply the full path to the json
            index to the ``index_file`` argument, as otherwise it will not
            be excluded when listing topic source files. If nothing is
            specified it will default to the default directory at
            ``TOPIC_DIR``.
        """
        self._tag_dictionary = tag_dictionary
        if self._tag_dictionary is None:
            self._tag_dictionary = {}

        self._index_file = index_file
        self._topic_dir = topic_dir

    @property
    def index_file(self):
        return self._index_file

    @index_file.setter
    def index_file(self, value):
        self._index_file = value

    @property
    def topic_dir(self):
        return self._topic_dir

    @topic_dir.setter
    def topic_dir(self, value):
        self._topic_dir = value

    @property
    def valid_tags(self):
        return self.VALID_TAGS

    def load_json_index(self):
        """Loads a JSON file into the tag dictionary."""
        with open(self.index_file, 'r') as f:
            self._tag_dictionary = json.load(f)

    def save_to_json_index(self):
        """Writes the loaded data back out to the JSON index."""
        with open(self.index_file, 'w') as f:
            f.write(json.dumps(self._tag_dictionary, indent=4, sort_keys=True))

    def get_all_topic_names(self):
        """Retrieves all of the topic names of the loaded JSON index"""
        return list(self._tag_dictionary)

    def get_all_topic_src_files(self):
        """Retrieves the file paths of all the topics in directory"""
        topic_full_paths = []
        for topic_name in os.listdir(self.topic_dir):
            # Do not try to load hidden files.
            if topic_name.startswith('.'):
                continue
            topic_full_path = os.path.join(self.topic_dir, topic_name)
            # Ignore the JSON Index as it is stored with topic files.
            if topic_full_path != self.index_file:
                topic_full_paths.append(topic_full_path)
        return topic_full_paths

    def scan(self, topic_files):
        """Scan in the tags of a list of topics into memory.

        Note that if there are existing values in an entry in the database
        of tags, they will not be overwritten. Any new values will be
        appended to original values.

        :param topic_files: A list of paths to topics to scan into memory.
        """
        for topic_file in topic_files:
            # Only hold the file open while reading it; parsing happens
            # on the in-memory content afterwards.
            with open(topic_file, 'r') as f:
                topic_content = f.read()
            # Parse out the name of the topic
            topic_name = self._find_topic_name(topic_file)
            # Add the topic to the dictionary if it does not exist
            self._add_topic_name_to_dict(topic_name)
            # Record the tags and the values
            self._add_tag_and_values_from_content(
                topic_name, topic_content)

    def _find_topic_name(self, topic_src_file):
        # The topic name is the file name of the topic source file with
        # its extension (normally ``.rst``) removed. ``splitext`` is used
        # rather than slicing off a fixed number of characters so that a
        # file with a different (or no) extension is not mangled.
        topic_name_with_ext = os.path.basename(topic_src_file)
        return os.path.splitext(topic_name_with_ext)[0]

    def _add_tag_and_values_from_content(self, topic_name, content):
        # Retrieves tags and values from the content of a topic file and
        # adds them to the dictionary. Raises a ValueError if a field whose
        # name is not in VALID_TAGS is encountered.
        doctree = docutils.core.publish_doctree(content).asdom()
        fields = doctree.getElementsByTagName('field')
        for field in fields:
            field_name = field.getElementsByTagName('field_name')[0]
            field_body = field.getElementsByTagName('field_body')[0]
            # Get the tag.
            tag = field_name.firstChild.nodeValue
            if tag not in self.VALID_TAGS:
                raise ValueError(
                    "Tag %s found under topic %s is not supported."
                    % (tag, topic_name)
                )
            # Get the value of the tag.
            values = field_body.childNodes[0].firstChild.nodeValue
            # Separate values into a list by splitting at commas and
            # strip the white space around each of these values.
            tag_values = [value.strip() for value in values.split(',')]
            self._add_tag_to_dict(topic_name, tag, tag_values)

    def _add_topic_name_to_dict(self, topic_name):
        # This method adds a topic name to the dictionary if it does not
        # already exist (or if its current entry is None).
        if self._tag_dictionary.get(topic_name) is None:
            self._tag_dictionary[topic_name] = {}

    def _add_tag_to_dict(self, topic_name, tag, values):
        # This method adds a tag to the dictionary given its tag and value
        # If there are existing values associated to the tag it will add
        # only values that previously did not exist in the list.

        # Add topic to the topic tag dictionary if needed.
        self._add_topic_name_to_dict(topic_name)
        # Get all of a topics tags
        topic_tags = self._tag_dictionary[topic_name]
        self._add_key_values(topic_tags, tag, values)

    def _add_key_values(self, dictionary, key, values):
        # This method adds a value to a dictionary given a key.
        # If there are existing values associated to the key it will add
        # only values that previously did not exist in the list. All values
        # in the dictionary should be lists
        if dictionary.get(key) is None:
            dictionary[key] = []
        existing_values = dictionary[key]
        for value in values:
            if value not in existing_values:
                existing_values.append(value)

    def query(self, tag, values=None):
        """Groups topics by a specific tag and/or tag value.

        :param tag: The name of the tag to query for.
        :param values: A list of tag values to only include in query.
            If no value is provided, all possible tag values will be returned

        :rtype: dictionary
        :returns: A dictionary whose keys are all possible tag values and the
            keys' values are all of the topic names that had that tag value
            in its source file. For example, if ``topic-name-1`` had the tag
            ``:category: foo, bar`` and ``topic-name-2`` had the tag
            ``:category: foo`` and we queried based on ``:category:``,
            the returned dictionary would be:

            {
             'foo': ['topic-name-1', 'topic-name-2'],
             'bar': ['topic-name-1']
            }
        """
        query_dict = {}
        for topic_name, topic_tags in self._tag_dictionary.items():
            # Get the tag values for a specified tag of the topic
            tag_values = topic_tags.get(tag)
            if tag_values is None:
                continue
            for tag_value in tag_values:
                # Add the values to dictionary to be returned if
                # no value constraints are provided or if the tag value
                # falls in the allowed tag values.
                if values is None or tag_value in values:
                    self._add_key_values(query_dict,
                                         key=tag_value,
                                         values=[topic_name])
        return query_dict

    def get_tag_value(self, topic_name, tag, default_value=None):
        """Get a value of a tag for a topic

        :param topic_name: The name of the topic
        :param tag: The name of the tag to retrieve
        :param default_value: The value to return if the topic and/or tag
            does not exist.
        """
        if topic_name in self._tag_dictionary:
            return self._tag_dictionary[topic_name].get(tag, default_value)
        return default_value

    def get_tag_single_value(self, topic_name, tag):
        """Get the value of a tag for a topic (i.e. not wrapped in a list)

        :param topic_name: The name of the topic
        :param tag: The name of the tag to retrieve
        :returns: The single value of the tag, or ``None`` if the topic
            and/or tag does not exist.
        :raises ValueError: Raised if there is not exactly one value
            in the list value.
        """
        value = self.get_tag_value(topic_name, tag)
        if value is not None:
            if len(value) != 1:
                raise ValueError(
                    'Tag %s for topic %s has value %s. Expected a single '
                    'element in list.' % (tag, topic_name, value)
                )
            value = value[0]
        return value