This repository has been archived by the owner on Nov 9, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 264
/
Copy pathmake_otu_heatmap.py
executable file
·254 lines (228 loc) · 11.1 KB
/
make_otu_heatmap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
#!/usr/bin/env python
from __future__ import division
__author__ = "Dan Knights"
__copyright__ = "Copyright 2011, The QIIME project"
__credits__ = [
"Dan Knights",
"Jose Carlos Clemente Litran",
"Yoshiki Vazquez Baeza",
"Greg Caporaso",
"Jai Ram Rideout"]
__license__ = "GPL"
__version__ = "1.9.1-dev"
__maintainer__ = "Dan Knights"
__email__ = "[email protected]"
import numpy as np
from biom import load_table
from qiime.make_otu_heatmap import (
plot_heatmap, get_clusters, make_otu_labels, extract_metadata_column,
get_order_from_categories, get_order_from_tree, names_to_indices,
get_log_transform, get_overlapping_samples)
from qiime.util import (parse_command_line_parameters, get_options_lookup,
make_option, MissingFileError)
from qiime.parse import parse_mapping_file
options_lookup = get_options_lookup()
script_info = {}
script_info['brief_description'] = """Plot heatmap of OTU table"""
script_info['script_description'] = (
"This script visualizes an OTU table as a heatmap where each row "
"corresponds to an OTU and each column corresponds to a sample. The "
"higher the relative abundance of an OTU in a sample, the more intense "
"the color at the corresponsing position in the heatmap. By default, the "
"OTUs (rows) will be clustered by UPGMA hierarchical clustering, and the "
"samples (columns) will be presented in the order in which they appear in "
"the OTU table. Alternatively, the user may supply a tree to sort the "
"OTUs (rows) or samples (columns), or both. The user may also pass in a "
"mapping file for sorting samples. If the user passes in a mapping file "
"and a metadata category, samples (columns) will be grouped by category "
"value and subsequently clustered within each group.")
script_info['script_usage'] = []
script_info['script_usage'].append(
("",
"Generate a heatmap as a PDF using all default values:",
"%prog -i otu_table.biom -o heatmap.pdf"))
script_info['script_usage'].append(
("",
"Generate a heatmap as a PNG:",
"%prog -i otu_table.biom -o heatmap.png -g png"))
script_info['script_usage'].append(
("",
"Sort the heatmap columns (samples) by the order of samples in the "
"mapping file",
"%prog -i otu_table.biom -o heatmap_sorted_samples.pdf -m "
"mapping_file.txt"))
script_info['script_usage'].append(
("",
"Sort the heatmap columns (samples) by the order of samples in the "
"mapping file, and sort the heatmap rows by the order of tips in the "
"tree:",
"%prog -i otu_table.biom -o heatmap_sorted.pdf -m mapping_file.txt -t "
"rep_set.tre"))
script_info['script_usage'].append(
("",
"Group the heatmap columns (samples) by metadata category (e.g., "
"Treatment), then cluster within each group:""",
"%prog -i otu_table.biom -o heatmap_grouped_by_Treatment.pdf -m "
"mapping_file.txt -c Treatment"))
script_info['output_description'] = (
"A single output file is created containing the heatmap of the OTU "
"table (a PDF file by default).")
script_info['required_options'] = [
options_lookup['otu_table_as_primary_input'],
options_lookup['output_fp']
]
script_info['optional_options'] = [
make_option('-t', '--otu_tree', type='existing_filepath', help='Tree file '
'to be used for sorting OTUs in the heatmap', default=None),
make_option('-m', '--map_fname', dest='map_fname',
type='existing_filepath', help='Metadata mapping file to be '
'used for sorting Samples in the heatmap.', default=None),
make_option('-c', '--category', dest='category', type="string",
help='Metadata category for sorting samples. Samples will be '
'clustered within each category level using euclidean UPGMA.',
default=None),
make_option('-s', '--sample_tree', dest='sample_tree',
type='existing_filepath', help='Tree file to be used for '
'sorting samples (e.g, output from upgma_cluster.py). If both '
'this and the sample mapping file are provided, the mapping '
'file is ignored.', default=None),
make_option('-g', '--imagetype',
help='type of image to produce (i.e. png, svg, pdf) '
'[default: %default]', default='pdf', type="choice",
choices=['pdf', 'png', 'svg']),
make_option('--no_log_transform', action="store_true",
help='Data will not be log-transformed. Without this option, '
'all zeros will be set to a small value (default is 1/2 the '
'smallest non-zero entry). Data will be translated to be '
'non-negative after log transform, and num_otu_hits will be '
'set to 0.', default=False),
make_option('--suppress_row_clustering', action="store_true",
help='No UPGMA clustering of OTUs (rows) is performed. If '
'--otu_tree is provided, this flag is ignored.',
default=False),
make_option('--suppress_column_clustering', action="store_true",
help='No UPGMA clustering of Samples (columns) is performed. '
'If --map_fname is provided, this flag is ignored.',
default=False),
make_option('--absolute_abundance', action="store_true",
help='Do not normalize samples to sum to 1 [default: %default]',
default=False),
make_option('--color_scheme', default="YlGn",
help="color scheme for figure. see "
"http://matplotlib.org/examples/color/"
"colormaps_reference.html for choices "
"[default: %default]"),
make_option('--width',
help='width of the figure in inches [default: %default]',
default=5, type='float'),
make_option('--height',
help='height of the figure in inches [default: %default]',
default=5, type='float'),
make_option('--dpi',
help='resolution of the figure in dots per inch '
'[default: value of savefig.dpi in matplotlibrc file]',
type='int', default=None),
make_option('--obs_md_category', default="taxonomy",
help="observation metadata category to plot "
"[default: %default]"),
make_option('--obs_md_level', default=None, type="int",
help="the level of observation metadata to plot for "
"hierarchical metadata [default: lowest level]")
]
script_info['version'] = __version__
def main():
option_parser, opts, args = parse_command_line_parameters(**script_info)
otu_table = load_table(opts.otu_table_fp)
obs_md_category = opts.obs_md_category
obs_md_level = opts.obs_md_level
if obs_md_level is None:
# grab the last level if the user didn't specify a level
obs_md_level = -1
else:
# convert to 0-based indexing
obs_md_level -= 1
obs_md = otu_table.metadata(axis='observation')
obs_md_labels = []
if (obs_md is None or obs_md_category not in obs_md[0]):
obs_md_labels = [['']] * len(otu_table.ids(axis='observation'))
else:
for _, _, md in otu_table.iter(axis='observation'):
current_md = md[obs_md_category]
if obs_md_level < len(current_md):
current_md_at_level = current_md[obs_md_level]
else:
current_md_at_level = ''
obs_md_labels.append([current_md_at_level])
otu_labels = make_otu_labels(otu_table.ids(axis='observation'),
obs_md_labels)
# Convert to relative abundance if requested
if not opts.absolute_abundance:
otu_table = otu_table.norm(axis='observation')
# Get log transform if requested
if not opts.no_log_transform:
otu_table = get_log_transform(otu_table)
# Re-order samples by tree if provided
if opts.sample_tree is not None:
sample_order = get_order_from_tree(otu_table.ids(),
open(opts.sample_tree, 'U'))
# if there's no sample tree, sort samples by mapping file
elif opts.map_fname is not None:
lines = open(opts.map_fname, 'U').readlines()
metadata = list(parse_mapping_file(lines))
new_map, otu_table = get_overlapping_samples(metadata[0], otu_table)
metadata[0] = new_map
map_sample_ids = zip(*metadata[0])[0]
# if there's a category, do clustering within each category
if opts.category is not None:
category_labels = extract_metadata_column(otu_table.ids(),
metadata, opts.category)
sample_order = get_order_from_categories(otu_table,
category_labels)
# else: just use the mapping file order
else:
ordered_sample_ids = []
for sample_id in map_sample_ids:
if otu_table.exists(sample_id):
ordered_sample_ids.append(sample_id)
sample_order = names_to_indices(
otu_table.ids(),
ordered_sample_ids)
# if no tree or mapping file, perform upgma euclidean
elif not opts.suppress_column_clustering:
data = np.asarray([i for i in otu_table.iter_data(axis='observation')])
sample_order = get_clusters(data, axis='column')
# else just use OTU table ordering
else:
sample_order = np.arange(len(otu_table.ids()))
# re-order OTUs by tree (if provided), or clustering
if opts.otu_tree is not None:
# open tree file
try:
f = open(opts.otu_tree, 'U')
except (TypeError, IOError):
raise MissingFileError("Couldn't read tree file at path: %s" %
opts.otu_tree)
otu_order = get_order_from_tree(otu_table.ids(axis='observation'), f)
f.close()
# if no tree or mapping file, perform upgma euclidean
elif not opts.suppress_row_clustering:
data = np.asarray([i for i in otu_table.iter_data(axis='observation')])
otu_order = get_clusters(data, axis='row')
# else just use OTU table ordering
else:
otu_order = np.arange(len(otu_table.ids(axis='observation')))
# otu_order and sample_order should be ids, rather than indices
# to use in sortObservationOrder/sortSampleOrder
otu_id_order = [otu_table.ids(axis='observation')[i] for i in otu_order]
sample_id_order = [otu_table.ids()[i] for i in sample_order]
# Re-order otu table, sampleids, etc. as necessary
otu_table = otu_table.sort_order(otu_id_order, axis='observation')
otu_labels = np.array(otu_labels)[otu_order]
otu_table = otu_table.sort_order(sample_id_order)
sample_labels = otu_table.ids()
plot_heatmap(otu_table, otu_labels, sample_labels, opts.output_fp,
imagetype=opts.imagetype, width=opts.width,
height=opts.height, dpi=opts.dpi,
color_scheme=opts.color_scheme)
if __name__ == "__main__":
main()