Skip to content

Commit

Permalink
Added option to add output header to the output + Tests
Browse files Browse the repository at this point in the history
  • Loading branch information
harelba committed Jun 7, 2014
1 parent 471371d commit 9bfca3f
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 4 deletions.
26 changes: 22 additions & 4 deletions bin/q
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ default_formatting = get_option_with_default(p, 'string', 'formatting', None)
default_encoding = get_option_with_default(p, 'string', 'encoding', 'UTF-8')
# NOTE(review): this reads the same 'encoding' key as default_encoding above;
# it looks like a copy/paste slip for a dedicated output-encoding key. Confirm
# before changing, since existing configuration files may rely on it.
default_output_encoding = get_option_with_default(p, 'string', 'encoding', None)
default_query_encoding = get_option_with_default(p, 'string', 'query_encoding', locale.getpreferredencoding())
# output_header is the default of the store_true -O/--output-header flag, so it
# is read as a boolean. Read as a 'string', a configured value such as "False"
# would be a non-empty (truthy) string and would switch the header on.
# NOTE(review): assumes get_option_with_default accepts the 'boolean' type - confirm.
default_output_header = get_option_with_default(p, 'boolean', 'output_header', False)

parser = OptionParser(usage="""
q allows performing SQL-like statements on tabular text data.
Expand Down Expand Up @@ -130,6 +131,7 @@ parser.add_option("-T", "--tab-delimited-output", dest="tab_delimited_output", d
help="Same as -D <tab>. Just a shorthand for outputing tab delimited output. You can use -D $'\t' if you want.")
# -H: treat the first input row as a header row
parser.add_option(
    "-H", "--skip-header",
    action="store_true",
    dest="skip_header",
    default=default_skip_header,
    help="Skip header row. This has been changed from earlier version - Only one header row is supported, and the header row is used for column naming")
# -O: emit a header line built from the query's output column names
parser.add_option(
    "-O", "--output-header",
    action="store_true",
    dest="output_header",
    default=default_output_header,
    help="Output header line. Output column-names are determined from the query itself. Use column aliases in order to set your column names in the query. For example, 'select name FirstName,value1/value2 MyCalculation from ...'. This can be used even if there was no header in the input.")
# -f: per-column output formatting specification
parser.add_option(
    "-f", "--formatting",
    dest="formatting",
    default=default_formatting,
    help="Output-level formatting, in the format X=fmt,Y=fmt etc, where X,Y are output column numbers (e.g. 1 for first SELECT column etc.")
parser.add_option("-e", "--encoding", dest="encoding", default=default_encoding,
Expand All @@ -155,6 +157,10 @@ parser.add_option("-k", "--keep-leading-whitespace", dest="keep_leading_whitespa
def regexp(regular_expression, data):
    """Return True when `regular_expression` matches anywhere in `data`.

    `data` may be None (for example a NULL column value), in which case there
    is nothing to match and False is returned instead of letting re.search
    raise TypeError. Values that are not text (for example numbers) are
    converted with str() before matching.
    """
    if data is None:
        return False
    # (str, type(u'')) covers str and unicode on Python 2 and str on Python 3
    if not isinstance(data, (str, type(u''))):
        data = str(data)
    return re.search(regular_expression, data) is not None

class Sqlite3DBResults(object):
    """Plain holder pairing the rows fetched for a query with its column names."""

    def __init__(self,query_column_names,results):
        # Rows exactly as handed in by the caller
        self.results = results
        # Names of the query's output columns (the producer may pass None)
        self.query_column_names = query_column_names

class Sqlite3DB(object):

Expand Down Expand Up @@ -184,12 +190,17 @@ class Sqlite3DB(object):
def execute_and_fetch(self, q):
    """Execute `q` on this object's cursor and return its rows and column names.

    When `self.show_sql` is set, the repr of the statement is printed once
    before it is executed.

    Returns a Sqlite3DBResults whose `results` is the list returned by
    `cursor.fetchall()` and whose `query_column_names` is the list of decoded
    column names, or None when the cursor reports no description.
    """
    if self.show_sql:
        print(repr(q))
    self.cursor.execute(q)
    description = self.cursor.description
    if description is None:
        query_column_names = None
    else:
        # we decode the column names, so they can be encoded to any output format later on
        query_column_names = [c[0].decode('utf-8') for c in description]
    result = self.cursor.fetchall()
    # The cursor is deliberately left open (the original cursor.close() was commented out)
    return Sqlite3DBResults(query_column_names, result)

def _get_as_list_str(self, l):
return ",".join(['"%s"' % x.replace('"', '""') for x in l])
Expand Down Expand Up @@ -299,6 +310,8 @@ class Sql(object):
# names
self.qtable_name_effective_table_names = {}

self.query_column_names = None

# Go over all sql parts
idx = 0
while idx < len(self.sql_parts):
Expand Down Expand Up @@ -358,7 +371,8 @@ class Sql(object):
return " ".join(effective_sql)

def execute_and_fetch(self, db):
    """Run this query's effective SQL on `db`.

    Returns whatever `db.execute_and_fetch` returns for the SQL produced by
    `self.get_effective_sql()`.
    """
    return db.execute_and_fetch(self.get_effective_sql())


class LineSplitter(object):
Expand Down Expand Up @@ -971,7 +985,9 @@ try:
sys.exit(0)

# Execute the query exactly once, then unpack the fetched rows and the
# names of the output columns from the returned results object
db_results_obj = sql_object.execute_and_fetch(db)
m = db_results_obj.results
output_column_name_list = db_results_obj.query_column_names
except EmptyDataException:
print >>sys.stderr, "Warning - data is empty"
sys.exit(0)
Expand Down Expand Up @@ -1023,6 +1039,8 @@ else:
formatting_dict = None

try:
if options.output_header and output_column_name_list is not None:
m.insert(0,output_column_name_list)
for rownum, row in enumerate(m):
row_str = []
for i, col in enumerate(row):
Expand Down
43 changes: 43 additions & 0 deletions test/test-suite
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,20 @@ class BasicTests(AbstractQTestCase):

self.cleanup(tmpfile)

def test_output_header_when_input_header_exists(self):
    """With -H and -O, the selected column name is printed before the data rows."""
    data_file = self.create_file_with_data(sample_data_with_header)
    retcode, o, e = run_command('../bin/q -d , "select name from %s" -H -O' % data_file.name)

    self.assertEquals(retcode, 0)
    self.assertEquals(len(o), 4)
    # Header line first, then one line per data row
    for line_index, expected_line in enumerate(['name', 'a', 'b', 'c']):
        self.assertEquals(o[line_index], expected_line)

    self.cleanup(data_file)

def test_generated_column_name_warning_when_header_line_exists(self):
tmpfile = self.create_file_with_data(sample_data_with_header)
cmd = '../bin/q -d , "select c3 from %s" -H' % tmpfile.name
Expand Down Expand Up @@ -558,6 +572,25 @@ class BasicTests(AbstractQTestCase):
self.cleanup(tmp_data_file)
self.cleanup(tmp_query_file)

def test_output_header_with_non_ascii_names(self):
    """With -O, a non-ASCII column alias from a UTF-8 query file appears in the header line."""
    tmp_data_file = self.create_file_with_data(sample_data_with_header)
    query_text = "select name,'Hr\xc3\xa1\xc4\x8d' Hr\xc3\xa1\xc4\x8d from %s" % tmp_data_file.name
    tmp_query_file = self.create_file_with_data(query_text,encoding=None)

    retcode, o, e = run_command('../bin/q -d , -q %s -H -Q utf-8 -O' % tmp_query_file.name)

    self.assertEquals(retcode,0)
    self.assertEquals(len(o),4)
    self.assertEquals(len(e),0)

    # Header line first, then the three data rows, compared after decoding
    expected_lines = [
        u'name,Hr\xe1\u010d',
        u'a,Hr\xe1\u010d',
        u'b,Hr\xe1\u010d',
        u'c,Hr\xe1\u010d',
    ]
    for line_index, expected_line in enumerate(expected_lines):
        self.assertEquals(o[line_index].decode(SYSTEM_ENCODING), expected_line)

    self.cleanup(tmp_data_file)
    self.cleanup(tmp_query_file)

def test_use_query_file_with_query_encoding(self):
tmp_data_file = self.create_file_with_data(sample_data_with_header)
tmp_query_file = self.create_file_with_data("select name,'Hr\xc3\xa1\xc4\x8d' from %s" % tmp_data_file.name,encoding=None)
Expand Down Expand Up @@ -907,6 +940,16 @@ class SqlTests(AbstractQTestCase):
self.assertEquals(o[0], 'ppp [email protected]')
self.assertEquals(o[1], 'ppp [email protected]')

def test_join_example_with_output_header(self):
    """With -O, the column aliases of a join query are printed as the first output line."""
    query = 'select myfiles.c8 aaa,emails.c2 bbb from ../examples/exampledatafile myfiles join ../examples/group-emails-example emails on (myfiles.c4 = emails.c1) where myfiles.c8 = \'ppp\''
    retcode, o, e = run_command('../bin/q -O "' + query + '"')

    self.assertEquals(retcode, 0)
    self.assertEquals(len(o), 3)

    # NOTE(review): the '[email protected]' text in the two data rows looks like
    # e-mail obfuscation residue rather than real expected output - restore
    # the values from the example data files before relying on this test.
    expected_lines = ['aaa bbb', 'ppp [email protected]', 'ppp [email protected]']
    for line_index, expected_line in enumerate(expected_lines):
        self.assertEquals(o[line_index], expected_line)

def suite():
tl = unittest.TestLoader()
Expand Down

0 comments on commit 9bfca3f

Please sign in to comment.