Skip to content

Commit

Permalink
Escape distribution key when creating WET in gptransfer (#1182)
Browse files Browse the repository at this point in the history
* Escape distribution key when creating WET in gptransfer

* There is an issue where when a user creates a table and the table has
  a column with CAPITAL LETTERS or quotes and that column is used as the
  distributed key, then gptransfer fails. gptransfer tries to create
  the WRITABLE EXTERNAL TABLE for gpfdist with unquoted values and
  breaks; this is because the DISTRIBUTED BY portion is not quoted properly.
* This commit handles the quotation for the distributed key

Authors: Marbin Tan & Todd Sedano
  • Loading branch information
Chibin authored Oct 12, 2016
1 parent 428fe8a commit c7cd299
Show file tree
Hide file tree
Showing 4 changed files with 138 additions and 3 deletions.
10 changes: 10 additions & 0 deletions gpMgmt/bin/gppylib/test/behave/mgmt_utils/gptransfer.feature
Original file line number Diff line number Diff line change
Expand Up @@ -2203,6 +2203,16 @@ Feature: gptransfer tests
Then gptransfer should return a return code of 0
And verify that gptransfer is in order of "input_file" when partition transfer is "True"

@distribution_key
Scenario: gptransfer is run with distribution key that has upper case characters
Given the database is running
And database "gptest" exists
And database "gptest" is created if not exists on host "GPTRANSFER_SOURCE_HOST" with port "GPTRANSFER_SOURCE_PORT" with user "GPTRANSFER_SOURCE_USER"
And the user runs "psql -p $GPTRANSFER_SOURCE_PORT -h $GPTRANSFER_SOURCE_HOST -U $GPTRANSFER_SOURCE_USER -f gppylib/test/behave/mgmt_utils/steps/data/gptransfer/distribution_test.sql -d gptest"
And the user runs "gptransfer -t gptest.public.caps -t gptest.public.caps_with_dquote -t gptest.public.caps_with_dquote2 -t gptest.public.caps_with_dquote3 -t gptest.public.caps_with_dquote4 -t gptest.public.caps_with_squote -t gptest.public.randomkey --source-port $GPTRANSFER_SOURCE_PORT --source-host $GPTRANSFER_SOURCE_HOST --source-user $GPTRANSFER_SOURCE_USER --dest-user $GPTRANSFER_DEST_USER --dest-port $GPTRANSFER_DEST_PORT --dest-host $GPTRANSFER_DEST_HOST --source-map-file $GPTRANSFER_MAP_FILE --truncate -a"
Then gptransfer should return a return code of 0


@gptransfer_help
Scenario: use gptransfer --help with another gptransfer process already running.
Given the database is running
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
create table caps as SELECT now()::date as "MYDATE", 1::int as "ID";
create table cap_with_dquote as SELECT now()::date as "MY""DATE", 1::int as "ID";
create table cap_with_dquote2 as SELECT now()::date as "MY\DATE", 1::int as "ID";
create table cap_with_dquote3 as SELECT now()::date as "MY\""DATE", 1::int as "ID";
create table cap_with_dquote4 as SELECT now()::date as "'MY\DATE'", 1::int as "ID";
create table cap_with_squote as SELECT now()::date as "MY\'DATE", 1::int as "ID";
create table randomkey as SELECT now()::date as "MY\'DATE", 1::int as "ID" DISTRIBUTED RANDOMLY;
108 changes: 108 additions & 0 deletions gpMgmt/bin/gppylib/test/unit/test_unit_gptransfer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import imp
import os
from mock import *
from gp_unittest import *
from gppylib.db.dbconn import UnexpectedRowsError


class GpTransfer(GpTestCase):
def setUp(self):
# because gptransfer does not have a .py extension,
# we have to use imp to import it
# if we had a gptransfer.py, this is equivalent to:
# import gptransfer
# self.subject = gptransfer
gptransfer_file = os.path.abspath(os.path.dirname(__file__) +
"/../../../gptransfer")
self.subject = imp.load_source('gptransfer',
gptransfer_file)
self.subject.logger = Mock(spec=['log', 'warn', 'info', 'debug', 'error'])

# We have a GIGANTIC class that uses 31 arguments, so pre-setting this
# here
self.GpTransferCommand_args = dict(
name='foo',
src_host='foo',
src_port='foo',
src_user='foo',
dest_host='foo',
dest_port='foo',
dest_user='foo',
table_pair='foo',
dest_exists='foo',
truncate='foo',
analyze='foo',
drop='foo',
fast_mode='foo',
exclusive_lock='foo',
schema_only='foo',
work_dir='foo',
host_map='foo',
source_config='foo',
batch_size='foo',
gpfdist_port='foo',
gpfdist_last_port='foo',
gpfdist_instance_count='foo',
max_line_length='foo',
timeout='foo',
wait_time='foo',
delimiter='foo',
validator='foo',
format='foo',
quote='foo',
table_transfer_set_total='foo')

@patch('gptransfer.TableValidatorFactory', return_value=Mock())
@patch('gptransfer.execSQLForSingletonRow', side_effect=[['MYDATE'],
['MY"DATE'],
['''MY'DATE'''],
['MY""DATE']])
def test__get_distributed_by_with_special_characters_on_column(self, mock1,
mock2):
gptransfer = self.subject
cmd_args = self.GpTransferCommand_args

src_args = ('src', 'public', 'foo', False)
dest_args = ('dest', 'public', 'foo', False)
source_table = gptransfer.GpTransferTable(*src_args)
dest_table = gptransfer.GpTransferTable(*dest_args)
cmd_args['table_pair'] = gptransfer.GpTransferTablePair(source_table,
dest_table)
table_validator = gptransfer.GpTransferCommand(**cmd_args)

expected_results = ['MYDATE',
'MY""DATE',
'MY\'DATE',
'MY""""DATE']
for res in expected_results:
expected_distribution = '''DISTRIBUTED BY ("%s")''' % res
result_distribution = table_validator._get_distributed_by()
self.assertEqual(expected_distribution, result_distribution)

@patch('gptransfer.TableValidatorFactory', return_value=Mock())
@patch('gptransfer.execSQLForSingletonRow',
side_effect=[UnexpectedRowsError(1, 0, "sql foo"),
""])
def test__get_distributed_randomly(self, mock1, mock2):
gptransfer = self.subject
cmd_args = self.GpTransferCommand_args

src_args = ('src', 'public', 'foo', False)
dest_args = ('dest', 'public', 'foo', False)
source_table = gptransfer.GpTransferTable(*src_args)
dest_table = gptransfer.GpTransferTable(*dest_args)
cmd_args['table_pair'] = gptransfer.GpTransferTablePair(source_table,
dest_table)
table_validator = gptransfer.GpTransferCommand(**cmd_args)

expected_distribution = '''DISTRIBUTED RANDOMLY'''
result_distribution = table_validator._get_distributed_by()
self.assertEqual(0, len(self.subject.logger.method_calls))
self.assertEqual(expected_distribution, result_distribution)

result_distribution = table_validator._get_distributed_by()
self.assertEqual(1, len(self.subject.logger.method_calls))
self.assertEqual(expected_distribution, result_distribution)

if __name__ == '__main__':
run_tests()
16 changes: 13 additions & 3 deletions gpMgmt/bin/gptransfer
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,11 @@ try:
from gppylib.commands.gp import get_gphome
from gppylib.db import dbconn
from gppylib.db.dbconn import connect, DbURL, execSQL, \
execSQLForSingletonRow
execSQLForSingletonRow, UnexpectedRowsError
from gppylib.db.catalog import getUserDatabaseList, doesSchemaExist, dropSchemaIfExist
from gppylib.gparray import GpArray
from gppylib.userinput import ask_yesno
from gppylib.operations.backup_utils import escapeDoubleQuoteInSQLString
from pygresql.pg import DB
from pygresql import pg
except ImportError, import_exception:
Expand Down Expand Up @@ -1667,8 +1668,17 @@ GROUP BY nspname, relname;''' % (self._table_pair.source.schema,
self._table_pair.source.table)
try:
row = execSQLForSingletonRow(self._src_conn, sql)
return 'DISTRIBUTED BY (%s)' % row[0]
except:
# NOTE: ideally, we can use pg.escape_identifier for this part, but
# that is introducted in pygresql 4.1 and we are in 4.0
quoted_column_name = escapeDoubleQuoteInSQLString(row[0], forceDoubleQuote=False)
return '''DISTRIBUTED BY ("%s")''' % quoted_column_name
# NOTE: execSQLForSingletonRow will raise if it doesn't find at least 1
# row. This is expected to happen if the table is DISTRIBUTED RANDOMLY
except UnexpectedRowsError:
return 'DISTRIBUTED RANDOMLY'
except Exception as e:
logger.warn('Unable to determine distribution policy for "%s". Using '
'distributed randomly instead.\nmessage: %s' % (self._table_pair.dest, str(e)))
return 'DISTRIBUTED RANDOMLY'

def _create_target_table(self):
Expand Down

0 comments on commit c7cd299

Please sign in to comment.