Skip to content

Commit

Permalink
Add --skip-errors option (simonw#20)
Browse files: browse the repository at this point in the history
  • Loading branch information
janimo authored and Simon Willison committed Dec 8, 2017
1 parent 00e8f7e commit c57f6d1
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 4 deletions.
5 changes: 3 additions & 2 deletions csvs_to_sqlite/cli.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -27,6 +27,7 @@
@click.argument('dbname', nargs=1)
@click.option('--separator', '-s', default=',', help='Field separator in input .csv')
@click.option('--quoting', '-q', default=0, help='Control field quoting behavior per csv.QUOTE_* constants. Use one of QUOTE_MINIMAL (0), QUOTE_ALL (1), QUOTE_NONNUMERIC (2) or QUOTE_NONE (3).')
@click.option('--skip-errors', is_flag=True, help='Skip lines with too many fields instead of stopping the import')
@click.option('--replace-tables', is_flag=True, help='Replace tables if they already exist')
@click.option('--extract-column', '-c', multiple=True, help=(
"One or more columns to 'extract' into a separate lookup table. "
Expand All @@ -45,7 +46,7 @@
"One or more columns to use to populate a full-text index"
))
@click.version_option()
def cli(paths, dbname, separator, quoting, replace_tables, extract_column, fts):
def cli(paths, dbname, separator, quoting, skip_errors, replace_tables, extract_column, fts):
"""
PATHS: paths to individual .csv files or to directories containing .csvs
Expand All @@ -72,7 +73,7 @@ def cli(paths, dbname, separator, quoting, replace_tables, extract_column, fts):
csvs = csvs_from_paths(paths)
for name, path in csvs.items():
try:
df = load_csv(path, separator, quoting)
df = load_csv(path, separator, skip_errors, quoting)
df.table_name = name
dataframes.append(df)
except LoadCsvError as e:
Expand Down
4 changes: 2 additions & 2 deletions csvs_to_sqlite/utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -10,11 +10,11 @@ class LoadCsvError(Exception):
pass


def load_csv(filepath, separator, quoting, encodings_to_try=('utf8', 'latin-1')):
def load_csv(filepath, separator, skip_errors, quoting, encodings_to_try=('utf8', 'latin-1')):
try:
for encoding in encodings_to_try:
try:
return pd.read_csv(filepath, sep=separator, quoting=quoting, low_memory=True, encoding=encoding)
return pd.read_csv(filepath, sep=separator, quoting=quoting, error_bad_lines=not skip_errors, low_memory=True, encoding=encoding)
except UnicodeDecodeError:
continue
except pd.errors.ParserError as e:
Expand Down

0 comments on commit c57f6d1

Please sign in to comment.