Skip to content

Commit

Permalink
feat(remove_unzipper_module): delete npm unzipper, replace with GDAL virtual filesystem
Browse files Browse the repository at this point in the history
  • Loading branch information
missinglink committed Mar 10, 2020
1 parent dd6091a commit 6b0bc02
Show file tree
Hide file tree
Showing 4 changed files with 7 additions and 54 deletions.
3 changes: 1 addition & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,7 @@
"sqlite3": "^4.0.0",
"superagent": "^5.1.0",
"through2": "^3.0.0",
"through2-batch": "^1.0.1",
"unzipper": "^0.10.9"
"through2-batch": "^1.0.1"
},
"devDependencies": {
"jshint": "^2.9.3",
Expand Down
10 changes: 5 additions & 5 deletions script/conflate_tiger.sh
Original file line number Diff line number Diff line change
Expand Up @@ -51,18 +51,18 @@ rm -f $PROC_STDOUT $PROC_STDERR $PROC_CONFERR;

# download path of tiger files (use default unless param is supplied)
TIGERPATH=${TIGERPATH:-"$WORKINGDIR/data/tiger"};
# ensure shapefiles directory exists
[ -d "$TIGERPATH/shapefiles" ] || mkdir -p "$TIGERPATH/shapefiles";
# ensure downloads directory exists
[ -d "$TIGERPATH/downloads" ] || mkdir -p "$TIGERPATH/downloads";

# recurse through filesystem listing all .shx file names
# recurse through filesystem listing all .zip file names
# some county zip packages are missing the .shx file, which causes ogr2ogr to fail
find "$TIGERPATH/shapefiles" -type f -iname "*.shx" -print0 |\
find "$TIGERPATH/downloads" -type f -iname "*.zip" -print0 |\
while IFS= read -r -d $'\0' filename; do

# echo filename to stderr
>&2 echo $(date -u) "$filename";

ogr2ogr -f GeoJSON -t_srs crs:84 /vsistdout/ "$filename" |\
ogr2ogr -f GeoJSON -t_srs crs:84 /vsistdout/ /vsizip/$filename |\
node --max-old-space-size=8192 $DIR/../cmd/tiger.js $ADDRESS_DB $STREET_DB 1>>$PROC_STDOUT 2>>$PROC_STDERR;

done;
32 changes: 1 addition & 31 deletions script/js/update_tiger.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
const async = require('async');
const path = require('path');
const fs = require('fs-extra');
const unzip = require('unzipper');
const logger = require('pelias-logger').get('interpolation(TIGER)');
const config = require('pelias-config').generate();
const _ = require('lodash');
Expand Down Expand Up @@ -35,8 +34,6 @@ async.eachSeries(STATES, download, (err)=>{
logger.error(err);
process.exit(1);
}

process.exit(0);
});

function download(state, callback) {
Expand Down Expand Up @@ -85,15 +82,12 @@ function getFilteredFileList(context, callback) {

function downloadFilteredFiles(context, callback) {
context.downloadsDir = path.join(TARGET_DIR, 'downloads');
context.shapefilesDir = path.join(TARGET_DIR, 'shapefiles');

// ensure directories exist
fs.ensureDirSync(context.downloadsDir);
fs.ensureDirSync(context.shapefilesDir);

// ensure directories are writable
fs.accessSync(context.downloadsDir, fs.constants.R_OK | fs.constants.W_OK);
fs.accessSync(context.shapefilesDir, fs.constants.R_OK | fs.constants.W_OK);

// must use eachSeries here because the ftp connection only allows one download at a time
async.eachSeries(context.files, downloadFile.bind(null, context), callback);
Expand All @@ -106,30 +100,6 @@ function downloadFile(context, filename, callback) {
logger.info(`Downloading ${filename}`);
if (err) { return callback(err); }
logger.debug(`Downloaded ${filename}`);

// record unzip errors
let unzipError = null;

// decompress files to shapefile directory
const decompress = unzip.Extract({ path: context.shapefilesDir });
decompress.on('error', (err) => {
unzipError = err;
logger.error(`Failed to unzip ${filename}`);
logger.error(err);
});

// unzip downloaded file
logger.info(`Decompressing ${filename}`);
fs.createReadStream(localFile)
.pipe(decompress)
.on('finish', () => {
logger.debug(`Decompressed ${filename}`);

// delete zip file after unzip is done
fs.unlinkSync(localFile);

// return unzip error if one occurred
callback(unzipError);
});
callback();
});
}
16 changes: 0 additions & 16 deletions script/update_tiger.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ TIGERPATH=${TIGERPATH:-"$WORKINGDIR/data/tiger"};

# create directory if it doesn't exist
mkdir -p $TIGERPATH/downloads;
mkdir -p $TIGERPATH/shapefiles;

# ensure lftp exists and is executable
if [[ ! -f /usr/bin/lftp || ! -x /usr/bin/lftp ]]; then
Expand All @@ -24,18 +23,3 @@ lftp <<-SCRIPT
mirror -e -n -r --parallel=20 --ignore-time /geo/tiger/TIGER2016/ADDRFEAT/ $TIGERPATH/downloads
exit
SCRIPT

# ensure unzip exists and is executable
if [[ ! -f /usr/bin/unzip || ! -x /usr/bin/unzip ]]; then
echo "unzip not installed on system";
exit 1;
fi

# delete directory contents
cd $TIGERPATH/shapefiles;
find -mindepth 1 -maxdepth 1 -print0 | xargs -0 rm -rf;

# extract all the shapefiles from downloads in to dir
for file in $TIGERPATH/downloads/*.zip; do
unzip "$file" -d $TIGERPATH/shapefiles;
done

0 comments on commit 6b0bc02

Please sign in to comment.