Skip to content

Commit

Permalink
Merge pull request geoserver#4479 from dromagnoli/GEOS-9733
Browse files Browse the repository at this point in the history
[GEOS-9733]: Importer, stop scanning the whole directory when looking for supplemental files
  • Loading branch information
dromagnoli authored Sep 11, 2020
2 parents 06514c6 + b24c21e commit 0945d54
Show file tree
Hide file tree
Showing 7 changed files with 285 additions and 28 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/* (c) 2020 Open Source Geospatial Foundation - all rights reserved
* This code is licensed under the GPL 2.0 license, available at the root
* application directory.
*/
package org.geoserver.importer;

import java.util.Collections;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import org.apache.commons.lang.StringUtils;

/** Default implementation of a SupplementalFileExtensionsProvider */
public class DefaultSupplementalFileExtensionsProvider
implements SupplementalFileExtensionsProvider {
private Set<String> acceptedInputExtensions;
private Set<String> supplementalExtensions;
private Set<String> upperCaseSupplementalExtensions;

public DefaultSupplementalFileExtensionsProvider(
Set<String> acceptedInputExtensions, Set<String> supplementalExtensions) {
this.acceptedInputExtensions = Collections.unmodifiableSet(acceptedInputExtensions);
this.supplementalExtensions = Collections.unmodifiableSet(supplementalExtensions);
Set<String> upperCase = new HashSet<>();
supplementalExtensions.stream().forEach(e -> upperCase.add(e.toUpperCase()));
upperCaseSupplementalExtensions = Collections.unmodifiableSet(upperCase);
}

private boolean isSupportedInputExtension(String extension) {
return extension != null && acceptedInputExtensions.contains(extension.toLowerCase());
}

public boolean canHandle(String baseExtension) {
return baseExtension != null
&& acceptedInputExtensions.contains(baseExtension.toLowerCase());
}

public Set<String> getExtensions(String baseExtension) {
if (!isSupportedInputExtension(baseExtension)) return Collections.emptySet();
// some data providers produce tiff files being stored as .TIF
// we can reasonably suppose that supplemental files will be upper case too
// i.e. .PRJ, .XML
return StringUtils.isAllUpperCase(baseExtension)
? upperCaseSupplementalExtensions
: supplementalExtensions;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,21 @@
package org.geoserver.importer;

import static org.apache.commons.io.FilenameUtils.getBaseName;
import static org.apache.commons.io.FilenameUtils.getExtension;

import com.google.common.base.Function;
import com.google.common.collect.Lists;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.*;
import java.util.logging.Level;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
import org.apache.commons.io.FileUtils;
import org.geoserver.catalog.StyleHandler;
import org.geoserver.catalog.Styles;
import org.geoserver.importer.job.ProgressMonitor;
import org.geoserver.platform.GeoServerExtensions;
import org.geoserver.util.IOUtils;
import org.geotools.referencing.CRS;
import org.opengis.referencing.FactoryException;
Expand Down Expand Up @@ -99,36 +101,49 @@ public String apply(@Nullable StyleHandler input) {

// getBaseName only gets the LAST extension so beware for .shp.aux.xml stuff
final String baseName = getBaseName(file.getName());
final String baseExtension = getExtension(file.getName());
final String basePath = file.getParent();

// look for style files
Iterator styleExtensionsIt = styleExtensions.iterator();
while (styleFile == null && styleExtensionsIt.hasNext()) {
Object ext = styleExtensionsIt.next();
File style = new File(basePath, baseName + "." + ext);
if (style.exists()) {
// TODO: deal with multiple style files? for now we just grab the first
styleFile = style;
}
}

File[] files = file.getParentFile().listFiles();
if (files != null) {
for (File f : files) {
if (f.equals(file)) {
continue;
}

if (!f.getName().startsWith(baseName)) {
continue;
}

if (!f.isFile()) {
continue;
}

String ext = f.getName().substring(baseName.length());
// once the basename is stripped, extension(s) should be present
if (ext.charAt(0) == '.') {
if (".prj".equalsIgnoreCase(ext)) {
prjFile = f;
} else if (styleFile == null && styleExtensions.contains(ext.substring(1))) {
// TODO: deal with multiple style files? for now we just grab the first
styleFile = f;
// The previous version of this code called File.listFiles and looked for
// files sharing the base name of the input file. That turned out to be a very
// time consuming operation when importing a file living in a directory
// containing thousands of files, especially on systems doing continuous
// ingestion, where a new file arrives every few minutes and the directory
// keeps growing.

// Looking for supplemental files
List<SupplementalFileExtensionsProvider> extensionsProviders =
GeoServerExtensions.extensions(SupplementalFileExtensionsProvider.class);

// different providers can provide same supplementalFile extension so let's use a set
Set<File> supplementalSet = new HashSet<>();
for (SupplementalFileExtensionsProvider provider : extensionsProviders) {
// get extensions from each provider
for (String extension : provider.getExtensions(baseExtension)) {
File supplementalFile = new File(basePath, baseName + "." + extension);
if (supplementalFile.exists()) {
if ("prj".equalsIgnoreCase(extension)) {
prjFile = supplementalFile;
} else {
suppFiles.add(f);
supplementalSet.add(supplementalFile);
}
}
}
}

suppFiles = supplementalSet.stream().collect(Collectors.toList());

if (format == null) {
format = DataFormat.lookup(file);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
/* (c) 2020 Open Source Geospatial Foundation - all rights reserved
* This code is licensed under the GPL 2.0 license, available at the root
* application directory.
*/
package org.geoserver.importer;

import java.util.*;

/** A Class reporting a Set of default file extensions for some commonly used spatial files */
public class SpatialFileExtensionsProvider implements SupplementalFileExtensionsProvider {

public SpatialFileExtensionsProvider() {};

static class JPEGFileExtensionsProvider extends DefaultSupplementalFileExtensionsProvider {

JPEGFileExtensionsProvider() {
super(
new HashSet<String>(Arrays.asList("jpeg", "jpg")),
new HashSet<String>(Arrays.asList("jpw", "wld", "prj")));
}
};

static class TIFFFileExtensionsProvider extends DefaultSupplementalFileExtensionsProvider {

TIFFFileExtensionsProvider() {
super(
new HashSet<String>(Arrays.asList("tif", "tiff")),
new HashSet<String>(
Arrays.asList(
"tfw", "wld", "aux", "rrd", "xml", "tif.aux.xml", "prj")));
}
};

static class PNGFileExtensionsProvider extends DefaultSupplementalFileExtensionsProvider {

PNGFileExtensionsProvider() {
super(
new HashSet<String>(Arrays.asList("png")),
new HashSet<String>(Arrays.asList("pnw", "wld", "aux.xml", "xml", "prj")));
}
};

static class NetCDFFileExtensionsProvider extends DefaultSupplementalFileExtensionsProvider {

NetCDFFileExtensionsProvider() {
super(
new HashSet<String>(Arrays.asList("nc")),
new HashSet<String>(Arrays.asList("ncx", "aux.xml", "xml", "prj")));
}
};

static class GribFileExtensionsProvider extends DefaultSupplementalFileExtensionsProvider {

GribFileExtensionsProvider() {
super(
new HashSet<String>(Arrays.asList("grib", "grb", "grib2", "grb2")),
new HashSet<String>(
Arrays.asList(
"grb2.ncx3",
"gbx9",
"ncx3",
"gbx9.ncx3",
"aux.xml",
"xml",
"prj")));
}
};

static class ShapeFileExtensionsProvider extends DefaultSupplementalFileExtensionsProvider {

ShapeFileExtensionsProvider() {
super(
new HashSet<String>(Arrays.asList("shp")),
new HashSet<String>(
Arrays.asList(
"shx",
"dbf",
"aux.xml",
"idx",
"sbx",
"sbn",
"shp.ed.lock",
"shp.xml",
"prj")));
}
};

private static final ShapeFileExtensionsProvider SHAPEFILE_PROVIDER =
new ShapeFileExtensionsProvider();
private static final TIFFFileExtensionsProvider TIF_PROVIDER = new TIFFFileExtensionsProvider();
private static final JPEGFileExtensionsProvider JPEG_PROVIDER =
new JPEGFileExtensionsProvider();
private static final PNGFileExtensionsProvider PNG_PROVIDER = new PNGFileExtensionsProvider();
private static final NetCDFFileExtensionsProvider NETCDF_PROVIDER =
new NetCDFFileExtensionsProvider();
private static final GribFileExtensionsProvider GRIB_PROVIDER =
new GribFileExtensionsProvider();

static Map<String, SupplementalFileExtensionsProvider> PROVIDERS = new HashMap();

{
// Providers being setup on the available tests
PROVIDERS.put("tif", TIF_PROVIDER);
PROVIDERS.put("tiff", TIF_PROVIDER);
PROVIDERS.put("jpeg", JPEG_PROVIDER);
PROVIDERS.put("jpg", JPEG_PROVIDER);
PROVIDERS.put("png", PNG_PROVIDER);
PROVIDERS.put("shp", SHAPEFILE_PROVIDER);
PROVIDERS.put("nc", NETCDF_PROVIDER);
PROVIDERS.put("grib", GRIB_PROVIDER);
PROVIDERS.put("grib2", GRIB_PROVIDER);
}

public Set<String> getExtensions(String baseExtension) {
if (canHandle(baseExtension)) {
return PROVIDERS.get(baseExtension.toLowerCase()).getExtensions(baseExtension);
}
return Collections.emptySet();
}

public boolean canHandle(String baseExtension) {
return baseExtension != null && PROVIDERS.containsKey(baseExtension.toLowerCase());
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/* (c) 2020 Open Source Geospatial Foundation - all rights reserved
* This code is licensed under the GPL 2.0 license, available at the root
* application directory.
*/
package org.geoserver.importer;

import java.util.Set;

/**
 * Contract for components reporting the set of supplemental file extensions associated with a
 * given base extension. Additional implementations may return additional file extensions (e.g.
 * getting them from a data directory or an external file definition).
 */
public interface SupplementalFileExtensionsProvider {

/**
 * Returns the set of supplemental file extensions available for the given base input extension.
 *
 * @param baseExtension the extension of the main input file
 * @return the supplemental file extensions for that base extension
 */
Set<String> getExtensions(String baseExtension);

/**
 * Checks if this provider can handle the specified base input extension.
 *
 * @param baseExtension the extension of the main input file
 * @return {@code true} if this provider handles the extension
 */
boolean canHandle(String baseExtension);
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,6 @@
<bean id="gmlFormat" class="org.geoserver.importer.format.GMLFileFormat" />
<bean id="geoJsonFormat" class="org.geoserver.importer.format.GeoJSONFormat" />

<bean id="spatialFileExtensionsProvider" class="org.geoserver.importer.SpatialFileExtensionsProvider" />

</beans>
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
package org.geoserver.importer;

import static org.geoserver.importer.ImporterTestUtils.unpack;
import static org.junit.Assert.assertEquals;

import java.io.File;
import java.util.Arrays;
Expand All @@ -17,10 +16,19 @@
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.geoserver.importer.mosaic.Mosaic;
import org.geoserver.platform.GeoServerExtensionsHelper;

public class DirectoryTest extends TestCase {

public void testMosaicAuxillaryFiles() throws Exception {
@Override
public void setUp() {
GeoServerExtensionsHelper.singleton(
"spatialFileExtensionsProvider",
new SpatialFileExtensionsProvider(),
SupplementalFileExtensionsProvider.class);
}

public void testMosaicAuxiliaryFiles() throws Exception {
File unpack = ImporterTestUtils.unpack("mosaic/bm.zip");

// all types of junk!
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/* (c) 2020 Open Source Geospatial Foundation - all rights reserved
* This code is licensed under the GPL 2.0 license, available at the root
* application directory.
*/
package org.geoserver.importer;

import java.util.Set;
import junit.framework.TestCase;

/** Checks the extensions reported by {@link SpatialFileExtensionsProvider}. */
public class SupplementalFileProviderTest extends TestCase {

    /**
     * Verifies that unsupported base extensions are rejected, that supported ones report the
     * expected supplemental extensions, and that an upper case base extension yields upper case
     * supplemental extensions.
     */
    public void testSupportedSupplementalFiles() throws Exception {
        final SpatialFileExtensionsProvider spatialProvider = new SpatialFileExtensionsProvider();

        // Base extensions no provider is registered for
        for (String unsupported : new String[] {"txt", "pdf"}) {
            assertFalse(spatialProvider.canHandle(unsupported));
        }

        // Supported base extensions and their supplemental files
        assertTrue(spatialProvider.canHandle("tif"));
        assertContainsAll(spatialProvider.getExtensions("tif"), "tfw", "prj", "wld", "rrd");

        assertTrue(spatialProvider.canHandle("jpg"));
        assertContainsAll(spatialProvider.getExtensions("jpg"), "jpw", "prj", "wld");

        // An upper case base extension is answered with upper case supplemental extensions
        assertContainsAll(spatialProvider.getExtensions("SHP"), "DBF", "SHX");
    }

    /** Asserts, in order, that each expected extension is part of the reported set. */
    private static void assertContainsAll(Set<String> reported, String... expected) {
        for (String extension : expected) {
            assertTrue(reported.contains(extension));
        }
    }
}

0 comments on commit 0945d54

Please sign in to comment.