Skip to content

Commit

Permalink
TIKA-1974 -- remove deprecated metadata properties/keys for Tika 2.0
Browse files (browse the repository at this point in the history)
  • Loading branch information
tballison committed Jan 26, 2018
1 parent 3ce43ad commit 10a8eec
Show file tree
Hide file tree
Showing 162 changed files with 823 additions and 1,204 deletions.
2 changes: 2 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
Release 2.0.0 - ???

* Remove deprecated Metadata keys/properties (TIKA-1974).

* Require Java 8 (TIKA-2553).

* Add a parser for XPS (TIKA-2524).
Expand Down
5 changes: 3 additions & 2 deletions tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.language.detect.LanguageHandler;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.serialization.JsonMetadata;
import org.apache.tika.metadata.serialization.JsonMetadataList;
import org.apache.tika.mime.MediaType;
Expand Down Expand Up @@ -1036,7 +1037,7 @@ public boolean shouldParseEmbedded(Metadata metadata) {
}

public void parseEmbedded(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, boolean outputHtml) throws SAXException, IOException {
String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
String name = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);

if (name == null) {
name = "file" + count++;
Expand All @@ -1053,7 +1054,7 @@ public void parseEmbedded(InputStream inputStream, ContentHandler contentHandler
}
}

String relID = metadata.get(Metadata.EMBEDDED_RELATIONSHIP_ID);
String relID = metadata.get(TikaCoreProperties.EMBEDDED_RELATIONSHIP_ID);
if (relID != null && !name.startsWith(relID)) {
name = relID + "_" + name;
}
Expand Down
5 changes: 3 additions & 2 deletions tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.serialization.JsonMetadataList;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
Expand Down Expand Up @@ -367,7 +368,7 @@ private void handleStream(InputStream input, Metadata md)
}
}

String name = md.get(Metadata.RESOURCE_NAME_KEY);
String name = md.get(TikaCoreProperties.RESOURCE_NAME_KEY);
if (name != null && name.length() > 0) {
setTitle("Apache Tika: " + name);
} else {
Expand Down Expand Up @@ -653,7 +654,7 @@ public Set<MediaType> getSupportedTypes(ParseContext context) {
public void parse(InputStream stream, ContentHandler handler,
Metadata metadata, ParseContext context) throws IOException,
SAXException, TikaException {
String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
String name = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
if(name != null && wanted.containsKey(name)) {
FileOutputStream out = new FileOutputStream(wanted.get(name));
IOUtils.copy(stream, out);
Expand Down
18 changes: 8 additions & 10 deletions tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,9 @@ public void testJsonMetadataOutput() throws Exception {
//test legacy alphabetic sort of keys
int enc = json.indexOf("\"Content-Encoding\"");
int fb = json.indexOf("fb:admins");
int title = json.indexOf("\"title\"");
int title = json.indexOf("\"dc:title\"");
assertTrue(enc > -1 && fb > -1 && enc < fb);
assertTrue (fb > -1 && title > -1 && fb < title);
assertTrue (fb > -1 && title > -1 && fb > title);
assertTrue(json.contains("\"X-TIKA:digest:MD2\":"));
}

Expand All @@ -183,9 +183,9 @@ public void testJsonMetadataPrettyPrintOutput() throws Exception {
//test legacy alphabetic sort of keys
int enc = json.indexOf("\"Content-Encoding\"");
int fb = json.indexOf("fb:admins");
int title = json.indexOf("\"title\"");
int title = json.indexOf("\"dc:title\"");
assertTrue(enc > -1 && fb > -1 && enc < fb);
assertTrue (fb > -1 && title > -1 && fb < title);
assertTrue (fb > -1 && title > -1 && fb > title);
}

/**
Expand Down Expand Up @@ -398,12 +398,10 @@ public void testJsonRecursiveMetadataParserMetadataOnly() throws Exception {
String[] params = new String[]{"-m", "-J", "-r", resourcePrefix+"test_recursive_embedded.docx"};
TikaCLI.main(params);
String content = outContent.toString(UTF_8.name());
assertTrue(content.contains("[\n" +
" {\n" +
" \"Application-Name\": \"Microsoft Office Word\",\n" +
" \"Application-Version\": \"15.0000\",\n" +
" \"Character Count\": \"28\",\n" +
" \"Character-Count-With-Spaces\": \"31\","));
assertTrue(content.contains(
"\"extended-properties:AppVersion\": \"15.0000\","));
assertTrue(content.contains(
"\"extended-properties:Application\": \"Microsoft Office Word\","));
assertTrue(content.contains("\"X-TIKA:embedded_resource_path\": \"/embed1.zip\""));
assertFalse(content.contains("X-TIKA:content"));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,12 @@

import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.util.PropsUtil;

/**
* Selector that chooses files based on their file name
* and their size, as determined by Metadata.RESOURCE_NAME_KEY and Metadata.CONTENT_LENGTH.
* and their size, as determined by TikaCoreProperties.RESOURCE_NAME_KEY and Metadata.CONTENT_LENGTH.
* <p/>
* The {@link #excludeFileName} pattern is applied first (if it isn't null).
* Then the {@link #includeFileName} pattern is applied (if it isn't null),
Expand All @@ -52,7 +53,7 @@ public FSDocumentSelector(Pattern includeFileName, Pattern excludeFileName, long

@Override
public boolean select(Metadata metadata) {
String fName = metadata.get(Metadata.RESOURCE_NAME_KEY);
String fName = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
long sz = PropsUtil.getLong(metadata.get(Metadata.CONTENT_LENGTH), -1L);
if (maxFileSizeBytes > -1 && sz > 0) {
if (sz > maxFileSizeBytes) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,14 @@
import org.apache.tika.batch.FileResource;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;

/**
* FileSystem(FS)Resource wraps a file name.
* <p/>
* This class automatically sets the following keys in Metadata:
* <ul>
* <li>Metadata.RESOURCE_NAME_KEY (file name)</li>
* <li>TikaCoreProperties.RESOURCE_NAME_KEY (file name)</li>
* <li>Metadata.CONTENT_LENGTH</li>
* <li>FSProperties.FS_REL_PATH</li>
* <li>FileResource.FILE_EXTENSION</li>
Expand Down Expand Up @@ -76,7 +77,7 @@ public FSFileResource(Path inputRoot, Path fullPath) {

//need to set these now so that the filter can determine
//whether or not to crawl this file
metadata.set(Metadata.RESOURCE_NAME_KEY, fullPath.getFileName().toString());
metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, fullPath.getFileName().toString());
long sz = -1;
try {
sz = Files.size(fullPath);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public class RecursiveParserWrapperFSConsumerTest extends TikaTest {
public void testEmbeddedWithNPE() throws Exception {
final String path = "/test-documents/embedded_with_npe.xml";
final Metadata metadata = new Metadata();
metadata.add(Metadata.RESOURCE_NAME_KEY, "embedded_with_npe.xml");
metadata.add(TikaCoreProperties.RESOURCE_NAME_KEY, "embedded_with_npe.xml");

ArrayBlockingQueue<FileResource> queue = new ArrayBlockingQueue<FileResource>(2);
queue.add(new FileResource() {
Expand Down Expand Up @@ -93,7 +93,7 @@ queue, new AutoDetectParserFactory(), new BasicContentHandlerFactory(BasicConten
public void testEmbeddedThenNPE() throws Exception {
final String path = "/test-documents/embedded_then_npe.xml";
final Metadata metadata = new Metadata();
metadata.add(Metadata.RESOURCE_NAME_KEY, "embedded_then_npe.xml");
metadata.add(TikaCoreProperties.RESOURCE_NAME_KEY, "embedded_then_npe.xml");

ArrayBlockingQueue<FileResource> queue = new ArrayBlockingQueue<FileResource>(2);
queue.add(new FileResource() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import org.apache.tika.detect.Detector;
import org.apache.tika.fork.ForkParser;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.DefaultParser;
Expand Down Expand Up @@ -130,10 +131,10 @@ public void testManifestNoJUnit() throws Exception {
@Test
public void testBundleDetection() throws Exception {
Metadata metadataTXT = new Metadata();
metadataTXT.set(Metadata.RESOURCE_NAME_KEY, "test.txt");
metadataTXT.set(TikaCoreProperties.RESOURCE_NAME_KEY, "test.txt");

Metadata metadataPDF = new Metadata();
metadataPDF.set(Metadata.RESOURCE_NAME_KEY, "test.pdf");
metadataPDF.set(TikaCoreProperties.RESOURCE_NAME_KEY, "test.pdf");

// Simple type detection
assertEquals(MediaType.TEXT_PLAIN, contentTypeDetector.detect(null, metadataTXT));
Expand Down
1 change: 1 addition & 0 deletions tika-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@
<exlude>org/apache/tika/metadata/Metadata</exlude>
<exlude>org/apache/tika/metadata/MSOffice</exlude>
<exlude>org/apache/tika/parser/EmptyParser</exlude>
<exclude>org/apache/tika/metadata/TikaMetadataKeys</exclude>
</excludes>
<comparisonArtifacts>
<comparisonArtifact>
Expand Down
3 changes: 2 additions & 1 deletion tika-core/src/main/java/org/apache/tika/Tika.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.language.translate.Translator;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
Expand Down Expand Up @@ -180,7 +181,7 @@ public String detect(InputStream stream, Metadata metadata)
*/
public String detect(InputStream stream, String name) throws IOException {
Metadata metadata = new Metadata();
metadata.set(Metadata.RESOURCE_NAME_KEY, name);
metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, name);
return detect(stream, metadata);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.util.regex.Pattern;

import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;

import static java.nio.charset.StandardCharsets.UTF_8;
Expand Down Expand Up @@ -95,7 +96,7 @@ public NameDetector(Map<Pattern, MediaType> patterns) {
*/
public MediaType detect(InputStream input, Metadata metadata) {
// Look for a resource name in the input metadata
String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
String name = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
if (name != null) {
// If the name is a URL, skip the trailing query
int question = name.indexOf('?');
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.AutoDetectParser;
Expand Down Expand Up @@ -111,7 +112,7 @@ public void parse(
TikaInputStream tis = TikaInputStream.get(stream, tmp);

// Figure out what we have to process
String filename = metadata.get(Metadata.RESOURCE_NAME_KEY);
String filename = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
MediaType type = detector.detect(tis, metadata);

if (extractor == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.DelegatingParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
Expand Down Expand Up @@ -64,7 +65,7 @@ public boolean shouldParseEmbedded(Metadata metadata) {

FilenameFilter filter = context.get(FilenameFilter.class);
if (filter != null) {
String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
String name = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
if (name != null) {
return filter.accept(ABSTRACT_PATH, name);
}
Expand All @@ -82,7 +83,7 @@ public void parseEmbedded(
handler.startElement(XHTML, "div", "div", attributes);
}

String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
String name = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
if (name != null && name.length() > 0 && outputHtml) {
handler.startElement(XHTML, "h1", "h1", new AttributesImpl());
char[] chars = name.toCharArray();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ public static String normalize(final String name) {
* <p>
* The goal of this is to get a filename from a path.
* The package parsers and some other embedded doc
* extractors could put anything into Metadata.RESOURCE_NAME_KEY.
* extractors could put anything into TikaCoreProperties.RESOURCE_NAME_KEY.
* <p>
* If a careless client used that filename as if it were a
* filename and not a path when writing embedded files,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import java.sql.SQLException;

import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;

/**
* Input stream with extended capabilities. The purpose of this class is
Expand Down Expand Up @@ -222,7 +223,7 @@ public static TikaInputStream get(Path path) throws IOException {
*/
public static TikaInputStream get(Path path, Metadata metadata)
throws IOException {
metadata.set(Metadata.RESOURCE_NAME_KEY, path.getFileName().toString());
metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, path.getFileName().toString());
metadata.set(Metadata.CONTENT_LENGTH, Long.toString(Files.size(path)));
return new TikaInputStream(path);
}
Expand Down Expand Up @@ -262,7 +263,7 @@ public static TikaInputStream get(File file) throws FileNotFoundException {
@Deprecated
public static TikaInputStream get(File file, Metadata metadata)
throws FileNotFoundException {
metadata.set(Metadata.RESOURCE_NAME_KEY, file.getName());
metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, file.getName());
metadata.set(Metadata.CONTENT_LENGTH, Long.toString(file.length()));
return new TikaInputStream(file);
}
Expand Down Expand Up @@ -410,7 +411,7 @@ public static TikaInputStream get(URL url, Metadata metadata)
String path = url.getPath();
int slash = path.lastIndexOf('/');
if (slash + 1 < path.length()) { // works even with -1!
metadata.set(Metadata.RESOURCE_NAME_KEY, path.substring(slash + 1));
metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, path.substring(slash + 1));
}

String type = connection.getContentType();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
*/
public interface AccessPermissions {

final static String PREFIX = "access_permission"+Metadata.NAMESPACE_PREFIX_DELIMITER;
final static String PREFIX = "access_permission"+TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER;

/**
* Can any modifications be made to the document
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* limitations under the License.
*/
public interface Database {
final static String PREFIX = "database"+Metadata.NAMESPACE_PREFIX_DELIMITER;
final static String PREFIX = "database"+TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER;

Property TABLE_NAME = Property.externalTextBag(PREFIX+"table_name");
Property COLUMN_COUNT = Property.externalText(PREFIX+"column_count");
Expand Down
Loading

0 comments on commit 10a8eec

Please sign in to comment.