Skip to content

Commit

Permalink
TIKA-2931 (apache#281)
Browse files Browse the repository at this point in the history
* need to lowercase the output file to match the format passed to tesseract cmd line

* need to lowercase the output file to match the format passed to tesseract cmd line

* pull original code from apache/tika

* Send informational output to System.err, not System.out, so the command-line JSON response is clean
  • Loading branch information
epugh authored and tballison committed Sep 4, 2019
1 parent e939504 commit 056304f
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 3 deletions.
2 changes: 1 addition & 1 deletion tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
Original file line number Diff line number Diff line change
Expand Up @@ -1051,7 +1051,7 @@ public void parseEmbedded(InputStream inputStream, ContentHandler contentHandler
throw new IOException("unable to create directory \"" + parent + "\"");
}
}
System.out.println("Extracting '"+name+"' ("+contentType+") to " + outputFile);
System.err.println("Extracting '"+name+"' ("+contentType+") to " + outputFile);

try (FileOutputStream os = new FileOutputStream(outputFile)) {
if (inputStream instanceof TikaInputStream) {
Expand Down
10 changes: 8 additions & 2 deletions tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
Original file line number Diff line number Diff line change
Expand Up @@ -49,17 +49,22 @@ public class TikaCLITest {

/* Test members */
// Buffer that receives everything written to System.out while a test runs (installed in setUp()).
private ByteArrayOutputStream outContent = null;
// Buffer that receives everything written to System.err while a test runs (installed in setUp()).
private ByteArrayOutputStream errContent = null;
// The System.out that was active before setUp() redirected it; restored in tearDown().
private PrintStream stdout = null;
// The System.err that was active before setUp() redirected it; restored in tearDown().
private PrintStream stderr = null;
// Test data directory, given as a path relative to the working directory.
private File testDataFile = new File("src/test/resources/test-data");
// URI form of the test data directory.
private URI testDataURI = testDataFile.toURI();
// String form of testDataURI; assigned in setUp().
private String resourcePrefix;

/**
 * Prepares a test: remembers the current System.out and System.err,
 * then redirects both into fresh in-memory buffers (UTF-8, auto-flush)
 * so that tests can inspect what was printed.
 */
@Before
public void setUp() throws Exception {
    // Remember the real streams first so tearDown() can put them back.
    stdout = System.out;
    stderr = System.err;
    resourcePrefix = testDataURI.toString();

    // Both capture streams share the same encoding.
    final String charsetName = UTF_8.name();
    outContent = new ByteArrayOutputStream();
    errContent = new ByteArrayOutputStream();
    System.setOut(new PrintStream(outContent, true, charsetName));
    System.setErr(new PrintStream(errContent, true, charsetName));
}

/**
Expand Down Expand Up @@ -258,11 +263,12 @@ public void testListSupportedTypes() throws Exception{
}

/**
* Tears down the test. Returns the System.out
* Tears down the test. Returns the System.out and System.err
*/
@After
public void tearDown() throws Exception {
    // Undo the redirection done in setUp(): hand back the saved error
    // stream and the saved output stream.
    System.setErr(stderr);
    System.setOut(stdout);
}

@Test
Expand Down Expand Up @@ -402,7 +408,7 @@ public void testZipWithSubdirs() throws Exception {
new File("subdir/foo.txt").delete();
new File("subdir").delete();
TikaCLI.main(params);
String content = outContent.toString(UTF_8.name());
String content = errContent.toString(UTF_8.name());
assertTrue(content.contains("Extracting 'subdir/foo.txt'"));
// clean up. TODO: These should be in target.
new File("target/subdir/foo.txt").delete();
Expand Down

0 comments on commit 056304f

Please sign in to comment.