Skip to content

Commit

Permalink
add embedding depth to rpw handler
Browse files Browse the repository at this point in the history
  • Loading branch information
tballison committed Nov 22, 2019
1 parent cb3c4ba commit 3d2afec
Showing 1 changed file with 6 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ public abstract class AbstractRecursiveParserWrapperHandler extends DefaultHandl
/**
 * Internal text metadata property whose key is
 * {@link TikaCoreProperties#TIKA_META_PREFIX} followed by
 * {@code embedded_resource_path}.
 */
public static final Property EMBEDDED_RESOURCE_PATH = Property.internalText(
        TikaCoreProperties.TIKA_META_PREFIX + "embedded_resource_path");

/**
 * Internal integer metadata property whose key is
 * {@link TikaCoreProperties#TIKA_META_PREFIX} followed by
 * {@code embedded_depth}.
 * <p>
 * This handler writes the current embedded depth under this key in
 * {@code startEmbeddedDocument} and writes {@code 0} in {@code endDocument}.
 */
public static final Property EMBEDDED_DEPTH = Property.internalInteger(
        TikaCoreProperties.TIKA_META_PREFIX + "embedded_depth");


private final ContentHandlerFactory contentHandlerFactory;

// Ceiling on embedded depth: startEmbeddedDocument throws a SAXException
// once embeddedDepth is greater than or equal to this value.
private static final int MAX_DEPTH = 100;
Expand Down Expand Up @@ -93,6 +97,7 @@ public void startEmbeddedDocument(ContentHandler contentHandler, Metadata metada
if (embeddedDepth >= MAX_DEPTH) {
throw new SAXException("Max embedded depth reached: "+embeddedDepth);
}
metadata.set(EMBEDDED_DEPTH, embeddedDepth);
}
/**
* This is called after parsing each embedded document. Override this
Expand Down Expand Up @@ -120,6 +125,7 @@ public void endDocument(ContentHandler contentHandler, Metadata metadata) throws
if (hasHitMaximumEmbeddedResources()) {
metadata.set(EMBEDDED_RESOURCE_LIMIT_REACHED, "true");
}
metadata.set(EMBEDDED_DEPTH, 0);
}

/**
Expand Down

0 comments on commit 3d2afec

Please sign in to comment.