Skip to content

Commit

Permalink
Refactor QPDFPageObjectHelper::removeUnreferencedResources()
Browse files Browse the repository at this point in the history
Refactor removeUnreferencedResources to prepare for filtering form
XObjects.
  • Loading branch information
jberkenbilt committed Mar 31, 2020
1 parent b03e6bd commit 278710f
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 5 deletions.
4 changes: 4 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
2020-03-31 Jay Berkenbilt <[email protected]>

* Add QPDFObjectHandle::filterAsContents, which filters a stream's
data as if it were page contents. This can be useful to filter
form XObjects the same way we would filter page contents.

* If QPDF_EXECUTABLE is set, use it as the path to qpdf for
purposes of completion. This variable is only read during the
execution of `qpdf --completion-zsh` and `qpdf
Expand Down
6 changes: 6 additions & 0 deletions include/qpdf/QPDFObjectHandle.hh
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,12 @@ class QPDFObjectHandle
void addContentTokenFilter(PointerHolder<TokenFilter> token_filter);
// End legacy content stream helpers

// Called on a stream to filter the stream as if it were page
// contents. This can be used to apply a TokenFilter to a form
// XObject, whose data is in the same format as a content stream.
// The stream's data is piped through a tokenizer pipeline that is
// constructed with the given filter and with next (which may be
// omitted, in which case it defaults to 0).
QPDF_DLL
void filterAsContents(TokenFilter* filter, Pipeline* next = 0);

// Type-specific factories
QPDF_DLL
static QPDFObjectHandle newNull();
Expand Down
7 changes: 7 additions & 0 deletions include/qpdf/QPDFPageObjectHelper.hh
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <qpdf/DLL.h>

#include <qpdf/QPDFObjectHandle.hh>
#include <functional>

class QPDFPageObjectHelper: public QPDFObjectHelper
{
Expand Down Expand Up @@ -231,6 +232,12 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
bool invert_transformations = true);

private:
// Shared implementation used by removeUnreferencedResources().
//
// oh: the object being processed. Its object/generation pair is
//   recorded in seen, and warnings are issued through it.
// seen: object/generation pairs that have already been processed;
//   the helper returns immediately if oh is already present.
// get_resource: callback that returns the resources object to work
//   on (the page-level caller returns the "/Resources" attribute).
// filter_content: callback that is handed the token filter used to
//   scan content (the page-level caller forwards it to
//   filterPageContents).
static void
removeUnreferencedResourcesHelper(
QPDFObjectHandle oh, std::set<QPDFObjGen>& seen,
std::function<QPDFObjectHandle()> get_resource,
std::function<void(QPDFObjectHandle::TokenFilter*)> filter_content);

class Members
{
friend class QPDFPageObjectHelper;
Expand Down
10 changes: 10 additions & 0 deletions libqpdf/QPDFObjectHandle.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1629,6 +1629,16 @@ QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next)
this->pipePageContents(&token_pipeline);
}

// Pipe this stream's data through a tokenizer pipeline built from the
// caller's token filter and optional downstream pipeline. The pipeline
// is labeled with this object's id and generation.
void
QPDFObjectHandle::filterAsContents(TokenFilter* filter, Pipeline* next)
{
    // Assemble the identifying label piece by piece.
    std::string label("token filter for object ");
    label += QUtil::int_to_string(this->m->objid);
    label += " ";
    label += QUtil::int_to_string(this->m->generation);

    Pl_QPDFTokenizer tokenizer(label.c_str(), filter, next);
    this->pipeStreamData(&tokenizer, 0, qpdf_dl_specialized);
}

void
QPDFObjectHandle::parseContentStream(QPDFObjectHandle stream_or_array,
ParserCallbacks* callbacks)
Expand Down
30 changes: 25 additions & 5 deletions libqpdf/QPDFPageObjectHelper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -511,24 +511,32 @@ NameWatcher::handleToken(QPDFTokenizer::Token const& token)
}

void
QPDFPageObjectHelper::removeUnreferencedResources()
QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
QPDFObjectHandle oh, std::set<QPDFObjGen>& seen,
std::function<QPDFObjectHandle()> get_resource,
std::function<void(QPDFObjectHandle::TokenFilter*)> filter_content)
{
if (seen.count(oh.getObjGen()))
{
return;
}
seen.insert(oh.getObjGen());
NameWatcher nw;
try
{
filterPageContents(&nw);
filter_content(&nw);
}
catch (std::exception& e)
{
this->oh.warnIfPossible(
oh.warnIfPossible(
std::string("Unable to parse content stream: ") + e.what() +
"; not attempting to remove unreferenced objects from this page");
return;
}
if (nw.saw_bad)
{
QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names");
this->oh.warnIfPossible(
oh.warnIfPossible(
"Bad token found while scanning content stream; "
"not attempting to remove unreferenced objects from this page");
return;
Expand All @@ -541,7 +549,7 @@ QPDFPageObjectHelper::removeUnreferencedResources()
std::vector<std::string> to_filter;
to_filter.push_back("/Font");
to_filter.push_back("/XObject");
QPDFObjectHandle resources = getAttribute("/Resources", true);
QPDFObjectHandle resources = get_resource();
for (std::vector<std::string>::iterator d_iter = to_filter.begin();
d_iter != to_filter.end(); ++d_iter)
{
Expand All @@ -564,6 +572,18 @@ QPDFPageObjectHelper::removeUnreferencedResources()
}
}

void
QPDFPageObjectHelper::removeUnreferencedResources()
{
std::set<QPDFObjGen> seen;
removeUnreferencedResourcesHelper(
this->oh, seen,
[this]() { return this->getAttribute("/Resources", true); },
[this](QPDFObjectHandle::TokenFilter* f) {
this->filterPageContents(f);
});
}

QPDFPageObjectHelper
QPDFPageObjectHelper::shallowCopyPage()
{
Expand Down

0 comments on commit 278710f

Please sign in to comment.