Skip to content

Commit

Permalink
Support comma-separated numeric values with --collate (fixes qpdf#505)
Browse files Browse the repository at this point in the history
  • Loading branch information
jberkenbilt committed Jan 1, 2024
1 parent 9db5d75 commit d8d70ec
Show file tree
Hide file tree
Showing 12 changed files with 128 additions and 39 deletions.
6 changes: 5 additions & 1 deletion ChangeLog
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
2024-01-01 Jay Berkenbilt <[email protected]>

* Support comma-separated numeric values with --collate to select
different group sizes from different files. Fixes #505.

* Support "x" before a group in a numeric range to exclude a group
from the previous group. Details are in the manual.
from the previous group. Details are in the manual. Fixes #564,
#790.

2023-12-29 Jay Berkenbilt <[email protected]>

Expand Down
2 changes: 1 addition & 1 deletion include/qpdf/QPDFJob.hh
Original file line number Diff line number Diff line change
Expand Up @@ -637,7 +637,7 @@ class QPDFJob
bool show_filtered_stream_data{false};
bool show_pages{false};
bool show_page_images{false};
size_t collate{0};
std::vector<size_t> collate;
bool flatten_rotation{false};
bool list_attachments{false};
std::string attachment_to_show;
Expand Down
6 changes: 3 additions & 3 deletions job.sums
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ include/qpdf/auto_job_c_pages.hh b3cc0f21029f6d89efa043dcdbfa183cb59325b6506001c
include/qpdf/auto_job_c_uo.hh ae21b69a1efa9333050f4833d465f6daff87e5b38e5106e49bbef5d4132e4ed1
job.yml 4f89fc7b622df897d30d403d8035aa36fc7de8d8c43042c736e0300d904cb05c
libqpdf/qpdf/auto_job_decl.hh 9c6f701c29f3f764d620186bed92685a2edf2e4d11e4f4532862c05470cfc4d2
libqpdf/qpdf/auto_job_help.hh 838f4065f64dc3fbd493510fd21d8ab4e16ee2434592776f44f80cbe3045cb50
libqpdf/qpdf/auto_job_help.hh ea0d0cdebeb190d305bd5f9bca85a4430dbcfa0881ac9be839216b878765b379
libqpdf/qpdf/auto_job_init.hh b4c2b3724fba61f1206fd3bae81951636852592f67a63ef9539839c2c5995065
libqpdf/qpdf/auto_job_json_decl.hh 06caa46eaf71db8a50c046f91866baa8087745a9474319fb7c86d92634cc8297
libqpdf/qpdf/auto_job_json_init.hh f5acb9aa103131cb68dec0e12c4d237a6459bdb49b24773c24f0c2724a462b8f
libqpdf/qpdf/auto_job_schema.hh b53c006fec2e75b1b73588d242d49a32f7d3db820b1541de106c5d4c27fbb4d9
manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580
manual/cli.rst d6d1ca82c936ffeaf137c586f988f80043db4c3b226d26fdf94f19a6005d012e
manual/qpdf.1 10dc52d32a6d8885ce4e4292875ee7fe8e7a826ef3fc28db5671be413bcaacc7
manual/cli.rst fc8488129c479b6cde9dffbbddb150bc1ffb45bc38b3bff2c5dba4378f0edb67
manual/qpdf.1 738dc9b732ad4c880d034b99f957077628fde1d0006943aaf813e98f8e2f9635
manual/qpdf.1.in 436ecc85d45c4c9e2dbd1725fb7f0177fb627179469f114561adf3cb6cbb677b
35 changes: 18 additions & 17 deletions libqpdf/QPDFJob.cc
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
#include <qpdf/QPDFJob.hh>

#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fcntl.h>
#include <iostream>
#include <memory>

Expand All @@ -14,13 +10,11 @@
#include <qpdf/Pl_DCT.hh>
#include <qpdf/Pl_Discard.hh>
#include <qpdf/Pl_Flate.hh>
#include <qpdf/Pl_OStream.hh>
#include <qpdf/Pl_StdioFile.hh>
#include <qpdf/Pl_String.hh>
#include <qpdf/QIntC.hh>
#include <qpdf/QPDF.hh>
#include <qpdf/QPDFAcroFormDocumentHelper.hh>
#include <qpdf/QPDFArgParser.hh>
#include <qpdf/QPDFCryptoProvider.hh>
#include <qpdf/QPDFEmbeddedFileDocumentHelper.hh>
#include <qpdf/QPDFExc.hh>
Expand Down Expand Up @@ -2419,26 +2413,32 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
dh.removePage(page);
}

if (m->collate && (parsed_specs.size() > 1)) {
auto n_collate = m->collate.size();
auto n_specs = parsed_specs.size();
if (n_collate > 0 && n_specs > 1) {
// Collate the pages by selecting one page from each spec in order. When a spec runs out of
// pages, stop selecting from it.
std::vector<QPDFPageData> new_parsed_specs;
size_t nspecs = parsed_specs.size();
size_t cur_page = 0;
// Make sure we have a collate value for each spec. We have already checked that a non-empty
// collate has either one value or one value per spec.
for (auto i = n_collate; i < n_specs; ++i) {
m->collate.push_back(m->collate.at(0));
}
std::vector<size_t> cur_page(n_specs, 0);
bool got_pages = true;
while (got_pages) {
got_pages = false;
for (size_t i = 0; i < nspecs; ++i) {
for (size_t i = 0; i < n_specs; ++i) {
QPDFPageData& page_data = parsed_specs.at(i);
for (size_t j = 0; j < m->collate; ++j) {
if (cur_page + j < page_data.selected_pages.size()) {
for (size_t j = 0; j < m->collate.at(i); ++j) {
if (cur_page.at(i) + j < page_data.selected_pages.size()) {
got_pages = true;
new_parsed_specs.emplace_back(
page_data, page_data.selected_pages.at(cur_page + j));
page_data, page_data.selected_pages.at(cur_page.at(i) + j));
}
}
cur_page.at(i) += m->collate.at(i);
}
cur_page += m->collate;
}
parsed_specs = new_parsed_specs;
}
Expand Down Expand Up @@ -3019,9 +3019,10 @@ QPDFJob::writeOutfile(QPDF& pdf)
try {
QUtil::remove_file(backup.c_str());
} catch (QPDFSystemError& e) {
*m->log->getError() << m->message_prefix << ": unable to delete original file ("
<< e.what() << ");" << " original file left in " << backup
<< ", but the input was successfully replaced\n";
*m->log->getError()
<< m->message_prefix << ": unable to delete original file (" << e.what() << ");"
<< " original file left in " << backup
<< ", but the input was successfully replaced\n";
}
}
}
Expand Down
31 changes: 28 additions & 3 deletions libqpdf/QPDFJob_config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,27 @@ QPDFJob::Config::collate()
// Handle --collate[=n[,m,...]]: append the requested group sizes to the job's collate list.
//
// An empty parameter (bare --collate) records a single group size of 1. Otherwise the parameter
// is split on commas and each item is converted with QUtil::string_to_uint and appended in order.
// An empty item (leading, doubled, or trailing comma) is reported through usage().
// Returns this so that configuration calls can be chained.
QPDFJob::Config*
QPDFJob::Config::collate(std::string const& parameter)
{
    if (parameter.empty()) {
        o.m->collate.push_back(1);
        return this;
    }
    size_t pos = 0;
    // Parse a,b,c
    while (true) {
        auto end = parameter.find(',', pos);
        // std::string::substr takes a length, not an end position. Convert the comma position
        // into a count so that each item stops at its own comma instead of running past it.
        auto len = (end == std::string::npos) ? std::string::npos : end - pos;
        auto n = parameter.substr(pos, len);
        if (n.empty()) {
            usage("--collate: trailing comma");
        }
        o.m->collate.push_back(QIntC::to_size(QUtil::string_to_uint(n.c_str())));
        if (end == std::string::npos) {
            break;
        }
        pos = end + 1;
    }
    // No fallback is needed here: the loop above always appends at least one value.
    return this;
}

Expand Down Expand Up @@ -932,9 +951,15 @@ QPDFJob::Config::pages()
// Validate the --pages configuration and return the parent Config.
//
// Reports a usage error when no page specifications were given, or when the number of --collate
// values is something other than 0, 1, or exactly the number of page specifications.
QPDFJob::Config*
QPDFJob::PagesConfig::endPages()
{
if (this->config->o.m->page_specs.empty()) {
auto n_specs = config->o.m->page_specs.size();
if (n_specs == 0) {
usage("--pages: no page specifications given");
}
// NOTE(review): this only sees the --collate values recorded so far. Presumably a --collate
// given after the closing "--" has not been parsed yet when this runs, in which case
// n_collate is 0 here and the count check below is bypassed -- confirm against the caller.
auto n_collate = config->o.m->collate.size();
// Accept: no --collate at all, a single value applied to every spec, or one value per spec.
if (!(n_collate == 0 || n_collate == 1 || n_collate == n_specs)) {
usage("--pages: if --collate has more than one value, it must have one value per page "
"specification");
}
return this->config;
}

Expand Down
8 changes: 6 additions & 2 deletions libqpdf/qpdf/auto_job_help.hh
Original file line number Diff line number Diff line change
Expand Up @@ -315,11 +315,13 @@ ap.addOptionHelp("--pages", "modification", "begin page selection", R"(--pages f
Run qpdf --help=page-selection for details.
)");
ap.addOptionHelp("--collate", "modification", "collate with --pages", R"(--collate[=n]
ap.addOptionHelp("--collate", "modification", "collate with --pages", R"(--collate[=n[,m,...]]
Collate rather than concatenate pages specified with --pages.
With a numeric parameter, collate in groups of n. The default
is 1. Run qpdf --help=page-selection for additional details.
is 1. With comma-separated numeric parameters, take n from the
first file, m from the second, etc. Run
qpdf --help=page-selection for additional details.
)");
ap.addOptionHelp("--split-pages", "modification", "write pages to separate files", R"(--split-pages[=n]
Expand Down Expand Up @@ -607,6 +609,8 @@ Run qpdf --help=page-ranges for help with page ranges.
Use --collate=n to cause pages to be collated in groups of n pages
(default 1) instead of concatenating the input.
Use --collate=i,j,k,... to take i from the first, then j from the
second, then k from the third, then i from the first, etc.
Examples:
Expand Down
49 changes: 43 additions & 6 deletions manual/cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1403,18 +1403,21 @@ Related Options
See also :qpdf:ref:`--split-pages`, :qpdf:ref:`--collate`,
:ref:`page-ranges`.

.. qpdf:option:: --collate[=n]
.. qpdf:option:: --collate[=n[,m,...]]
.. help: collate with --pages
Collate rather than concatenate pages specified with --pages.
With a numeric parameter, collate in groups of n. The default
is 1. Run qpdf --help=page-selection for additional details.
is 1. With comma-separated numeric parameters, take n from the
first file, m from the second, etc. Run
qpdf --help=page-selection for additional details.
This option causes :command:`qpdf` to collate rather than
concatenate pages specified with :qpdf:ref:`--pages`. With a
numeric parameter, collate in groups of :samp:`{n}`. The default
is 1.
is 1. With comma-separated numeric parameters, take :samp:`{n}`
from the first file, :samp:`{m}` from the second, etc.

Please see :ref:`page-selection` for additional details.

Expand Down Expand Up @@ -2335,6 +2338,8 @@ Page Selection
Use --collate=n to cause pages to be collated in groups of n pages
(default 1) instead of concatenating the input.
Use --collate=i,j,k,... to take i from the first, then j from the
second, then k from the third, then i from the first, etc.
Examples:
Expand Down Expand Up @@ -2383,9 +2388,13 @@ Notes:
See :ref:`page-ranges` for help on specifying a page range.

Use :samp:`--collate={n}` to cause pages to be collated in groups of
:samp:`{n}` pages (default 1) instead of concatenating the input. Note
that the :qpdf:ref:`--collate` appears outside of ``--pages ... --``
(before ``--pages`` or after ``--``). Pages are pulled from each
:samp:`{n}` pages (default 1) instead of concatenating the input. Use
:samp:`--collate={i},{j},{k},...` to take :samp:`{i}` from the first,
then :samp:`{j}` from the second, then :samp:`{k}` from the third,
then :samp:`{i}` from the first, etc.

Note that :qpdf:ref:`--collate` appears outside of ``--pages ...
--`` (before ``--pages`` or after ``--``). Pages are pulled from each
document in turn. When a document is out of pages, it is skipped. See
examples below.

Expand Down Expand Up @@ -2481,6 +2490,34 @@ Examples

- a.pdf page 5

- You can specify multiple numeric parameters to :qpdf:ref:`--collate`. With
:samp:`--collate={i,j,k}`, pull groups of :samp:`{i}` pages from the
first file, then :samp:`{j}` from the second, then :samp:`{k}` from
the third, repeating. The number of parameters must equal the number
of groups. For example, if you ran

::

qpdf --collate=2,1,3 --empty --pages a.pdf 1-5 b.pdf 6-4 c.pdf r1-r4 -- out.pdf

you would get the following pages in this order:

- a.pdf pages 1 and 2

- b.pdf page 6

- c.pdf last three pages in reverse order

- a.pdf pages 3 and 4

- b.pdf page 5

- c.pdf fourth to last page

- a.pdf page 5

- b.pdf page 4

- Take pages 1 through 5 from :file:`file1.pdf` and pages 11 through
15 in reverse from :file:`file2.pdf`, taking document-level metadata
from :file:`file2.pdf`.
Expand Down
8 changes: 6 additions & 2 deletions manual/qpdf.1
Original file line number Diff line number Diff line change
Expand Up @@ -414,11 +414,13 @@ Related Options:
Run qpdf --help=page-selection for details.
.TP
.B --collate \-\- collate with --pages
--collate[=n]
--collate[=n[,m,...]]

Collate rather than concatenate pages specified with --pages.
With a numeric parameter, collate in groups of n. The default
is 1. Run qpdf --help=page-selection for additional details.
is 1. With comma-separated numeric parameters, take n from the
first file, m from the second, etc. Run
qpdf --help=page-selection for additional details.
.TP
.B --split-pages \-\- write pages to separate files
--split-pages[=n]
Expand Down Expand Up @@ -737,6 +739,8 @@ Run qpdf --help=page-ranges for help with page ranges.

Use --collate=n to cause pages to be collated in groups of n pages
(default 1) instead of concatenating the input.
Use --collate=i,j,k,... to take i from the first, then j from the
second, then k from the third, then i from the first, etc.

Examples:

Expand Down
4 changes: 4 additions & 0 deletions manual/release-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ Planned changes for future 12.x (subject to change):
of pages within a page range. See :ref:`page-ranges` for
details.

- Support comma-separated numeric values with
:qpdf:ref:`--collate` to select different numbers of pages from
different groups.

11.7.0: December 24, 2023
- Bug fixes:

Expand Down
10 changes: 9 additions & 1 deletion qpdf/qtest/arg-parsing.test
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ cleanup();

my $td = new TestDriver('arg-parsing');

my $n_tests = 22;
my $n_tests = 23;

$td->runtest("required argument",
{$td->COMMAND => "qpdf --password minimal.pdf"},
Expand Down Expand Up @@ -94,6 +94,14 @@ $td->runtest("v2-only qpdf json-key",
{$td->REGEXP => ".*\"qpdf\" is only valid for json version > 1.*",
$td->EXIT_STATUS => 2},
$td->NORMALIZE_NEWLINES);
$td->runtest("wrong number of collate args",
{$td->COMMAND =>
"qpdf --collate=2,3 collate-odd.pdf" .
" --pages . minimal.pdf collate-even.pdf -- a.pdf"},
{$td->REGEXP => ".*--collate has more than one value.*",
$td->EXIT_STATUS => 2},
$td->NORMALIZE_NEWLINES);

# Ignoring -- at the top level was never intended but turned out to
# have been there for a long time so that people relied on it. It is
# intentionally not documented.
Expand Down
8 changes: 5 additions & 3 deletions qpdf/qtest/collate.test
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@ my $td = new TestDriver('collate');
my @collate = (
["", "three-files", "collate-odd",
"collate-odd.pdf 1-5 minimal.pdf collate-even.pdf 7-1"],
[1, "three-files", "collate-odd",
["1", "three-files", "collate-odd",
"collate-odd.pdf 1-5 minimal.pdf collate-even.pdf 7-1"],
[2, "three-files-2", "collate-odd",
["2", "three-files-2", "collate-odd",
"collate-odd.pdf 1-5 minimal.pdf collate-even.pdf 7-1"],
["2,3,4", "three-files-2,3,4", "collate-odd",
"collate-odd.pdf 1-5 minimal.pdf collate-even.pdf 7-1"],
);
my $n_tests = 2 * scalar(@collate);
Expand All @@ -28,7 +30,7 @@ foreach my $d (@collate)
{
my ($n, $description, $first, $args) = @$d;
my $collate = '--collate';
if ($n)
if ($n ne "")
{
$collate .= "=$n";
}
Expand Down
Binary file not shown.

0 comments on commit d8d70ec

Please sign in to comment.