Skip to content

Commit

Permalink
try to run convert, but fall back on gs if needed
Browse files: browse the repository at this point in the history
  • Loading branch information
JensPfeifle committed Mar 3, 2019
1 parent ea282c2 commit 29b0886
Showing 1 changed file with 33 additions and 19 deletions.
52 changes: 33 additions & 19 deletions src/paperless_tesseract/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,25 +46,39 @@ def get_thumbnail(self):
"""

out_path = os.path.join(self.tempdir, "convert.png")
gs_out_path = os.path.join(self.tempdir, "gs_out.png")

# Extract the first PDF page as a PNG using Ghostscript
# https://github.com/danielquinn/paperless/issues/447
# call gs first
cmd = [self.GHOSTSCRIPT,
"-q",
"-sDEVICE=pngalpha",
"-o", gs_out_path,
self.document_path]
if not subprocess.Popen(cmd).wait() == 0:
raise ParseError("Thumbnail (gs) failed at {}".format(cmd))
# then run convert on the output from gs
run_convert(
self.CONVERT,
"-scale", "500x5000",
"-alpha", "remove",
gs_out_path,
out_path

# Run convert to get a decent thumbnail
try:
run_convert(
self.CONVERT,
"-scale", "500x5000",
"-alpha", "remove",
"{}[0]".format(self.document_path),
out_path
)
except ParseError:
# if convert fails, fall back to extracting
# the first PDF page as a PNG using Ghostscript
self.log(
"warning",
"Thumbnail generation with ImageMagick failed, "
"falling back to Ghostscript."
)
gs_out_path = os.path.join(self.tempdir, "gs_out.png")
cmd = [self.GHOSTSCRIPT,
"-q",
"-sDEVICE=pngalpha",
"-o", gs_out_path,
self.document_path]
if not subprocess.Popen(cmd).wait() == 0:
raise ParseError("Thumbnail (gs) failed at {}".format(cmd))
# then run convert on the output from gs
run_convert(
self.CONVERT,
"-scale", "500x5000",
"-alpha", "remove",
gs_out_path,
out_path
)

return out_path
Expand Down

0 comments on commit 29b0886

Please sign in to comment.