Skip to content

Commit

Permalink
Merge branch 'dev' into travis-multiarch-builds
Browse files Browse the repository at this point in the history
  • Loading branch information
jonaswinkler committed Jan 7, 2021
2 parents 9bf4ce2 + 0a469cf commit 838631b
Show file tree
Hide file tree
Showing 143 changed files with 13,453 additions and 1,187 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,4 @@ scripts/nuke

# this is where the compiled frontend is moved to.
/src/documents/static/frontend/
/docs/.vscode/settings.json
4 changes: 4 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ RUN apt-get update \
&& apt-get -y --no-install-recommends install \
build-essential \
curl \
file \
fonts-liberation \
gettext \
ghostscript \
gnupg \
icc-profiles-free \
Expand Down Expand Up @@ -92,6 +94,8 @@ WORKDIR /usr/src/paperless/src/

RUN sudo -HEu paperless python3 manage.py collectstatic --clear --no-input

RUN sudo -HEu paperless python3 manage.py compilemessages

VOLUME ["/usr/src/paperless/data", "/usr/src/paperless/media", "/usr/src/paperless/consume", "/usr/src/paperless/export"]
ENTRYPOINT ["/sbin/docker-entrypoint.sh"]
EXPOSE 8000
Expand Down
3 changes: 2 additions & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@ whitenoise = "~=5.2.0"
watchdog = "*"
whoosh="~=2.7.4"
inotifyrecursive = "~=0.3.4"
ocrmypdf = "*"
ocrmypdf = "~=11.4.5"
tqdm = "*"
tika = "*"

[dev-packages]
coveralls = "*"
Expand Down
631 changes: 347 additions & 284 deletions Pipfile.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[![Build Status](https://travis-ci.org/jonaswinkler/paperless-ng.svg?branch=master)](https://travis-ci.org/jonaswinkler/paperless-ng)
[![Build Status](https://travis-ci.com/jonaswinkler/paperless-ng.svg?branch=master)](https://travis-ci.com/jonaswinkler/paperless-ng)
[![Documentation Status](https://readthedocs.org/projects/paperless-ng/badge/?version=latest)](https://paperless-ng.readthedocs.io/en/latest/?badge=latest)
[![Gitter](https://badges.gitter.im/paperless-ng/community.svg)](https://gitter.im/paperless-ng/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![Docker Hub Pulls](https://img.shields.io/docker/pulls/jonaswinkler/paperless-ng.svg)](https://hub.docker.com/r/jonaswinkler/paperless-ng)
Expand Down
43 changes: 43 additions & 0 deletions docker-compose.tika.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Compose stack that builds Paperless from the local checkout and runs it
# together with the Tika and Gotenberg services it is configured to reach
# via PAPERLESS_TIKA_ENDPOINT / PAPERLESS_TIKA_GOTENBERG_ENDPOINT.
version: "3.4"
services:
  # Redis instance referenced by PAPERLESS_REDIS below.
  broker:
    image: redis:6.0
    restart: always

  webserver:
    build: .
    restart: always
    depends_on:
      - broker
    ports:
      # Quoted so the host:container mapping is always read as a string.
      - "8000:8000"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000"]
      interval: 30s
      timeout: 10s
      retries: 5
    volumes:
      - data:/usr/src/paperless/data
      - media:/usr/src/paperless/media
      - ./export:/usr/src/paperless/export
      - ./consume:/usr/src/paperless/consume
    env_file: docker-compose.env
    environment:
      PAPERLESS_REDIS: redis://broker:6379
      # Environment values are strings; quote numeric-looking ones.
      PAPERLESS_TIKA_ENABLED: "1"
      # Host names below are the service names defined in this file.
      PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://gotenberg:3000
      PAPERLESS_TIKA_ENDPOINT: http://tika:9998

  # NOTE(review): neither the gotenberg nor the tika image is pinned to a
  # tag, so both resolve to `latest`; consider pinning known-good versions.
  gotenberg:
    image: thecodingmachine/gotenberg
    restart: unless-stopped
    environment:
      # Presumably turns off Gotenberg's Chrome-based conversions, which
      # are not needed for Office documents - TODO confirm in Gotenberg docs.
      DISABLE_GOOGLE_CHROME: "1"

  tika:
    image: apache/tika
    restart: unless-stopped

# Named volumes mounted by the webserver for its data and media directories.
volumes:
  data:
  media:
2 changes: 1 addition & 1 deletion docker/gunicorn.conf.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
bind = '[::]:8000'
bind = '0.0.0.0:8000'
backlog = 2048
workers = 3
worker_class = 'sync'
Expand Down
2 changes: 1 addition & 1 deletion docker/hub/docker-compose.postgres.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ services:
POSTGRES_PASSWORD: paperless

webserver:
image: jonaswinkler/paperless-ng:0.9.9
image: jonaswinkler/paperless-ng:0.9.12
restart: always
depends_on:
- db
Expand Down
2 changes: 1 addition & 1 deletion docker/hub/docker-compose.sqlite.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ services:
restart: always

webserver:
image: jonaswinkler/paperless-ng:0.9.9
image: jonaswinkler/paperless-ng:0.9.12
restart: always
depends_on:
- broker
Expand Down
43 changes: 43 additions & 0 deletions docker/hub/docker-compose.tika.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Compose stack that runs the published Paperless image together with the
# Tika and Gotenberg services it is configured to reach via
# PAPERLESS_TIKA_ENDPOINT / PAPERLESS_TIKA_GOTENBERG_ENDPOINT.
version: "3.4"
services:
  # Redis instance referenced by PAPERLESS_REDIS below.
  broker:
    image: redis:6.0
    restart: always

  webserver:
    image: jonaswinkler/paperless-ng:0.9.12
    restart: always
    depends_on:
      - broker
    ports:
      # Quoted so the host:container mapping is always read as a string.
      - "8000:8000"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000"]
      interval: 30s
      timeout: 10s
      retries: 5
    volumes:
      - data:/usr/src/paperless/data
      - media:/usr/src/paperless/media
      - ./export:/usr/src/paperless/export
      - ./consume:/usr/src/paperless/consume
    env_file: docker-compose.env
    environment:
      PAPERLESS_REDIS: redis://broker:6379
      # Environment values are strings; quote numeric-looking ones.
      PAPERLESS_TIKA_ENABLED: "1"
      # Host names below are the service names defined in this file.
      PAPERLESS_TIKA_GOTENBERG_ENDPOINT: http://gotenberg:3000
      PAPERLESS_TIKA_ENDPOINT: http://tika:9998

  # NOTE(review): neither the gotenberg nor the tika image is pinned to a
  # tag, so both resolve to `latest`; consider pinning known-good versions.
  gotenberg:
    image: thecodingmachine/gotenberg
    restart: unless-stopped
    environment:
      # Presumably turns off Gotenberg's Chrome-based conversions, which
      # are not needed for Office documents - TODO confirm in Gotenberg docs.
      DISABLE_GOOGLE_CHROME: "1"

  tika:
    image: apache/tika
    restart: unless-stopped

# Named volumes mounted by the webserver for its data and media directories.
volumes:
  data:
  media:
2 changes: 1 addition & 1 deletion docker/supervisord.conf
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ loglevel=info ; log level; default info; others: debug,warn,trace
user=root

[program:gunicorn]
command=gunicorn -c /usr/src/paperless/gunicorn.conf.py -b '[::]:8000' paperless.wsgi
command=gunicorn -c /usr/src/paperless/gunicorn.conf.py paperless.wsgi
user=paperless

stdout_logfile=/dev/stdout
Expand Down
6 changes: 6 additions & 0 deletions docs/administration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,13 @@ After grabbing the new release and unpacking the contents, do the following:
$ cd src
$ pipenv run python3 manage.py migrate
5. Update translation files.

.. code:: shell-session
$ cd src
$ pipenv run python3 manage.py compilemessages
Management utilities
####################
Expand Down
83 changes: 83 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,86 @@
Changelog
*********

paperless-ng 0.9.12
###################

* Paperless localization

* Thanks to the combined efforts of many users, Paperless is now available in English, Dutch, French and German.

* Thanks to `Jo Vandeginste`_, Paperless has optional support for Office documents such as .docx, .doc, .odt and more.

* See the :ref:`configuration<configuration-tika>` on how to enable this feature. This feature requires two additional services
(one for parsing Office documents and metadata extraction and another for converting Office documents to PDF), and is therefore
not enabled on default installations.
* As with all other documents, paperless converts Office documents to PDF and stores both the original as well as the archived PDF.

* Dark mode

* Thanks to `Michael Shamoon`_, paperless now has a dark mode. Configuration is available in the settings.

* Other changes and additions

* The PDF viewer now uses a local copy of some dependencies instead of fetching them from the internet. Thanks to `slorenz`_.
* Revamped search bar styling thanks to `Michael Shamoon`_.
* Sorting in the document list by clicking on table headers.
* A button was added to the document detail page that assigns a new ASN to a document.
* Form field validation: When providing invalid input in a form (such as a duplicate ASN or no name), paperless now has visual
indicators and clearer error messages about what's wrong.
* Paperless disables buttons with network actions (such as save and delete) when a network action is active. This indicates that
something is happening and prevents double clicking.
* When using "Save & next", the title field is focussed automatically to better support keyboard editing.
* E-Mail: Added filter rule parameters to allow inline attachments (watch out for mails with inlined images!) and attachment filename filters
with wildcards.
* Support for remote user authentication thanks to `Michael Shamoon`_. This is useful for hiding Paperless behind single sign-on applications
such as `authelia <https://www.authelia.com/>`_.
* "Clear filters" has been renamed to "Reset filters" and now correctly restores the default filters on saved views. Thanks to `Michael Shamoon`_.

* Fixes

* Paperless was unable to save views when "Not assigned" was chosen in one of the filter dropdowns.
* Clearer error messages when pre and post consumption scripts do not exist.
* The post consumption script is executed later in the consumption process. Before the change, an ID was passed to the script referring to
a document that did not yet exist in the database.

paperless-ng 0.9.11
###################

* Fixed an issue with the docker image not starting at all due to a configuration change of the web server.


paperless-ng 0.9.10
###################

* Bulk editing

* Thanks to `Michael Shamoon`_, we've got a new interface for the bulk editor.
* There are some configuration options in the settings to alter the behavior.

* Other changes and additions

* Thanks to `zjean`_, paperless now publishes a webmanifest, which is useful for adding the application to home screens on mobile devices.
* The Paperless-ng logo now navigates to the dashboard.
* Filter for documents that don't have any correspondents, types or tags assigned.
* Tags, types and correspondents are now sorted case-insensitively.
* Lots of preparation work for localization support.

* Fixes

* Added missing dependencies for Raspberry Pi builds.
* Fixed an issue with plain text file consumption: Thumbnail generation failed due to missing fonts.
* An issue with the search index reporting missing documents after bulk deletes was fixed.
* Issue with the tag selector not clearing input correctly.
* The consumer used to stop working when encountering an incomplete classifier model file.

.. note::

The bulk delete operations did not update the search index. Therefore, documents that you deleted remained in the index and
caused the search to return messages about missing documents when searching. Further bulk operations will properly update
the index.

However, this change is not retroactive: If you used the delete method of the bulk editor, you need to reindex your search index
by :ref:`running the management command document_index with the argument reindex <administration-index>`.

paperless-ng 0.9.9
##################
Expand Down Expand Up @@ -927,6 +1007,9 @@ bulk of the work on this big change.

* Initial release

.. _slorenz: https://github.com/sisao
.. _Jo Vandeginste: https://github.com/jovandeginste
.. _zjean: https://github.com/zjean
.. _rYR79435: https://github.com/rYR79435
.. _Michael Shamoon: https://github.com/shamoon
.. _jayme-github: http://github.com/jayme-github
Expand Down
74 changes: 67 additions & 7 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,12 @@ PAPERLESS_COOKIE_PREFIX=<str>

Defaults to ``""``, which does not alter the cookie names.

PAPERLESS_ENABLE_HTTP_REMOTE_USER=<bool>
Allows authentication via HTTP_REMOTE_USER which is used by some SSO
applications.

Defaults to ``false``, which disables this feature.

.. _configuration-ocr:

OCR settings
Expand Down Expand Up @@ -210,20 +216,20 @@ PAPERLESS_OCR_MODE=<mode>
into images and puts the OCRed text on top. This works for all documents,
however, the resulting document may be significantly larger and text
won't appear as sharp when zoomed in.

The default is ``skip``, which only performs OCR when necessary and always
creates archived documents.

PAPERLESS_OCR_OUTPUT_TYPE=<type>
Specify the type of PDF documents that paperless should produce.

* ``pdf``: Modify the PDF document as little as possible.
* ``pdfa``: Convert PDF documents into PDF/A-2b documents, which is a
subset of the entire PDF specification and meant for storing
documents long term.
* ``pdfa-1``, ``pdfa-2``, ``pdfa-3`` to specify the exact version of
PDF/A you wish to use.

If not specified, ``pdfa`` is used. Remember that paperless also keeps
the original input file as well as the archived version.

Expand Down Expand Up @@ -275,9 +281,38 @@ PAPERLESS_OCR_USER_ARG=<json>

.. code:: json
{"deskew": true, "optimize": 3, "unpaper_args": "--pre-rotate 90"}
{"deskew": true, "optimize": 3, "unpaper_args": "--pre-rotate 90"}
.. _configuration-tika:

Tika settings
#############

Paperless can make use of `Tika <https://tika.apache.org/>`_ and
`Gotenberg <https://thecodingmachine.github.io/gotenberg/>`_ for parsing and
converting "Office" documents (such as ".doc", ".xlsx" and ".odt"). If you
wish to use this, you must provide a Tika server and a Gotenberg server,
configure their endpoints, and enable the feature.

If you run paperless on docker, you can add those services to the docker-compose
file (see the examples provided).

PAPERLESS_TIKA_ENABLED=<bool>
Enable (or disable) the Tika parser.

Defaults to false.

PAPERLESS_TIKA_ENDPOINT=<url>
Set the endpoint URL where Paperless can reach your Tika server.

Defaults to "http://localhost:9998".

PAPERLESS_TIKA_GOTENBERG_ENDPOINT=<url>
Set the endpoint URL where Paperless can reach your Gotenberg server.

Defaults to "http://localhost:3000".


Software tweaks
###############

Expand Down Expand Up @@ -319,11 +354,14 @@ PAPERLESS_TIME_ZONE=<timezone>
Defaults to UTC.


.. _configuration-polling:

PAPERLESS_CONSUMER_POLLING=<num>
If paperless won't find documents added to your consume folder, it might
not be able to automatically detect filesystem changes. In that case,
specify a polling interval in seconds here, which will then cause paperless
to periodically check your consumption directory for changes.
to periodically check your consumption directory for changes. This will also
disable listening for file system changes with ``inotify``.

Defaults to 0, which disables polling and uses filesystem notifications.

Expand Down Expand Up @@ -400,6 +438,28 @@ PAPERLESS_FILENAME_DATE_ORDER=<format>

Defaults to none, which disables this feature.

PAPERLESS_THUMBNAIL_FONT_NAME=<filename>
Paperless creates thumbnails for plain text files by rendering the content
of the file on an image and uses a predefined font for that. This
font can be changed here.

Note that this won't have any effect on already generated thumbnails.

Defaults to ``/usr/share/fonts/liberation/LiberationSerif-Regular.ttf``.

PAPERLESS_IGNORE_DATES=<string>
Paperless parses a document's creation date from filename and file content.
You may specify a comma-separated list of dates that should be ignored during
this process. This is useful for special dates (like date of birth) that appear
in documents regularly but are very unlikely to be the document's creation date.

You may specify dates in a multitude of formats supported by dateparser (see
https://dateparser.readthedocs.io/en/latest/#popular-formats) but as the dates
need to be comma separated, the options are limited.
Example: "2020-12-02,22.04.1999"

Defaults to an empty string to not ignore any dates.


Binaries
########
Expand Down
Loading

0 comments on commit 838631b

Please sign in to comment.