Skip to content
This repository has been archived by the owner on Feb 16, 2023. It is now read-only.

Commit

Permalink
Merge branch 'dev' of github.com:jonaswinkler/paperless-ng into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
jonaswinkler committed Aug 18, 2021
2 parents 7411d7c + 68fb85c commit 49e049f
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 10 deletions.
1 change: 1 addition & 0 deletions paperless.conf.example
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
#PAPERLESS_CONSUMER_POLLING=10
#PAPERLESS_CONSUMER_DELETE_DUPLICATES=false
#PAPERLESS_CONSUMER_RECURSIVE=false
#PAPERLESS_CONSUMER_IGNORE_PATTERNS=[".DS_STORE/*", "._*", ".stfolder/*"]
#PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=false
#PAPERLESS_OPTIMIZE_THUMBNAILS=true
#PAPERLESS_POST_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
Expand Down
16 changes: 6 additions & 10 deletions src/documents/management/commands/document_consumer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
import os
from pathlib import Path
from pathlib import Path, PurePath
from threading import Thread
from time import sleep

Expand Down Expand Up @@ -36,15 +36,11 @@ def _tags_from_path(filepath):
return tag_ids


def _is_ignored(filepath):
# https://github.com/jonaswinkler/paperless-ng/discussions/1037
basename = os.path.basename(filepath)
if basename == ".DS_STORE":
return True
if basename.startswith("._"):
return True

return False
def _is_ignored(filepath: str) -> bool:
filepath_relative = PurePath(filepath).relative_to(
settings.CONSUMPTION_DIR)
return any(
filepath_relative.match(p) for p in settings.CONSUMER_IGNORE_PATTERNS)


def _consume(filepath):
Expand Down
16 changes: 16 additions & 0 deletions src/documents/tests/test_management_consumer.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,22 @@ def test_mac_write(self):
fnames = [os.path.basename(args[1]) for args, _ in self.task_mock.call_args_list]
self.assertCountEqual(fnames, ["my_file.pdf", "my_second_file.pdf"])

def test_is_ignored(self):
test_paths = [
(os.path.join(self.dirs.consumption_dir, "foo.pdf"), False),
(os.path.join(self.dirs.consumption_dir, "foo","bar.pdf"), False),
(os.path.join(self.dirs.consumption_dir, ".DS_STORE", "foo.pdf"), True),
(os.path.join(self.dirs.consumption_dir, "foo", ".DS_STORE", "bar.pdf"), True),
(os.path.join(self.dirs.consumption_dir, ".stfolder", "foo.pdf"), True),
(os.path.join(self.dirs.consumption_dir, "._foo.pdf"), True),
(os.path.join(self.dirs.consumption_dir, "._foo", "bar.pdf"), False),
]
for file_path, expected_ignored in test_paths:
self.assertEqual(
expected_ignored,
document_consumer._is_ignored(file_path),
f'_is_ignored("{file_path}") != {expected_ignored}')


@override_settings(CONSUMER_POLLING=1, CONSUMER_POLLING_DELAY=1, CONSUMER_POLLING_RETRY_COUNT=20)
class TestConsumerPolling(TestConsumer):
Expand Down
6 changes: 6 additions & 0 deletions src/paperless/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,12 @@ def default_threads_per_worker(task_workers):

CONSUMER_RECURSIVE = __get_boolean("PAPERLESS_CONSUMER_RECURSIVE")

# Ignore glob patterns, relative to PAPERLESS_CONSUMPTION_DIR
CONSUMER_IGNORE_PATTERNS = list(
json.loads(
os.getenv("PAPERLESS_CONSUMER_IGNORE_PATTERNS",
'[".DS_STORE/*", "._*", ".stfolder/*"]')))

CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")

OPTIMIZE_THUMBNAILS = __get_boolean("PAPERLESS_OPTIMIZE_THUMBNAILS", "true")
Expand Down

0 comments on commit 49e049f

Please sign in to comment.