Skip to content

Commit

Permalink
added sanity checker management command for manual execution jonaswin…
Browse files Browse the repository at this point in the history
  • Loading branch information
jonaswinkler committed Feb 13, 2021
1 parent ed478a1 commit 8b2965d
Show file tree
Hide file tree
Showing 6 changed files with 101 additions and 3 deletions.
2 changes: 1 addition & 1 deletion docker/install_management_commands.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
for command in document_archiver document_exporter document_importer mail_fetcher document_create_classifier document_index document_renamer document_retagger document_thumbnails;
for command in document_archiver document_exporter document_importer mail_fetcher document_create_classifier document_index document_renamer document_retagger document_thumbnails document_sanity_checker;
do
echo "installing $command..."
sed "s/management_command/$command/g" management_script.sh > /usr/local/bin/$command
Expand Down
28 changes: 28 additions & 0 deletions docs/administration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,34 @@ the naming scheme.
The command takes no arguments and processes all your documents at once.


.. _utilities-sanity-checker:

Sanity checker
==============

Paperless has a built-in sanity checker that inspects your document collection for issues.

The issues detected by the sanity checker are as follows:

* Missing original files.
* Missing archive files.
* Inaccessible original files due to improper permissions.
* Inaccessible archive files due to improper permissions.
* Corrupted original documents, detected by comparing their checksum against the one stored in the database.
* Corrupted archive documents, detected by comparing their checksum against the one stored in the database.
* Missing thumbnails.
* Inaccessible thumbnails due to improper permissions.
* Documents without any content (warning).
* Orphaned files in the media directory (warning). These are files that are not referenced by any document in paperless.


.. code::

    document_sanity_checker

The command takes no arguments. Depending on the size of your document archive, this may take some time.


Fetching e-mail
===============

Expand Down
2 changes: 2 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ paperless-ng 1.1.2

* Always show top left corner of thumbnails, even for extra wide documents.

* Added a management command for executing the sanity checker directly. See :ref:`utilities-sanity-checker`.

paperless-ng 1.1.1
##################

Expand Down
27 changes: 27 additions & 0 deletions src/documents/management/commands/document_sanity_checker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import logging
from django.core.management.base import BaseCommand
from documents.sanity_checker import check_sanity, SanityError, SanityWarning

logger = logging.getLogger("paperless.management.sanity_checker")


class Command(BaseCommand):
    """Management command that runs the sanity checker and logs its findings."""

    # Help text shown for this command. Kept as a plain literal: stripping
    # space characters from it would also remove the spaces between words.
    help = "This command checks your document archive for issues."

    def handle(self, *args, **options):
        """Run the sanity check and log every reported message.

        The checker is invoked with ``progress=True``. Each returned message
        is logged at ERROR level for a ``SanityError``, at WARNING level for a
        ``SanityWarning`` and at INFO level for anything else. If no messages
        are returned, a single INFO line is logged instead.
        """
        messages = check_sanity(progress=True)

        if not messages:
            logger.info("No issues found.")
            return

        for msg in messages:
            # NOTE(review): assumes SanityError and SanityWarning are not
            # subclasses of one another - confirm in documents.sanity_checker.
            if isinstance(msg, SanityError):
                logger.error(str(msg))
            elif isinstance(msg, SanityWarning):
                logger.warning(str(msg))
            else:
                logger.info(str(msg))
10 changes: 8 additions & 2 deletions src/documents/sanity_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os

from django.conf import settings
from tqdm import tqdm

from documents.models import Document

Expand Down Expand Up @@ -38,7 +39,7 @@ def __str__(self):
f"{message_string}\n\n===============\n\n")


def check_sanity():
def check_sanity(progress=False):
messages = []

present_files = []
Expand All @@ -50,7 +51,12 @@ def check_sanity():
if lockfile in present_files:
present_files.remove(lockfile)

for doc in Document.objects.all():
if progress:
docs = tqdm(Document.objects.all())
else:
docs = Document.objects.all()

for doc in docs:
# Check sanity of the thumbnail
if not os.path.isfile(doc.thumbnail_path):
messages.append(SanityError(
Expand Down
35 changes: 35 additions & 0 deletions src/documents/tests/test_management.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def test_naming_priorities(self):
self.assertEqual(doc1.archive_filename, "document.pdf")
self.assertEqual(doc2.archive_filename, "document_01.pdf")


class TestDecryptDocuments(TestCase):

@override_settings(
Expand Down Expand Up @@ -154,3 +155,37 @@ def test_create_classifier(self, m):
call_command("document_create_classifier")

m.assert_called_once()


class TestSanityChecker(DirectoriesMixin, TestCase):
    """Tests for the ``document_sanity_checker`` management command."""

    def _create_document_with_files(self, **fields):
        """Create a test document and touch its source and thumbnail files."""
        created = Document.objects.create(
            title="test", filename="test.pdf", **fields
        )
        for file_path in (created.source_path, created.thumbnail_path):
            Path(file_path).touch()
        return created

    def test_no_errors(self):
        """With no documents, exactly one log line is emitted."""
        with self.assertLogs() as captured:
            call_command("document_sanity_checker")

        log_lines = captured.output
        self.assertEqual(len(log_lines), 1)
        self.assertIn("No issues found.", log_lines[0])

    def test_warnings(self):
        """A document created without content yields warnings, no errors."""
        with mock.patch(
            "documents.management.commands.document_sanity_checker.logger.warning"
        ) as warning, mock.patch(
            "documents.management.commands.document_sanity_checker.logger.error"
        ) as error:
            # The checksum below is the MD5 digest of empty input, which is
            # what the freshly touched (empty) source file contains.
            self._create_document_with_files(
                checksum="d41d8cd98f00b204e9800998ecf8427e"
            )

            call_command("document_sanity_checker")

        error.assert_not_called()
        warning.assert_called()

    def test_errors(self):
        """A document whose stored checksum is "abc" yields errors only."""
        with mock.patch(
            "documents.management.commands.document_sanity_checker.logger.warning"
        ) as warning, mock.patch(
            "documents.management.commands.document_sanity_checker.logger.error"
        ) as error:
            self._create_document_with_files(content="test", checksum="abc")

            call_command("document_sanity_checker")

        warning.assert_not_called()
        error.assert_called()

0 comments on commit 8b2965d

Please sign in to comment.