-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmetadata_handler.py
78 lines (68 loc) · 3.74 KB
/
metadata_handler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
from app import celery_app, db
from celery.exceptions import SoftTimeLimitExceeded
from app.models import User, FileMetadata
from app.docker_handler import extract_metadata
import os
import ast
@celery_app.task(bind=True, default_retry_delay=0)
def extract_user_metadata(self, file_path, authentication, extractor, cli_args=None):
    """Extracts metadata from a file and writes a FileMetadata object to the SQL server.

    After the FileMetadata row is committed, the metadata string is parsed
    with ast.literal_eval. If the first key of the resulting dict is
    "json/xml" or "tabular", existing 'keyword' metadata for this file and
    user is deleted and a follow-up 'keyword' extraction over the extracted
    text is queued on the 'priority' queue with a 10 second time limit. That
    follow-up step is best effort: any error in it is logged and the task
    still returns the metadata that was extracted.

    Parameters:
        file_path (str): File path of file to extract metadata from.
        authentication (str): User authentication as returned by login().
        extractor (str): Name of extractor to use to extract metadata.
        cli_args (list): Additional command line arguments to pass to the
            extractor. Defaults to no additional arguments.

    Returns:
        The value returned by extract_metadata() (expected to be the string
        form of a dict).

    Raises:
        celery.exceptions.Retry: If the soft time limit is exceeded during
            extraction or while saving; the task is retried with
            soft_time_limit=None.
    """
    # Imported here so this block does not depend on a module-level import.
    import logging

    # A list default in the signature would be one object shared by every
    # call; build a fresh list per call instead.
    if cli_args is None:
        cli_args = []

    try:
        metadata_str = extract_metadata(extractor, file_path, cli_args)
        user = User.query.filter_by(user_uuid=authentication).first()
        file_metadata = FileMetadata(file_path=file_path, metadata_dict=metadata_str,
                                     user=user, extractor=extractor)
        db.session.add(file_metadata)
        db.session.commit()

        try:
            metadata_dict = ast.literal_eval(metadata_str)
            # Only dicts can trigger the follow-up; anything else is ignored.
            if isinstance(metadata_dict, dict):
                first_key = next(iter(metadata_dict), None)
            else:
                first_key = None

            if first_key in ("json/xml", "tabular"):
                # Drop keyword metadata from earlier runs before re-queuing.
                stale_keywords = FileMetadata.query.filter_by(file_path=file_path,
                                                              user_uuid=authentication,
                                                              extractor='keyword')
                for metadata in stale_keywords:
                    db.session.delete(metadata)
                db.session.commit()

                if first_key == "json/xml":
                    text_string = metadata_dict["json/xml"]["strings"]
                else:
                    text_string = ' '.join(metadata_dict["tabular"]["physical"]["preamble"])

                extract_user_metadata.apply_async(args=[file_path, authentication, "keyword",
                                                        ["--text_string", text_string]],
                                                  time_limit=10, queue='priority')
        except Exception:
            # Best effort: the primary metadata is already committed, so a
            # failure here must not fail (or retry) the whole task. Catching
            # Exception rather than everything lets KeyboardInterrupt and
            # SystemExit propagate, and the traceback is logged instead of
            # being discarded.
            logging.getLogger(__name__).exception(
                "Could not queue follow-up keyword extraction for %s", file_path)
    except SoftTimeLimitExceeded:
        # retry() raises celery.exceptions.Retry; the explicit raise makes it
        # clear that the return below is never reached on this path.
        raise self.retry(soft_time_limit=None)

    return metadata_str
def delete_user_metadata(file_path, authentication, extractor=None):
    """Removes stored metadata rows for one of a user's files.

    Parameters:
        file_path (str): File path whose metadata should be removed.
        authentication (str): User authentication as returned by login().
        extractor (str): Restrict the deletion to metadata produced by this
            extractor. If None, every metadata row for file_path is removed.

    Returns:
        (str): A newline-terminated status message naming the file (by its
        base name) and saying whether metadata was deleted or none existed.
    """
    # Build the filter once; the extractor criterion is optional.
    criteria = {"file_path": file_path, "user_uuid": authentication}
    if extractor is not None:
        criteria["extractor"] = extractor

    rows = FileMetadata.query.filter_by(**criteria).all()
    if not rows:
        return "Metadata for {} does not exist\n".format(os.path.basename(file_path))

    for row in rows:
        db.session.delete(row)
    db.session.commit()

    file_name = os.path.basename(file_path)
    if extractor is not None:
        return "Successfully deleted {} metadata for {}\n".format(extractor, file_name)
    return "Successfully deleted metadata for {}\n".format(file_name)