From 178375eeae18430dddccfd7e6c58566e7872c011 Mon Sep 17 00:00:00 2001 From: jsjiang Date: Thu, 15 Aug 2024 17:14:30 -0700 Subject: [PATCH 001/115] remove binder from the queuing services --- ezidapp/management/commands/proc-cleanup-async-queues.py | 2 -- impl/enqueue.py | 3 +-- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/ezidapp/management/commands/proc-cleanup-async-queues.py b/ezidapp/management/commands/proc-cleanup-async-queues.py index 88a8bf2ba..d9f578ff1 100644 --- a/ezidapp/management/commands/proc-cleanup-async-queues.py +++ b/ezidapp/management/commands/proc-cleanup-async-queues.py @@ -35,7 +35,6 @@ class Command(ezidapp.management.commands.proc_base.AsyncProcessingCommand): setting = 'DAEMONS_QUEUE_CLEANUP_ENABLED' queueType = { - 'binder': ezidapp.models.async_queue.BinderQueue, 'crossref': ezidapp.models.async_queue.CrossrefQueue, 'datacite': ezidapp.models.async_queue.DataciteQueue, 'search': ezidapp.models.async_queue.SearchIndexerQueue @@ -72,7 +71,6 @@ def run(self): # set status for each handle system identifierStatus = { - 'binder' : False, 'crossref' : False, 'datacite' : False, 'search' : False diff --git a/impl/enqueue.py b/impl/enqueue.py index 82690f3be..aacf5ae27 100644 --- a/impl/enqueue.py +++ b/impl/enqueue.py @@ -49,7 +49,7 @@ def enqueue( ref_id_model = create_ref_id_model(si_model) queue_model_list = [ezidapp.models.async_queue.SearchIndexerQueue] - # Do not add reserved identifiers to binder, crossref, or datacite queues + # Do not add reserved identifiers to crossref, or datacite queues # When the identifier entry is updated to not be reserved, then the various # external services will be called as appropriate. 
# See https://github.com/CDLUC3/ezid/blob/v2.0.6/impl/backproc.py#L117 for @@ -58,7 +58,6 @@ def enqueue( if updateExternalServices: queue_model_list.extend( ( - ezidapp.models.async_queue.BinderQueue, ezidapp.models.async_queue.CrossrefQueue, ezidapp.models.async_queue.DataciteQueue, ) From 93df070e185e2df3532c4e8099f9bf29b83fbcfa Mon Sep 17 00:00:00 2001 From: jsjiang Date: Thu, 15 Aug 2024 17:18:00 -0700 Subject: [PATCH 002/115] remove binder from diag-identifier --- .../management/commands/diag-identifier.py | 158 ------------------ 1 file changed, 158 deletions(-) diff --git a/ezidapp/management/commands/diag-identifier.py b/ezidapp/management/commands/diag-identifier.py index 9dbc07c68..5d379faba 100644 --- a/ezidapp/management/commands/diag-identifier.py +++ b/ezidapp/management/commands/diag-identifier.py @@ -199,28 +199,6 @@ def add_arguments(self, parser:argparse.ArgumentParser): help="Ending date for metrics" ) - _syncmeta = subparsers.add_parser( - "syncmeta", - help=("Sends metadata to N2T for each row text file source. Rows starting with space or '#' are ignored.\n" - "Example:\n" - " ./manage.py diag-identifier syncmeta -f pid_list.txt" - ) - ) - _syncmeta.add_argument( - '-f', - '--from', - type=str, - help="Text file with one identifier per line.", - required=True - ) - _syncmeta.add_argument( - '-s', - '--start', - type=str, - help="Identifier in list to start from", - default=None - ) - def diff_n2t(self, identifier:ezidapp.models.identifier)->dict: res = {} @@ -242,52 +220,6 @@ def diff_n2t(self, identifier:ezidapp.models.identifier)->dict: return res - def prepare_n2t_metadata(self, identifier:ezidapp.models.identifier, n2t_meta:typing.Optional[dict]=None)->dict: - '''Prepare metadata for sending to N2T - - Returns a dictionary of metadata for identifier that can be sent to - N2T using impl.noid_egg.setElements(identifier.identifier, m) to - set or update the N2T entry for identifier. 
- - Metadata is sent to N2T for all states except Reserved DOIs, for which N2T is generally null. - ''' - _legacy = identifier.toLegacy() - # See proc_binder.update - if n2t_meta is None: - # Retrieve the existing metadata from N2T - n2t_meta = impl.noid_egg.getElements(identifier.identifier) - # if no metadata on N2T then initialize a blank for population. - if n2t_meta is None: - n2t_meta = {} - - if identifier.isReserved: - #special case - reserved - do nothing - log.info("Reserved DOIs have null N2T metadata.") - # To delete metadata on N2T, send keys with empty values, but we don't want to - # delete all the keys since that has the effect of deleting the identifier from N2T. - #for k in n2t_meta: - # n2t_meta[k] = "" - #return n2t_meta - return {} - - # First, update m with provided metadata - for k, v in list(_legacy.items()): - # If the provided metadata matches existing, then ignore - if n2t_meta.get(k) == v: - del n2t_meta[k] - # Otherwise add property to list for sending back to N2T - else: - n2t_meta[k] = v - # If properties retrieved from N2T are not present in the supplied - # update metadata, then set the value of the field to an empty string. - # An empty value results in an "rm" (remove) operation for that field - # being sent to N2T. 
- for k in list(n2t_meta.keys()): - if k not in _legacy: - n2t_meta[k] = "" - return n2t_meta - - def handle_show(self, *args, **opts): def jsonable_instance(o): if o is None: @@ -340,31 +272,6 @@ def tstamp_to_text(t): entry["cm_eq_metadata"] = _mequal except zlib.error: log.info("No cm section in %s", identifier.identifier) - n2t_meta = None - if opts["N2T"]: - # Retrieve entry from N2T - n2t_meta = impl.noid_egg.getElements(identifier.identifier) - entry["n2t"] = n2t_meta - if opts["sync"]: - _legacy = identifier.toLegacy() - # See proc_binder.update - # Retrieve the existing metadata from N2T - m = self.prepare_n2t_metadata(identifier, n2t_meta) - if len(m) > 0: - log.warning("Updating N2T metadata for %s", identifier.identifier) - log.info("Pending updates for %s:\n%s", identifier.identifier, m) - self.stdout.write(f"About to update {identifier.identifier} !") - response = input("Enter Y to continue, anything else aborts: ") - if response.strip() == 'Y': - impl.noid_egg.setElements(identifier.identifier, m) - ## - # Retrieve the updated metadata and add to the entry - entry["n2t_updated"] = impl.noid_egg.getElements(identifier.identifier) - else: - self.stdout.write("Aborted.") - else: - log.info("No pending updates for %s", identifier.identifier) - entries.append(entry) self.stdout.write(json.dumps(entries, indent=2, sort_keys=True)) @@ -476,69 +383,6 @@ def handle_metrics(self, *args, **opts): for row in cursor.fetchall(): writer.writerow(row) - - def handle_syncmeta(self, *args, **opts): - '''For each line in `from`: - update metadata - ''' - fn_src = opts.get('from') - fn_dst = fn_src + ".json" - start_at = opts.get("start", None) - log.info("Recording changes to %s", fn_dst) - identifiers = [] - add_id = True - if start_at is not None: - add_id = False - start_at = start_at.strip() - with open(fn_src) as _src: - for row in _src: - if row.startswith('ark:') or row.startswith('doi:'): - if not add_id: - if row.strip() == start_at: - add_id = True - if 
add_id: - identifiers.append(row.strip()) - log.info("Loaded %s identifiers from %s", len(identifiers), fn_src) - log.info("Loading status...") - with open(fn_dst, 'a') as f_dest: - for pid in identifiers: - self.stdout.write(pid) - result = {'pid':pid, 'original': {}, 'change': {}, 'updated':{}} - identifier = ezidapp.models.identifier.SearchIdentifier.objects.get(identifier=pid) - if identifier is None: - log.error('Identifier %s could not be loaded!', pid) - break - if identifier.isDatacite: - # handle datacite target url - doi = identifier.identifier[4:] - datacenter = str(identifier.datacenter) - log.info("Setting target for %s (%s) to %s", doi, datacenter, identifier.resolverTarget) - r = impl.datacite.setTargetUrl(doi, identifier.resolverTarget, datacenter) - if r is not None: - # There was a failure in the request - log.error("Failed to set target url for DataCite DOI: %s", doi) - pass - elif identifier.isCrossref: - # handle crossref target url - pass - result['original'] = impl.noid_egg.getElements(identifier.identifier) - n2t_meta = copy.deepcopy(result['original']) - result['change'] = self.prepare_n2t_metadata(identifier, n2t_meta=n2t_meta) - self.stdout.write(json.dumps(result['change'])) - if result['change'] != {}: - # Send update request - impl.noid_egg.setElements(identifier.identifier, result['change']) - # Retrieve the updated n2t meta - result['updated'] = impl.noid_egg.getElements(identifier.identifier) - else: - # no change - result['updated'] = result['original'] - f_dest.write(json.dumps(result)) - f_dest.write("\n") - f_dest.flush() - - - def handle(self, *args, **opts): operation = opts['operation'] if operation == 'show': @@ -552,7 +396,5 @@ def handle(self, *args, **opts): self.handle_resolve(*args, **opts) elif operation == 'metrics': self.handle_metrics(*args, **opts) - elif operation =='syncmeta': - self.handle_syncmeta(*args, **opts) From 2f1fcccbb34132a32fc4b8f589960381743bd0fd Mon Sep 17 00:00:00 2001 From: jsjiang Date: Thu, 15 
Aug 2024 17:18:20 -0700 Subject: [PATCH 003/115] remove binder configs --- ansible/group_vars/all | 3 --- settings/settings.py.j2 | 7 ------- settings/tests.py | 7 ------- tests/util/create_settings.py | 3 --- tests/util/mk_test_settings.py | 3 --- 5 files changed, 23 deletions(-) diff --git a/ansible/group_vars/all b/ansible/group_vars/all index cb00737d7..700962623 100644 --- a/ansible/group_vars/all +++ b/ansible/group_vars/all @@ -83,9 +83,6 @@ admin_username: "{{ ssm_params['admin_username'] }}" allocator_cdl_password: "{{ ssm_params['allocator_cdl_password'] }}" allocator_purdue_password: "{{ ssm_params['allocator_purdue_password'] }}" -binder_url: "{{ ssm_params['binder_url'] }}" -binder_username: "{{ ssm_params['binder_username'] }}" -binder_password: "{{ ssm_params['binder_password'] }}" cloudwatch_instance_name: "{{ ansible_facts.hostname }}" crossref_username: "{{ ssm_params['crossref_username'] }}" crossref_password: "{{ ssm_params['crossref_password'] }}" diff --git a/settings/settings.py.j2 b/settings/settings.py.j2 index 6d0fc2064..9b63e92a1 100644 --- a/settings/settings.py.j2 +++ b/settings/settings.py.j2 @@ -126,7 +126,6 @@ if DAEMONS_ENABLED == 'auto': # - True: The daemon is available to be started. # - False: The daemon cannot run. # - See the DAEMONS_ENABLED setting above. -DAEMONS_BINDER_ENABLED = True DAEMONS_QUEUE_CLEANUP_ENABLED = True DAEMONS_CROSSREF_ENABLED = True DAEMONS_DATACITE_ENABLED = True @@ -430,12 +429,6 @@ S3_BUCKET_DOWNLOAD_PATH = 'download' GZIP_COMMAND = '/usr/bin/gzip' ZIP_COMMAND = '/usr/bin/zip' -BINDER_URL = '{{ binder_url }}' -BINDER_USERNAME = '{{ binder_username }}' -BINDER_PASSWORD = '{{ binder_password }}' -BINDER_NUM_ATTEMPTS = 3 -BINDER_REATTEMPT_DELAY = 5 - # The ARK resolvers correspond to the above binders. 
RESOLVER_DOI = '{{ resolver_doi }}' RESOLVER_ARK = '{{ resolver_ark }}' diff --git a/settings/tests.py b/settings/tests.py index 2bf524937..8df2992e1 100644 --- a/settings/tests.py +++ b/settings/tests.py @@ -128,7 +128,6 @@ # - True: The daemon is available to be started. # - False: The daemon cannot run. # - See the DAEMONS_ENABLED setting above. -DAEMONS_BINDER_ENABLED = True DAEMONS_CROSSREF_ENABLED = True DAEMONS_DATACITE_ENABLED = True DAEMONS_DOWNLOAD_ENABLED = True @@ -389,12 +388,6 @@ GZIP_COMMAND = '/usr/bin/gzip' ZIP_COMMAND = '/usr/bin/zip' -BINDER_URL = 'https://n2t-stg.n2t.net/a/ezid/b' -BINDER_USERNAME = 'ezid' -BINDER_PASSWORD = '' -BINDER_NUM_ATTEMPTS = 3 -BINDER_REATTEMPT_DELAY = 5 - # The ARK resolvers correspond to the above binders. RESOLVER_DOI = 'https://doi.org' RESOLVER_ARK = 'https://n2t-stg.n2t.net' diff --git a/tests/util/create_settings.py b/tests/util/create_settings.py index ceeacdf94..672d18460 100755 --- a/tests/util/create_settings.py +++ b/tests/util/create_settings.py @@ -23,9 +23,6 @@ 'email_new_account': 'invalid@invalid.invalid', 'admin_username': 'admin', 'admin_password': 'admin', - 'binder_url': 'https://n2t-stg.n2t.net/a/ezid/b', - 'binder_username': 'ezid', - 'binder_password': '', 'resolver_doi': 'https://doi.org', 'resolver_ark': 'https://n2t-stg.n2t.net', 'datacite_doi_url': 'https://mds.datacite.org/doi', diff --git a/tests/util/mk_test_settings.py b/tests/util/mk_test_settings.py index be57c5b03..e05f1aae6 100755 --- a/tests/util/mk_test_settings.py +++ b/tests/util/mk_test_settings.py @@ -51,9 +51,6 @@ 'admin_search_user_pid': 'ark:/99166/p9kw57h4w', 'admin_search_group_pid': 'ark:/99166/p9g44hq02', # Misc - 'binder_url': 'https://n2t-stg.n2t.net/a/ezid/b', - 'binder_username': 'ezid', - 'binder_password': '', 'resolver_doi': 'https://doi.org', 'resolver_ark': 'https://n2t-stg.n2t.net', 'datacite_doi_url': 'https://mds.datacite.org/doi', From 8e2be2cb91b06d1717ef682ced4c0262fb64f821 Mon Sep 17 00:00:00 2001 From: 
jsjiang Date: Fri, 16 Aug 2024 12:05:50 -0700 Subject: [PATCH 004/115] Remove n2t/binder related code Remove noid_egg library and dependencies Remove proc-binder job Remove n2t diag-identifier tool --- .../management/commands/diag-identifier.py | 55 +--- ezidapp/management/commands/proc-binder.py | 119 --------- impl/api.py | 5 +- impl/noid_egg.py | 246 ------------------ impl/noid_nog_standalone.py | 50 ---- migrate_db_to_py3/diag-create-fixtures.py | 1 - 6 files changed, 2 insertions(+), 474 deletions(-) delete mode 100644 ezidapp/management/commands/proc-binder.py delete mode 100644 impl/noid_egg.py delete mode 100644 impl/noid_nog_standalone.py diff --git a/ezidapp/management/commands/diag-identifier.py b/ezidapp/management/commands/diag-identifier.py index 5d379faba..530f0d652 100644 --- a/ezidapp/management/commands/diag-identifier.py +++ b/ezidapp/management/commands/diag-identifier.py @@ -9,8 +9,6 @@ This command does not alter any information in the database, and should be safe to run at any time, including a running production instance. -Note however, that this command MAY alter the information in N2T when the --sync option -is used. Confirmation is requested before any metadata updates are propagated to N2T. 
""" import argparse @@ -36,7 +34,6 @@ import ezidapp.models.identifier import ezidapp.models.user import impl.datacite -import impl.noid_egg log = logging.getLogger(__name__) @@ -67,7 +64,7 @@ def add_arguments(self, parser:argparse.ArgumentParser): _show = subparsers.add_parser( "show", - help=("Show available metadata for an identifier, and optionally sync the N2T record.\n" + help=("Show available metadata for an identifier.\n" "Example:\n" " Default:\n" " ./manage.py diag-identifier show ark:/62930/d1n739\n") @@ -84,12 +81,6 @@ def add_arguments(self, parser:argparse.ArgumentParser): action='store_true', help='Show Identifier instead of SearchIdentifier table entry', ) - _show.add_argument( - '-y', - '--legacy', - action='store_true', - help='Show legacy form of identifier record', - ) _show.add_argument( '-m', '--cm', @@ -108,17 +99,6 @@ def add_arguments(self, parser:argparse.ArgumentParser): action='store_true', help='Convert timestamps to textual time representation', ) - _show.add_argument( - '-N', - '--N2T', - action='store_true', - help='Retrieve record from N2T if available', - ) - _show.add_argument( - '--sync', - action='store_true', - help="Synchronize the N2T entry with metadata from the database.", - ) _list = subparsers.add_parser( "list", @@ -154,11 +134,6 @@ def add_arguments(self, parser:argparse.ArgumentParser): default=[], help="Comma separated list of fields in addition to identifier to list." 
) - _list.add_argument( - '--compare', - action='store_true', - help='Show difference between EZID and N2T metadata.', - ) _list.add_argument( '-m', '--max_rows', @@ -199,27 +174,6 @@ def add_arguments(self, parser:argparse.ArgumentParser): help="Ending date for metrics" ) - - def diff_n2t(self, identifier:ezidapp.models.identifier)->dict: - res = {} - n2t_meta = impl.noid_egg.getElements(identifier.identifier) - if n2t_meta is None: - n2t_meta = {} - _legacy = identifier.toLegacy() - for k, v in _legacy.items(): - res[k] = [v, None] - # If properties retrieved from N2T are not present in the supplied - # update metadata, then set the value of the field to an empty string. - # An empty value results in an "rm" (remove) operation for that field - # being sent to N2T. - for k, v in n2t_meta.items(): - if k not in res: - res[k] = [None, v] - else: - res[k][1] = v - return res - - def handle_show(self, *args, **opts): def jsonable_instance(o): if o is None: @@ -250,9 +204,6 @@ def tstamp_to_text(t): # but we want to futz around with the cm section and other fields for each instance. 
entry = jsonable_instance(identifier) entry["isAgentPid"] = identifier.isAgentPid - if opts["legacy"]: - # Get the "legacy" format, which is used for sending to N2T binder - entry["legacy"] = identifier.toLegacy() if opts["expanded"]: for field_name in expand_fields: entry["fields"][field_name] = jsonable_instance(getattr(identifier, field_name)) @@ -329,14 +280,10 @@ def handle_list(self, *args, **opts): identifier_class = ezidapp.models.identifier.Identifier identifiers = identifier_class.objects.filter(**_filter).order_by("-createTime")[:max_rows] dfields = _fields - if opts.get("compare", False): - dfields.append('n2t') writer = csv.DictWriter(self.stdout, dfields, dialect='excel') writer.writeheader() for identifier in identifiers: row = django.forms.models.model_to_dict(identifier, fields=_fields) - if opts.get('compare', False): - row['n2t'] = self.diff_n2t(identifier) writer.writerow(row) diff --git a/ezidapp/management/commands/proc-binder.py b/ezidapp/management/commands/proc-binder.py deleted file mode 100644 index c835a863d..000000000 --- a/ezidapp/management/commands/proc-binder.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright©2021, Regents of the University of California -# http://creativecommons.org/licenses/BSD - -"""Asynchronous N2T binder processing -""" - -import logging - -import ezidapp.management.commands.proc_base -import ezidapp.models.async_queue -import ezidapp.models.identifier -import impl.log -import impl.noid_egg - -log = logging.getLogger(__name__) - - -class Command(ezidapp.management.commands.proc_base.AsyncProcessingCommand): - help = __doc__ - name = __name__ - setting = 'DAEMONS_BINDER_ENABLED' - queue = ezidapp.models.async_queue.BinderQueue - - def create(self, task_model: ezidapp.models.async_queue.BinderQueue): - """ - Creates an entry in N2T for a new identifier. 
- The fields to be set are described in the N2T API documentation: - http://n2t.net/e/n2t_apidoc.html - Minimally, the fields must include: - who - what - when - where - how Where is this value stored in EZID? - _t - """ - id_str = task_model.refIdentifier.identifier - self.log.info("CREATE: %s", id_str) - ##metadata = task_model.refIdentifier.metadata - # add the required target metadata: - ##metadata["_t"] = task_model.refIdentifier.target - metadata = task_model.refIdentifier.toLegacy() - try: - impl.noid_egg.setElements(id_str, metadata) - task_model.status = self.queue.SUCCESS - except AssertionError as e: - task_model.status = self.queue.FAILURE - self.log.error("CREATE: %s", id_str, e) - except Exception as e: - task_model.status = self.queue.FAILURE - self.log.error("CREATE: %s", id_str, e) - task_model.save() - - def update(self, task_model: ezidapp.models.async_queue.BinderQueue): - ''' - task_model: BinderQueue - - Retrieves existing metadata from N2T and sends back updates to any - new fields oor fields that have changed values. - ''' - id_str = task_model.refIdentifier.identifier - ##metadata = task_model.refIdentifier.metadata - ### add the required target metadata: - ##metadata["_t"] = task_model.refIdentifier.target - metadata = task_model.refIdentifier.toLegacy() - self.log.info("UPDATE: %s", id_str) - - # Retrieve the existing metadata from N2T - m = impl.noid_egg.getElements(id_str) - if m is None: - m = {} - # First, update m with provided metadata - for k, v in list(metadata.items()): - # If the provided metadata matches existing, then ignore - if m.get(k) == v: - del m[k] - # Otherwise add property to list for sending back to N2T - else: - m[k] = v - # If properties retrieved from N2T are not present in the supplied - # update metadata, then set the value of the field to an empty string. - # An empty value results in an "rm" (remove) operation for that field - # being sent to N2T. 
- for k in list(m.keys()): - if k not in metadata: - m[k] = "" - self.log.debug("UPDATE: %s m = %s", id_str, m) - if len(m) > 0: - try: - impl.noid_egg.setElements(id_str, m) - task_model.status = self.queue.SUCCESS - except AssertionError as e: - task_model.status = self.queue.FAILURE - self.log.error("UPDATE: %s", id_str, e) - except Exception as e: - task_model.status = self.queue.FAILURE - self.log.error("UPDATE: %s", id_str, e) - task_model.save() - - def delete(self, task_model: ezidapp.models.async_queue.BinderQueue): - id_str = task_model.refIdentifier.identifier - try: - impl.noid_egg.deleteIdentifier(id_str) - task_model.status = self.queue.SUCCESS - except AssertionError as e: - task_model.status = self.queue.FAILURE - self.log.error("DELETE: %s", id_str, e) - except Exception as e: - task_model.status = self.queue.FAILURE - self.log.error("DELETE: %s", id_str, e) - task_model.save() - - def batchCreate(self, batch): - impl.noid_egg.batchSetElements(batch) - - def batchDelete(self, batch): - impl.noid_egg.batchDeleteIdentifier( - [identifier for identifier, metadata in batch], - ) diff --git a/impl/api.py b/impl/api.py index 444c28a95..6314aa650 100644 --- a/impl/api.py +++ b/impl/api.py @@ -102,7 +102,6 @@ import impl.datacite import impl.download import impl.ezid -import impl.noid_egg import impl.resolver import impl.search_util import impl.statistics @@ -427,9 +426,7 @@ def getStatus(request): if l == "*": l = "binder,datacite,search" for ss in [ss.strip() for ss in l.split(",") if len(ss.strip()) > 0]: - if ss == "binder": - body += f"binder: {impl.noid_egg.ping()}\n" - elif ss == "datacite": + if ss == "datacite": body += f"datacite: {impl.datacite.ping()}\n" elif ss == "search": body += f"search: {impl.search_util.ping()}\n" diff --git a/impl/noid_egg.py b/impl/noid_egg.py deleted file mode 100644 index faec21880..000000000 --- a/impl/noid_egg.py +++ /dev/null @@ -1,246 +0,0 @@ -# Copyright©2021, Regents of the University of California -# 
http://creativecommons.org/licenses/BSD - -"""Interface to the "egg" (binder) portion of noid - -A note on encodings. Identifiers and metadata elements (both names and values) are sent -to noid in encoded form; see util.encode{3,4}. Metadata elements received from void are -UTF-8-encoded and utilize percent-encoding. Though this received encoding does not -exactly match the transmitted encoding, the decoding performed by util.decode is -nevertheless compatible and so we use it. (Consider a Python Unicode value -u"Greg%Jan\xe9e". This is sent as "Greg%25Jan%C3%A9e" but received back as -"Greg%25Jan\xc3\xa9e", which, when percent- and UTF-8-decoded, yields the original -value.) - -This module performs whitespace processing. Leading and trailing whitespace is stripped -from both element names and values. Empty names are not allowed. Setting an empty -value causes the element to be deleted; as a consequence, empty values are never -returned. -""" - -import logging -import re -import time -import urllib.error -import urllib.parse -import urllib.request -import urllib.response - -import django.conf - -import impl.log -import impl.util - -log = logging.getLogger(__name__) - -DECODE_RX = re.compile("\^([0-9a-fA-F][0-9a-fA-F])?") - - -@impl.log.stacklog -def _issue(method, operations): - # noinspection PyUnresolvedReferences - r = urllib.request.Request(django.conf.settings.BINDER_URL + "?-") - r.get_method = lambda: method - # noinspection PyTypeChecker - r.add_header( - "Authorization", - impl.util.basic_auth( - django.conf.settings.BINDER_USERNAME, - django.conf.settings.BINDER_PASSWORD, - ), - ) - _timeout = 60 #seconds - try: - _timeout = django.conf.settings.DAEMONS_HTTP_CLIENT_TIMEOUT - except AttributeError: - log.warning("No settings.DAEMONS_HTTP_CLIENT_TIMEOUT. 
Using default of %s", _timeout) - - r.add_header("Content-Type", "text/plain") - - s = "" - - l = [] - for o in operations: - # o = (identifier, operation [,element [, value]]) - s = f":hx% {impl.util.encode4(o[0])}.{o[1]}" - if len(o) > 2: - s += " " + impl.util.encode4(o[2]) - if len(o) > 3: - s += " " + impl.util.encode3(o[3]) - l.append(s) - r.data = "\n".join(l).encode('utf-8') - - for i in range(django.conf.settings.BINDER_NUM_ATTEMPTS): - c = None - log.debug("noid_egg._issue attempt:%s %s url:%s", i, method, r.full_url) - try: - c = urllib.request.urlopen(r, timeout=_timeout) - s = [line.decode('utf-8', errors='replace') for line in c.readlines()] - except Exception as e: - # noinspection PyTypeChecker - log.warning("noid_egg._issue attempt:%s exception: %s", i, e) - if i == django.conf.settings.BINDER_NUM_ATTEMPTS - 1: - raise - else: - break - finally: - if c: - c.close() - # noinspection PyTypeChecker - # increase reattempt delay as a magnitude of BINDER_NUM_ATTEMPTS - time.sleep(django.conf.settings.BINDER_REATTEMPT_DELAY + (60 * (i + 1))) - - return s - - -def _error(operation, s): - return f'unexpected return from noid egg "{operation}":\n ' f'{"".join(str(x) for x in s)}' - - -def identifierExists(id_str): - """Return true if an identifier (given in normalized, qualified form, - e.g., "doi:10.1234/FOO") exists. - - Raises an exception on error. - """ - # The question of whether an identifier exists or not is surprisingly elusive. Noid will return - # information for any identifier string, so we can't use that as a test. Instead, we test for - # the presence of metadata. EZID populates a newly-created identifier with multiple metadata - # fields. (Noid adds its own internal metadata fields, but only in response to EZID adding - # fields.) 
- # - # The 'getElements' and 'deleteIdentifier' functions below work to maintain the invariant - # property that either an identifier has EZID metadata (along with noid-internal metadata) or it - # has no metadata at all. - s = _issue("GET", [(id_str, "fetch")]) - assert ( - len(s) >= 4 - and s[0].startswith("# id:") - and s[-3].startswith("# elements bound under") - and s[-2] == "egg-status: 0\n" - ), _error("fetch", s) - m = re.search(": (\\d+)\n$", s[-3]) - assert m, _error("fetch", s) - return m.group(1) != "0" - - -def setElements(id_str, d): - """Bind metadata elements to an id_str (given in normalized, qualified - form, e.g., "doi:10.1234/FOO"). - - The elements should be given in a dictionary that maps names to - values. Raises an exception on error. - - Setable elements are described in the N2T API docs: http://n2t.net/e/n2t_apidoc.html - """ - batchSetElements([(id_str, d)]) - - -def batchSetElements(batch): - """Similar to 'setElements' above, but operates on multiple identifiers in - one request. - - 'batch' should be a list of (identifier, name/value dictionary) - tuples. - """ - bind_list = [] - for identifier, d in batch: - for e, v in list(d.items()): - e = e.strip() - assert len(e) > 0, "empty label" - v = v.strip() - if v == "": - bind_list.append((identifier, "rm", e)) - else: - bind_list.append((identifier, "set", e, v)) - s = _issue("POST", bind_list) - assert len(s) >= 2 and s[-2] == "egg-status: 0\n", _error("set/rm", s) - - -def getElements(identifier): - """Return all metadata elements (in the form of a dictionary) that are - bound to an identifier (given in normalized, qualified form, e.g., - "doi:10.1234/FOO"), or None if the identifier doesn't exist. - - Raises an exception on error. - """ - # See the comment under 'identifierExists' above. 
- s = _issue("GET", [(identifier, "fetch")]) - assert ( - len(s) >= 4 - and s[0].startswith("# id:") - and s[-3].startswith("# elements bound under") - and s[-2] == "egg-status: 0\n" - ), _error("fetch", s) - m = re.search(": (\\d+)\n$", s[-3]) - assert m, _error("fetch", s) - c = int(m.group(1)) - assert len(s) == c + 4, _error("fetch", s) - if c == 0: - return None - else: - d = {} - for l in s[1 : len(s) - 3]: - assert ":" in l, _error("fetch", s) - if l.startswith("__") or l.startswith("_.e") or l.startswith("_,e"): - continue - e, v = l.split(":", 1) - d[impl.util.decode(e)] = impl.util.decode(v.strip()) - # There had better be at least one non-noid-internal binding. - assert len(d) > 0, _error("fetch", s) - return d - - -def deleteIdentifier(identifier): - """Delete all metadata elements (including noid-internal elements) bound - to an identifier (given in normalized, qualified form, e.g., - "doi:10.1234/FOO"). - - After calling this function, the identifier is deleted in the sense - that identifierExists(identifier) will return False and - getElements(identifier) will return None. As far as noid is - concerned, however, the identifier still exists and metadata - elements can be re-bound to it in the future. Raises an exception - on error. - """ - s = _issue("POST", [(identifier, "purge")]) - assert len(s) >= 2 and s[-2] == "egg-status: 0\n", _error("purge", s) - # See the comment under 'identifierExists' above. - assert not identifierExists( - identifier - ), f"noid egg 'purge' operation on {identifier} left remaining bindings" - - -def batchDeleteIdentifier(batch): - """Similar to 'deleteIdentifier' above, but deletes a list of identifiers - in one request.""" - # The following code does not verify that all bindings have been - # removed as 'deleteIdentifier' does above. But that code is just a - # guard against noid API changes, and having it in one place is - # sufficient. 
- s = _issue("POST", [(identifier, "purge") for identifier in batch]) - assert len(s) >= 2 and s[-2] == "egg-status: 0\n", _error("purge", s) - - -def ping(): - """Test the server, returning "up" or "down".""" - try: - s = _issue("GET", []) - assert len(s) >= 2 and s[-2] == "egg-status: 0\n" - return "up" - except Exception: - return "down" - - -def _decodeRewriter(m): - assert len(m.group(0)) == 3, "circumflex decode error" - return chr(int(m.group(0)[1:], 16)) - - -def decodeRaw(s): - """Decode an identifier or metadata element name as stored internally in - noid. - - Raises AssertionError and UnicodeDecodeError. - """ - return DECODE_RX.sub(_decodeRewriter, s) diff --git a/impl/noid_nog_standalone.py b/impl/noid_nog_standalone.py deleted file mode 100644 index 00759254a..000000000 --- a/impl/noid_nog_standalone.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright©2021, Regents of the University of California -# http://creativecommons.org/licenses/BSD - -"""Standalone version of noid_nog.py for use by offline tools -""" - -import base64 -import urllib.error -import urllib.parse -import urllib.request -import urllib.response - - -class Minter(object): - """A minter for a specific shoulder.""" - - def __init__(self, url, username, password): - """Create an interface to the noid nog minter at the supplied URL - using the supplied credentials.""" - self.url = url - self.username = username - self.password = password - - def _addAuthorization(self, request): - request.add_header( - "Authorization", - b"Basic " + (base64.b64encode(self.username + ":" + self.password)), - ) - - def mintIdentifier(self): - """Mint and returns a scheme-less ARK identifier, e.g., - "13030/fk35717n0h". - - Raises an exception on error. 
- """ - r = urllib.request.Request(self.url + "?mint%201") - self._addAuthorization(r) - c = None - try: - c = urllib.request.urlopen(r) - s = c.readlines() - finally: - if c: - c.close() - assert ( - len(s) >= 2 - and (s[0].startswith("id:") or s[0].startswith("s:")) - and s[-2] == "nog-status: 0\n" - ), "unexpected return from minter, output follows\n" + "".join(s) - return s[0].split(":", 1)[1].strip() diff --git a/migrate_db_to_py3/diag-create-fixtures.py b/migrate_db_to_py3/diag-create-fixtures.py index ae4d0c102..deb15e006 100644 --- a/migrate_db_to_py3/diag-create-fixtures.py +++ b/migrate_db_to_py3/diag-create-fixtures.py @@ -25,7 +25,6 @@ import impl.nog_sql.filesystem import impl.nog_sql.shoulder import impl.nog_sql.util -import impl.noid_egg APP_LABEL = 'ezidapp' From 7783f1a1f4d49e540843ed079304a5f4fbab4f6e Mon Sep 17 00:00:00 2001 From: sfisher Date: Thu, 29 Aug 2024 15:52:11 -0700 Subject: [PATCH 005/115] This will go through all the identifiers in opensearch and delete those that don't exist in the database. 
--- .../management/commands/opensearch-delete.py | 119 ++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 ezidapp/management/commands/opensearch-delete.py diff --git a/ezidapp/management/commands/opensearch-delete.py b/ezidapp/management/commands/opensearch-delete.py new file mode 100644 index 000000000..b7a18325b --- /dev/null +++ b/ezidapp/management/commands/opensearch-delete.py @@ -0,0 +1,119 @@ +from django.core.management.base import BaseCommand +from django.conf import settings +from ezidapp.models.identifier import SearchIdentifier +from impl.open_search_doc import OpenSearchDoc +import json +from django.db import connection + + + +# this is only for the time being since I'm using a local server without correct SSL/https +import urllib3 +from urllib3.exceptions import InsecureRequestWarning + +# Disable only the InsecureRequestWarning +urllib3.disable_warnings(InsecureRequestWarning) +# end suppression of urllib3 InsecureRequestWarning + +SPLIT_SIZE = 100 + +# run: python manage.py opensearch-update +# optional parameters: --starting_id 1234 --updated_since 2023-10-10T00:00:00Z +# --starting_id is the primary key ID to start populating from (good for resuming after a crash while populating all) +# --updated_since is a date in ISO 8601 format (YYYY-MM-DDTHH:MM:SS) to filter by updated time +# it allows you to only populate items updated after a certain date/time, which should make the population much faster +# because no need to repopulate all items for the entire history. + +# Even if items are already up-to-date, it doesn't hurt to repopulate them since it just updates from the +# copy of record which is the database values. OpenSearach values are derived for search and display purposes. + +# NOTE: This script will need revision if the SearchIdentifier model is ever removed from EZID since it relies on the +# SearchIdentifier update time to determine what to update in OpenSearch. 
It could be modified to use the +# Identifier update time instead, but that might be a different time that does not take into account the link checker +# which is updates in the SearchIdentifier table and doesn't update the Identifier table. + + +class Command(BaseCommand): + def handle(self, *args, **options): + # iterate through all items in the OpenSearch index and check against the database + # SearchIdentifier table to find removed items and remove them from the index + + # Initialize the OpenSearch client + client = OpenSearchDoc.CLIENT + index_name=settings.OPENSEARCH_INDEX + + # Start the scroll + response = client.search( + index=index_name, + body={ + "query": { + "match_all": {} + } + }, + scroll='2m', # Keep the scroll context alive for 2 minutes + size=100 # Number of results per batch + ) + + # Extract the scroll ID and the initial batch of results + scroll_id = response['_scroll_id'] + hits = response['hits']['hits'] + + checked_count = 100 + + # Continue scrolling until no more results are returned + while len(hits) > 0: + ids = [hit['_id'] for hit in hits] + + # Make a left join query which should be efficient for getting a list of items that are in the index but + # not in the database. MySQL makes it more complicated because it doesn't support FROM VALUES. + + # Convert the list of identifiers to a string format suitable for SQL. This UNION ALL is janky as hell + # but MySQL doesn't support FROM VALUES. The other option was to create a temporary table every time, but + # that seemed like overkill. 
+ ids_union = ' UNION ALL '.join(f"SELECT '{identifier}' AS identifier" for identifier in ids) + + # Raw SQL query to find identifiers in the list that are not in the database + query = f""" + SELECT id_list.identifier + FROM ({ids_union}) AS id_list + LEFT JOIN ezidapp_searchidentifier AS si ON id_list.identifier = si.identifier + WHERE si.identifier IS NULL; + """ + + # Execute the query + with connection.cursor() as cursor: + cursor.execute(query) + missing_identifiers = [row[0] for row in cursor.fetchall()] + + missing_identifiers_list = list(missing_identifiers) + + if len(missing_identifiers_list) > 0: + # Create the bulk delete request payload + bulk_delete_payload = "" + for identifier in missing_identifiers_list: + bulk_delete_payload += json.dumps( + {"delete": {"_index": index_name, "_id": identifier}}) + "\n" + + # Send the bulk delete request to OpenSearch + response = client.bulk(body=bulk_delete_payload) + + # Check the response + if response['errors']: + print(f" Errors occurred during bulk delete of {missing_identifiers_list.join(', ')}") + else: + print(f" Bulk delete successful deleting {missing_identifiers_list.join(', ')}") + + print("checked:", checked_count) + + response = client.scroll( + scroll_id=scroll_id, + scroll='2m' + ) + + scroll_id = response['_scroll_id'] + hits = response['hits']['hits'] + checked_count += len(hits) + + # Clear the scroll context + client.clear_scroll(scroll_id=scroll_id) + print("Done removing deleted IDs") From ede31f5b128963bb7155074f0fe7545045f0cf94 Mon Sep 17 00:00:00 2001 From: sfisher Date: Thu, 29 Aug 2024 15:54:52 -0700 Subject: [PATCH 006/115] Fixing some leftover code and comments that don't apply anymore. 
--- .../management/commands/opensearch-delete.py | 26 +------------------ 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/ezidapp/management/commands/opensearch-delete.py b/ezidapp/management/commands/opensearch-delete.py index b7a18325b..68ec9cb9c 100644 --- a/ezidapp/management/commands/opensearch-delete.py +++ b/ezidapp/management/commands/opensearch-delete.py @@ -5,33 +5,9 @@ import json from django.db import connection - - -# this is only for the time being since I'm using a local server without correct SSL/https -import urllib3 -from urllib3.exceptions import InsecureRequestWarning - -# Disable only the InsecureRequestWarning -urllib3.disable_warnings(InsecureRequestWarning) -# end suppression of urllib3 InsecureRequestWarning - SPLIT_SIZE = 100 -# run: python manage.py opensearch-update -# optional parameters: --starting_id 1234 --updated_since 2023-10-10T00:00:00Z -# --starting_id is the primary key ID to start populating from (good for resuming after a crash while populating all) -# --updated_since is a date in ISO 8601 format (YYYY-MM-DDTHH:MM:SS) to filter by updated time -# it allows you to only populate items updated after a certain date/time, which should make the population much faster -# because no need to repopulate all items for the entire history. - -# Even if items are already up-to-date, it doesn't hurt to repopulate them since it just updates from the -# copy of record which is the database values. OpenSearach values are derived for search and display purposes. - -# NOTE: This script will need revision if the SearchIdentifier model is ever removed from EZID since it relies on the -# SearchIdentifier update time to determine what to update in OpenSearch. It could be modified to use the -# Identifier update time instead, but that might be a different time that does not take into account the link checker -# which is updates in the SearchIdentifier table and doesn't update the Identifier table. 
- +# run: python manage.py opensearch-delete class Command(BaseCommand): def handle(self, *args, **options): From 07c5c092afc2c7d9fbcd59f0856804500b10c927 Mon Sep 17 00:00:00 2001 From: jsjiang Date: Thu, 29 Aug 2024 17:15:36 -0700 Subject: [PATCH 007/115] remove binder queue related code --- ansible/test_vars.yaml | 2 -- ezidapp/management/commands/check-ezid.py | 1 - impl/api.py | 3 +-- impl/statistics.py | 5 ----- 4 files changed, 1 insertion(+), 10 deletions(-) diff --git a/ansible/test_vars.yaml b/ansible/test_vars.yaml index 88414f113..5ec6ef080 100644 --- a/ansible/test_vars.yaml +++ b/ansible/test_vars.yaml @@ -37,8 +37,6 @@ - name: return a single param based on ssm_root_path as variable ##debug: msg="{{ ssm_params['database_host'] }}" debug: msg="database_host {{ database_host }}" - - name: return binder_url - debug: msg="{{ binder_url }}" - name: return resolver_ark debug: msg="{{ resolver_ark }}" diff --git a/ezidapp/management/commands/check-ezid.py b/ezidapp/management/commands/check-ezid.py index 6c69996f5..e37ac06e8 100644 --- a/ezidapp/management/commands/check-ezid.py +++ b/ezidapp/management/commands/check-ezid.py @@ -69,7 +69,6 @@ } queueType = { - 'binder': ezidapp.models.async_queue.BinderQueue, 'crossref': ezidapp.models.async_queue.CrossrefQueue, 'datacite': ezidapp.models.async_queue.DataciteQueue, 'search': ezidapp.models.async_queue.SearchIndexerQueue diff --git a/impl/api.py b/impl/api.py index 6314aa650..086180ea3 100644 --- a/impl/api.py +++ b/impl/api.py @@ -424,7 +424,7 @@ def getStatus(request): if "subsystems" in options: l = options["subsystems"] if l == "*": - l = "binder,datacite,search" + l = "datacite,search" for ss in [ss.strip() for ss in l.split(",") if len(ss.strip()) > 0]: if ss == "datacite": body += f"datacite: {impl.datacite.ping()}\n" @@ -478,7 +478,6 @@ def _statusLineGenerator(includeSuccessLine): f"STATUS {'paused' if isPaused else 'running'} " f"activeOperations={sum(activeUsers.values())} " 
f"waitingRequests={sum(waitingUsers.values())} " - f"binderQueueLength={impl.statistics.getBinderQueueLength()} " f"dataciteQueueLength={impl.statistics.getDataCiteQueueLength()} " "\n" ) diff --git a/impl/statistics.py b/impl/statistics.py index 44eefb033..b0fd1e1c7 100644 --- a/impl/statistics.py +++ b/impl/statistics.py @@ -7,11 +7,6 @@ import ezidapp.models.async_queue -def getBinderQueueLength(): - """Return the length of the binder queue.""" - return ezidapp.models.async_queue.BinderQueue.objects.count() - - def getDataCiteQueueLength(): """Return the length of the DataCite queue.""" return ezidapp.models.async_queue.DataciteQueue.objects.count() From 483f1bc6ee2c83782289ed33aab7185739ff5427 Mon Sep 17 00:00:00 2001 From: sfisher Date: Fri, 30 Aug 2024 10:53:28 -0700 Subject: [PATCH 008/115] Adding retry to the scroll since I got an error one time it was used --- .../management/commands/opensearch-delete.py | 33 ++++++++++++++----- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/ezidapp/management/commands/opensearch-delete.py b/ezidapp/management/commands/opensearch-delete.py index 68ec9cb9c..acc103146 100644 --- a/ezidapp/management/commands/opensearch-delete.py +++ b/ezidapp/management/commands/opensearch-delete.py @@ -4,6 +4,7 @@ from impl.open_search_doc import OpenSearchDoc import json from django.db import connection +import time SPLIT_SIZE = 100 @@ -40,10 +41,7 @@ def handle(self, *args, **options): while len(hits) > 0: ids = [hit['_id'] for hit in hits] - # Make a left join query which should be efficient for getting a list of items that are in the index but - # not in the database. MySQL makes it more complicated because it doesn't support FROM VALUES. - - # Convert the list of identifiers to a string format suitable for SQL. This UNION ALL is janky as hell + # Convert the list of identifiers to a string format suitable for SQL. This UNION ALL is janky # but MySQL doesn't support FROM VALUES. 
The other option was to create a temporary table every time, but # that seemed like overkill. ids_union = ' UNION ALL '.join(f"SELECT '{identifier}' AS identifier" for identifier in ids) @@ -81,10 +79,12 @@ def handle(self, *args, **options): print("checked:", checked_count) - response = client.scroll( - scroll_id=scroll_id, - scroll='2m' - ) + try: + response = self.scroll_with_retry(client, scroll_id) + except Exception as e: + print(e) + break + scroll_id = response['_scroll_id'] hits = response['hits']['hits'] @@ -93,3 +93,20 @@ def handle(self, *args, **options): # Clear the scroll context client.clear_scroll(scroll_id=scroll_id) print("Done removing deleted IDs") + + @staticmethod + def scroll_with_retry(client, scroll_id, max_retries=5, sleep_time=5): + for attempt in range(max_retries): + try: + response = client.scroll( + scroll_id=scroll_id, + scroll='2m' + ) + return response + except Exception as e: + if attempt < max_retries - 1: + print(f"Scroll attempt {attempt + 1} failed, retrying in {sleep_time} seconds...") + time.sleep(sleep_time) + else: + print(f"Scroll attempt {attempt + 1} failed, no more retries.") + raise e \ No newline at end of file From e4b6dc2be1d741b2596c2ae49479496728806714 Mon Sep 17 00:00:00 2001 From: sfisher Date: Fri, 30 Aug 2024 10:55:07 -0700 Subject: [PATCH 009/115] indenting message slightly. 
--- ezidapp/management/commands/opensearch-delete.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ezidapp/management/commands/opensearch-delete.py b/ezidapp/management/commands/opensearch-delete.py index acc103146..1d05c758e 100644 --- a/ezidapp/management/commands/opensearch-delete.py +++ b/ezidapp/management/commands/opensearch-delete.py @@ -105,8 +105,8 @@ def scroll_with_retry(client, scroll_id, max_retries=5, sleep_time=5): return response except Exception as e: if attempt < max_retries - 1: - print(f"Scroll attempt {attempt + 1} failed, retrying in {sleep_time} seconds...") + print(f" Scroll attempt {attempt + 1} failed, retrying in {sleep_time} seconds...") time.sleep(sleep_time) else: - print(f"Scroll attempt {attempt + 1} failed, no more retries.") + print(f" Scroll attempt {attempt + 1} failed, no more retries.") raise e \ No newline at end of file From 5cbe4ef26569dc5e02bd9bd0cf2ef7a820691b39 Mon Sep 17 00:00:00 2001 From: jsjiang Date: Tue, 3 Sep 2024 13:12:04 -0700 Subject: [PATCH 010/115] Refactor EZID client library Remove duplicated ezid client util scripts Rename misleading variables Add script to test ezid client --- tests/apicli_py2.py | 216 --------------------------------- tests/apicli_py3.py | 216 --------------------------------- tests/util/ezid_client.py | 6 +- tests/util/ezid_client_test.py | 42 +++++++ 4 files changed, 45 insertions(+), 435 deletions(-) delete mode 100644 tests/apicli_py2.py delete mode 100644 tests/apicli_py3.py create mode 100644 tests/util/ezid_client_test.py diff --git a/tests/apicli_py2.py b/tests/apicli_py2.py deleted file mode 100644 index 1f93200a0..000000000 --- a/tests/apicli_py2.py +++ /dev/null @@ -1,216 +0,0 @@ -# Copyright©2021, Regents of the University of California -# http://creativecommons.org/licenses/BSD - -"""Minimal EZID API client lib to support testing - -This minimal client lib for the EZID API is intended for supporting integrations tests and hence is -developed using the 
same version of python utilized by the EZID application. - -Based on https://github.com/CDLUC3/ezid-client-tools -""" - -import codecs -import logging -import re -import time -import urllib.error -import urllib.parse -import urllib.request -import urllib.response - - -class EZIDHTTPErrorProcessor(urllib.request.HTTPErrorProcessor): - def http_response(self, request, response): - # Bizarre that Python leaves this out. - if response.status == 201: - return response - else: - return urllib.request.HTTPErrorProcessor.http_response( - self, request, response - ) - - https_response = http_response - - -class EZIDClient(object): - def __init__( - self, - server_url, - session_id=None, - username=None, - password=None, - encoding="utf-8", - ): - self._L = logging.getLogger(self.__class__.__name__) - self._settings.BINDER_URL = server_url.strip("/") - self._cookie = session_id - self._encoding = encoding - self._opener = urllib.request.build_opener(EZIDHTTPErrorProcessor()) - self._username = username # preserve for test validation - if self._cookie is None: - self._setAuthHandler(username, password) - - def _encode(self, id_str): - return urllib.parse.quote(id_str, ":/") - - def _setAuthHandler(self, username, password): - h = urllib.request.HTTPBasicAuthHandler() - # noinspection PyUnresolvedReferences - h.add_password("EZID", self._settings.BINDER_URL, username, password) - self._opener.add_handler(h) - - def formatAnvlRequest(self, args): - request = [] - for i in range(0, len(args), 2): - k = args[i].decode(self._encoding) - if k == "@": - f = codecs.open(args[i + 1], encoding=self._encoding) - request += [l.strip("\r\n") for l in f.readlines()] - f.close() - else: - if k == "@@": - k = "@" - else: - k = re.sub("[%:\r\n]", lambda c: f"%{ord(c.group(0)):02X}", k) - v = args[i + 1].decode(self._encoding) - if v.startswith("@@"): - v = v[1:] - elif v.startswith("@") and len(v) > 1: - f = codecs.open(v[1:], encoding=self._encoding) - v = f.read() - f.close() - v = 
re.sub("[%\r\n]", lambda c: f"%{ord(c.group(0)):02X}", v) - request.append(f"{k}: {v}") - return "\n".join(request) - - def anvlresponseToDict( - self, response, format_timestamps=True, decode=False, _encoding="utf-8" - ): - res = {"status": "unknown", "status_message": "no content", "body": ""} - if response is None: - return res - response = response.splitlines() - # Treat the first response line as the status - K, V = response[0].split(":", 1) - res["status"] = K - res["status_message"] = V.strip(" ") - for line in response[1:]: - try: - K, V = line.split(":", 1) - V = V.strip() - if format_timestamps and (K == "_created:" or K == "_updated:"): - ls = line.split(":") - V = time.strftime("%Y-%m-%dT%H:%M:%S", time.localtime(int(ls[1]))) - if decode: - V = re.sub( - "%([0-9a-fA-F][0-9a-fA-F])", - lambda m: chr(int(m.group(1), 16)), - V, - ) - self._L.debug("K : V = %s : %s", K, V) - res[K] = V - except ValueError: - res["body"] += line - return res - - def anvlResponseToText( - self, - response, - sort_lines=False, - format_timestamps=True, - decode=False, - one_line=False, - encoding="utf-8", - ): - lines = [] - if response is None: - return None - response = response.splitlines() - if sort_lines and len(response) >= 1: - statusLine = response[0] - response = response[1:] - response.sort() - response.insert(0, statusLine) - for line in response: - if format_timestamps and ( - line.startswith("_created:") or line.startswith("_updated:") - ): - ls = line.split(":") - line = ( - ls[0] - + ": " - + time.strftime("%Y-%m-%dT%H:%M:%S", time.localtime(int(ls[1]))) - ) - if decode: - line = re.sub( - "%([0-9a-fA-F][0-9a-fA-F])", - lambda m: chr(int(m.group(1), 16)), - line, - ) - if one_line: - line = line.replace("\n", " ").replace("\r", " ") - lines.append(line.encode(encoding)) - return b"\n".join(lines) - - def issueRequest(self, path, method, data=None, dest_f=None): - url = f"{self._settings.BINDER_URL}/{path}" - self._L.info("sending request: %s", url) - request = 
urllib.request.Request(url) - request.get_method = lambda: method - response = None - if data is not None: - request.add_header("Content-Type", "text/plain; charset=utf-8") - # noinspection PyUnresolvedReferences - request.data = data.encode("utf-8") - if self._cookie is not None: - request.add_header("Cookie", self._cookie) - try: - connection = self._opener.open(request) - if not dest_f is None: - while True: - dest_f.write(connection.read(1)) - dest_f.flush() - else: - response = connection.read() - return response.decode("utf-8"), connection.info() - except urllib.error.HTTPError as e: - self._L.error(f"{e.code:d} {str(e)}") - if e.fp is not None: - response = e.fp.read() - self._L.error(response) - return response, {} - - def login(self, username=None, password=None): - if not username is None: - self._setAuthHandler(username, password) - self._cookie = None - response, headers = self.issueRequest("login", "GET") - try: - self._cookie = headers.get("set-cookie", "").split(";")[0].split("=")[1] - response += f"\nsessionid={self._cookie}\n" - except IndexError: - self._L.warning("No sessionid cookie in response.") - return self.anvlresponseToDict(response) - - def logout(self): - response, headers = self.issueRequest("logout", "GET") - return self.anvlresponseToDict(response) - - def status(self): - response, headers = self.issueRequest("status", "GET") - return self.anvlresponseToDict(response) - - def mint(self, shoulder, params=None): - if params is None: - params = [] - data = self.formatAnvlRequest(params) - url = "shoulder/" + self._encode(shoulder) - response, headers = self.issueRequest(url, "POST", data=data) - return self.anvlresponseToDict(response) - - def view(self, pid, bang=False): - path = "id/" + self._encode(pid) - if bang: - path += "?prefix_match=yes" - response, headers = self.issueRequest(path, "GET") - return self.anvlresponseToDict(response) diff --git a/tests/apicli_py3.py b/tests/apicli_py3.py deleted file mode 100644 index 
1f93200a0..000000000 --- a/tests/apicli_py3.py +++ /dev/null @@ -1,216 +0,0 @@ -# Copyright©2021, Regents of the University of California -# http://creativecommons.org/licenses/BSD - -"""Minimal EZID API client lib to support testing - -This minimal client lib for the EZID API is intended for supporting integrations tests and hence is -developed using the same version of python utilized by the EZID application. - -Based on https://github.com/CDLUC3/ezid-client-tools -""" - -import codecs -import logging -import re -import time -import urllib.error -import urllib.parse -import urllib.request -import urllib.response - - -class EZIDHTTPErrorProcessor(urllib.request.HTTPErrorProcessor): - def http_response(self, request, response): - # Bizarre that Python leaves this out. - if response.status == 201: - return response - else: - return urllib.request.HTTPErrorProcessor.http_response( - self, request, response - ) - - https_response = http_response - - -class EZIDClient(object): - def __init__( - self, - server_url, - session_id=None, - username=None, - password=None, - encoding="utf-8", - ): - self._L = logging.getLogger(self.__class__.__name__) - self._settings.BINDER_URL = server_url.strip("/") - self._cookie = session_id - self._encoding = encoding - self._opener = urllib.request.build_opener(EZIDHTTPErrorProcessor()) - self._username = username # preserve for test validation - if self._cookie is None: - self._setAuthHandler(username, password) - - def _encode(self, id_str): - return urllib.parse.quote(id_str, ":/") - - def _setAuthHandler(self, username, password): - h = urllib.request.HTTPBasicAuthHandler() - # noinspection PyUnresolvedReferences - h.add_password("EZID", self._settings.BINDER_URL, username, password) - self._opener.add_handler(h) - - def formatAnvlRequest(self, args): - request = [] - for i in range(0, len(args), 2): - k = args[i].decode(self._encoding) - if k == "@": - f = codecs.open(args[i + 1], encoding=self._encoding) - request += 
[l.strip("\r\n") for l in f.readlines()] - f.close() - else: - if k == "@@": - k = "@" - else: - k = re.sub("[%:\r\n]", lambda c: f"%{ord(c.group(0)):02X}", k) - v = args[i + 1].decode(self._encoding) - if v.startswith("@@"): - v = v[1:] - elif v.startswith("@") and len(v) > 1: - f = codecs.open(v[1:], encoding=self._encoding) - v = f.read() - f.close() - v = re.sub("[%\r\n]", lambda c: f"%{ord(c.group(0)):02X}", v) - request.append(f"{k}: {v}") - return "\n".join(request) - - def anvlresponseToDict( - self, response, format_timestamps=True, decode=False, _encoding="utf-8" - ): - res = {"status": "unknown", "status_message": "no content", "body": ""} - if response is None: - return res - response = response.splitlines() - # Treat the first response line as the status - K, V = response[0].split(":", 1) - res["status"] = K - res["status_message"] = V.strip(" ") - for line in response[1:]: - try: - K, V = line.split(":", 1) - V = V.strip() - if format_timestamps and (K == "_created:" or K == "_updated:"): - ls = line.split(":") - V = time.strftime("%Y-%m-%dT%H:%M:%S", time.localtime(int(ls[1]))) - if decode: - V = re.sub( - "%([0-9a-fA-F][0-9a-fA-F])", - lambda m: chr(int(m.group(1), 16)), - V, - ) - self._L.debug("K : V = %s : %s", K, V) - res[K] = V - except ValueError: - res["body"] += line - return res - - def anvlResponseToText( - self, - response, - sort_lines=False, - format_timestamps=True, - decode=False, - one_line=False, - encoding="utf-8", - ): - lines = [] - if response is None: - return None - response = response.splitlines() - if sort_lines and len(response) >= 1: - statusLine = response[0] - response = response[1:] - response.sort() - response.insert(0, statusLine) - for line in response: - if format_timestamps and ( - line.startswith("_created:") or line.startswith("_updated:") - ): - ls = line.split(":") - line = ( - ls[0] - + ": " - + time.strftime("%Y-%m-%dT%H:%M:%S", time.localtime(int(ls[1]))) - ) - if decode: - line = re.sub( - 
"%([0-9a-fA-F][0-9a-fA-F])", - lambda m: chr(int(m.group(1), 16)), - line, - ) - if one_line: - line = line.replace("\n", " ").replace("\r", " ") - lines.append(line.encode(encoding)) - return b"\n".join(lines) - - def issueRequest(self, path, method, data=None, dest_f=None): - url = f"{self._settings.BINDER_URL}/{path}" - self._L.info("sending request: %s", url) - request = urllib.request.Request(url) - request.get_method = lambda: method - response = None - if data is not None: - request.add_header("Content-Type", "text/plain; charset=utf-8") - # noinspection PyUnresolvedReferences - request.data = data.encode("utf-8") - if self._cookie is not None: - request.add_header("Cookie", self._cookie) - try: - connection = self._opener.open(request) - if not dest_f is None: - while True: - dest_f.write(connection.read(1)) - dest_f.flush() - else: - response = connection.read() - return response.decode("utf-8"), connection.info() - except urllib.error.HTTPError as e: - self._L.error(f"{e.code:d} {str(e)}") - if e.fp is not None: - response = e.fp.read() - self._L.error(response) - return response, {} - - def login(self, username=None, password=None): - if not username is None: - self._setAuthHandler(username, password) - self._cookie = None - response, headers = self.issueRequest("login", "GET") - try: - self._cookie = headers.get("set-cookie", "").split(";")[0].split("=")[1] - response += f"\nsessionid={self._cookie}\n" - except IndexError: - self._L.warning("No sessionid cookie in response.") - return self.anvlresponseToDict(response) - - def logout(self): - response, headers = self.issueRequest("logout", "GET") - return self.anvlresponseToDict(response) - - def status(self): - response, headers = self.issueRequest("status", "GET") - return self.anvlresponseToDict(response) - - def mint(self, shoulder, params=None): - if params is None: - params = [] - data = self.formatAnvlRequest(params) - url = "shoulder/" + self._encode(shoulder) - response, headers = 
self.issueRequest(url, "POST", data=data) - return self.anvlresponseToDict(response) - - def view(self, pid, bang=False): - path = "id/" + self._encode(pid) - if bang: - path += "?prefix_match=yes" - response, headers = self.issueRequest(path, "GET") - return self.anvlresponseToDict(response) diff --git a/tests/util/ezid_client.py b/tests/util/ezid_client.py index 76c9c0377..aba08c8a1 100644 --- a/tests/util/ezid_client.py +++ b/tests/util/ezid_client.py @@ -43,7 +43,7 @@ def __init__( encoding="utf-8", ): self._L = logging.getLogger(self.__class__.__name__) - self._settings = django.conf.settings.BINDER_URL = server_url.strip("/") + self._SERVER_URL = server_url.strip("/") self._cookie = session_id self._encoding = encoding self._opener = urllib.request.build_opener(EZIDHTTPErrorProcessor()) @@ -56,7 +56,7 @@ def _encode(self, id_str): def _setAuthHandler(self, username, password): h = urllib.request.HTTPBasicAuthHandler() - h.add_password("EZID", self._settings.BINDER_URL, username, password) + h.add_password("EZID", self._SERVER_URL, username, password) self._opener.add_handler(h) def formatAnvlRequest(self, args): @@ -153,7 +153,7 @@ def anvlResponseToText( return b"\n".join(lines) def issueRequest(self, path, method, data=None, dest_f=None): - url = f"{self._settings.BINDER_URL}/{path}" + url = f"{self._SERVER_URL}/{path}" self._L.info("sending request: %s", url) request = urllib.request.Request(url) request.get_method = lambda: method diff --git a/tests/util/ezid_client_test.py b/tests/util/ezid_client_test.py new file mode 100644 index 000000000..0ef5a3f30 --- /dev/null +++ b/tests/util/ezid_client_test.py @@ -0,0 +1,42 @@ +from ezid_client import EZIDClient as ezid_client + +import argparse + +def main(): + # Create the parser + parser = argparse.ArgumentParser(description="Use the EZID API client library ezid_client.py to test EZID.") + + parser.add_argument('-e', '--env', type=str, required=True, choices=['test', 'dev', 'stg', 'prd'], help='Environment') 
+ parser.add_argument('-u', '--username', type=str, required=False, help='user name') + parser.add_argument('-p', '--password', type=str, required=False, help='password') + + args = parser.parse_args() + env = args.env + username = args.username + password = args.password + + server_url = { + 'test': 'http://127.0.0.1:8000/', + 'dev': 'https://ezid-dev.cdlib.org/', + 'stg': 'https://ezid-stg.cdlib.org/', + 'prd': 'https://ezid.cdlib.org/' + } + + client = ezid_client(server_url.get(env), username=username, password=password) + + print("view") + ret = client.view("ark:/13030/m5z94194") + print(ret) + + print("mint") + ret = client.mint("ark:/99999/fk4") + print(ret) + + print("status") + ret = client.status() + print(ret) + +if __name__ == "__main__": + main() + + From 8f3cbcbc969ffcc8b9d9e36c70e134c132109ccc Mon Sep 17 00:00:00 2001 From: jsjiang Date: Tue, 3 Sep 2024 14:54:29 -0700 Subject: [PATCH 011/115] remove obsolete function call --- ezidapp/management/commands/diag-db-stats.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/ezidapp/management/commands/diag-db-stats.py b/ezidapp/management/commands/diag-db-stats.py index 559c8df73..0e20adcd8 100644 --- a/ezidapp/management/commands/diag-db-stats.py +++ b/ezidapp/management/commands/diag-db-stats.py @@ -72,14 +72,6 @@ def print_identifier(self, identifier): # print(id_model) # pprint.pp(id_model.cm) - print('-' * 100) - - impl.enqueue.enqueueBinderIdentifier( - identifier=id_model.identifier, - operation='update', - blob={'x': 'y'}, - ) - # impl.nog.util.print_table(row_list, log.info) def print_all(self): From 77e2f603ffac7f32d0dfcc667956360f3dd424a3 Mon Sep 17 00:00:00 2001 From: sfisher Date: Thu, 5 Sep 2024 14:18:42 -0700 Subject: [PATCH 012/115] Correcting error where I had the join backwards since the OO in Python is weird --- ezidapp/management/commands/opensearch-delete.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ezidapp/management/commands/opensearch-delete.py 
b/ezidapp/management/commands/opensearch-delete.py index 1d05c758e..67511b4bc 100644 --- a/ezidapp/management/commands/opensearch-delete.py +++ b/ezidapp/management/commands/opensearch-delete.py @@ -73,9 +73,9 @@ def handle(self, *args, **options): # Check the response if response['errors']: - print(f" Errors occurred during bulk delete of {missing_identifiers_list.join(', ')}") + print(f" Errors occurred during bulk delete of {', '.join(missing_identifiers_list)}") else: - print(f" Bulk delete successful deleting {missing_identifiers_list.join(', ')}") + print(f" Bulk delete successful deleting {', '.join(missing_identifiers_list)}") print("checked:", checked_count) From e224051e09eb4f9d85dd563f984c1c1da5fa6566 Mon Sep 17 00:00:00 2001 From: sfisher Date: Mon, 9 Sep 2024 10:50:19 -0700 Subject: [PATCH 013/115] Make identifier value parameterized --- ezidapp/management/commands/opensearch-delete.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ezidapp/management/commands/opensearch-delete.py b/ezidapp/management/commands/opensearch-delete.py index 67511b4bc..c11a34a4a 100644 --- a/ezidapp/management/commands/opensearch-delete.py +++ b/ezidapp/management/commands/opensearch-delete.py @@ -44,7 +44,7 @@ def handle(self, *args, **options): # Convert the list of identifiers to a string format suitable for SQL. This UNION ALL is janky # but MySQL doesn't support FROM VALUES. The other option was to create a temporary table every time, but # that seemed like overkill. 
- ids_union = ' UNION ALL '.join(f"SELECT '{identifier}' AS identifier" for identifier in ids) + ids_union = ' UNION ALL '.join(f"SELECT %s AS identifier" for _ in ids) # Raw SQL query to find identifiers in the list that are not in the database query = f""" @@ -56,7 +56,7 @@ def handle(self, *args, **options): # Execute the query with connection.cursor() as cursor: - cursor.execute(query) + cursor.execute(query, ids) missing_identifiers = [row[0] for row in cursor.fetchall()] missing_identifiers_list = list(missing_identifiers) From 7acf3923098d2c438abb2e5a4f7792e2b398b825 Mon Sep 17 00:00:00 2001 From: jsjiang Date: Wed, 11 Sep 2024 13:19:57 -0700 Subject: [PATCH 014/115] change link checker update job to run 15 hours after midnight --- settings/settings.py.j2 | 2 +- settings/tests.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/settings/settings.py.j2 b/settings/settings.py.j2 index 5ef8d2ecf..295fec72c 100644 --- a/settings/settings.py.j2 +++ b/settings/settings.py.j2 @@ -542,7 +542,7 @@ CLOUDWATCH_INSTANCE_NAME = '{{ cloudwatch_instance_name }}' LINKCHECKER_TABLE_UPDATE_CYCLE = 604800 # The converse, how often link checker results are incorporated back into the main EZID # tables. -LINKCHECKER_RESULTS_UPLOAD_CYCLE = 3600 +LINKCHECKER_RESULTS_UPLOAD_CYCLE = 54000 # If 'RESULTS_UPLOAD_SAME_TIME_OF_DAY' is True, then link checker results are # incorporated back once a day, and 'RESULTS_UPLOAD_CYCLE' is interpreted as an offset # from midnight. 'GOOD_RECHECK_MIN_INTERVAL' is the minimum elapsed time between diff --git a/settings/tests.py b/settings/tests.py index 2bde023f5..2c19000f3 100644 --- a/settings/tests.py +++ b/settings/tests.py @@ -501,7 +501,7 @@ LINKCHECKER_TABLE_UPDATE_CYCLE = 604800 # The converse, how often link checker results are incorporated back into the main EZID # tables. 
-LINKCHECKER_RESULTS_UPLOAD_CYCLE = 3600 +LINKCHECKER_RESULTS_UPLOAD_CYCLE = 54000 # If 'RESULTS_UPLOAD_SAME_TIME_OF_DAY' is True, then link checker results are # incorporated back once a day, and 'RESULTS_UPLOAD_CYCLE' is interpreted as an offset # from midnight. 'GOOD_RECHECK_MIN_INTERVAL' is the minimum elapsed time between From 8e51e4a791ae63a00b3f792b4983d6cd7971b977 Mon Sep 17 00:00:00 2001 From: jsjiang Date: Thu, 12 Sep 2024 14:53:23 -0700 Subject: [PATCH 015/115] change link-checker-update to run 18 hours after midnight --- settings/settings.py.j2 | 2 +- settings/tests.py | 2 +- tests/util/{ezid_client_test.py => ezid_client_testscript.py} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename tests/util/{ezid_client_test.py => ezid_client_testscript.py} (100%) diff --git a/settings/settings.py.j2 b/settings/settings.py.j2 index 295fec72c..defb03604 100644 --- a/settings/settings.py.j2 +++ b/settings/settings.py.j2 @@ -542,7 +542,7 @@ CLOUDWATCH_INSTANCE_NAME = '{{ cloudwatch_instance_name }}' LINKCHECKER_TABLE_UPDATE_CYCLE = 604800 # The converse, how often link checker results are incorporated back into the main EZID # tables. -LINKCHECKER_RESULTS_UPLOAD_CYCLE = 54000 +LINKCHECKER_RESULTS_UPLOAD_CYCLE = 64800 # If 'RESULTS_UPLOAD_SAME_TIME_OF_DAY' is True, then link checker results are # incorporated back once a day, and 'RESULTS_UPLOAD_CYCLE' is interpreted as an offset # from midnight. 'GOOD_RECHECK_MIN_INTERVAL' is the minimum elapsed time between diff --git a/settings/tests.py b/settings/tests.py index 2c19000f3..75a9d0ccf 100644 --- a/settings/tests.py +++ b/settings/tests.py @@ -501,7 +501,7 @@ LINKCHECKER_TABLE_UPDATE_CYCLE = 604800 # The converse, how often link checker results are incorporated back into the main EZID # tables. 
-LINKCHECKER_RESULTS_UPLOAD_CYCLE = 54000 +LINKCHECKER_RESULTS_UPLOAD_CYCLE = 64800 # If 'RESULTS_UPLOAD_SAME_TIME_OF_DAY' is True, then link checker results are # incorporated back once a day, and 'RESULTS_UPLOAD_CYCLE' is interpreted as an offset # from midnight. 'GOOD_RECHECK_MIN_INTERVAL' is the minimum elapsed time between diff --git a/tests/util/ezid_client_test.py b/tests/util/ezid_client_testscript.py similarity index 100% rename from tests/util/ezid_client_test.py rename to tests/util/ezid_client_testscript.py From 9d5b665d2a6cfa0531eb30a4bf993fa99921a482 Mon Sep 17 00:00:00 2001 From: jsjiang Date: Fri, 13 Sep 2024 12:35:58 -0700 Subject: [PATCH 016/115] unit test deprecated cgi function --- tests/test_api_util.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 tests/test_api_util.py diff --git a/tests/test_api_util.py b/tests/test_api_util.py new file mode 100644 index 000000000..956e0434f --- /dev/null +++ b/tests/test_api_util.py @@ -0,0 +1,27 @@ +# Copyright©2021, Regents of the University of California +# http://creativecommons.org/licenses/BSD + +import pytest + +from django.test import RequestFactory +import impl.api as api + +@pytest.fixture +def factory(): + return RequestFactory() + +@pytest.mark.parametrize("val,expected",[ + ('text/plain', True), + ('text/plain; charset=utf-8', True), + ('text/plain; charset=US-ASCII', False), + ('text/html', False), + ('text/xml; charset=utf-8', False), + ('application/json', False), + ('application/javascript', False), + ('application/x-www-form-urlencoded', False), +]) +def test_content_type_1(factory, val, expected): + request = factory.post('/shoulder/ark:/99999/fk4', content_type=val) + ret = api.is_text_plain_utf8(request) + assert ret == expected + \ No newline at end of file From 43bc319e678c1da31c98c96f2006260e99c4c772 Mon Sep 17 00:00:00 2001 From: jsjiang Date: Fri, 13 Sep 2024 12:52:57 -0700 Subject: [PATCH 017/115] add test cases to cover mimetype='' --- 
tests/test_api_util.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test_api_util.py b/tests/test_api_util.py index 956e0434f..8a520788c 100644 --- a/tests/test_api_util.py +++ b/tests/test_api_util.py @@ -13,7 +13,13 @@ def factory(): @pytest.mark.parametrize("val,expected",[ ('text/plain', True), ('text/plain; charset=utf-8', True), + ('text/plain; charset=UTF-8', True), + ('TEXT/PLAIN; charset=utf-8', False), ('text/plain; charset=US-ASCII', False), + ('; charset=utf-8', True), # mimetype='' + ('; charset=US-ASCII', False), # mimetype='' + ('charset=utf-8', False), + ('charset=US-ASCII', False), ('text/html', False), ('text/xml; charset=utf-8', False), ('application/json', False), From 73462eaba5d6f8f5865f8bbc409082c2d4f04980 Mon Sep 17 00:00:00 2001 From: jsjiang Date: Mon, 16 Sep 2024 08:30:09 -0700 Subject: [PATCH 018/115] upgrade github action python version to 3.11 --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 5dd2ebd75..81fa96308 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -17,7 +17,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.8' + python-version: '3.11' architecture: 'x64' - name: Start MySQL service From 0dcb2f7e906b178549d19ff450d545bc9f2961ca Mon Sep 17 00:00:00 2001 From: jsjiang Date: Mon, 16 Sep 2024 12:00:24 -0700 Subject: [PATCH 019/115] test regex in form_objects --- tests/test_form_objects.py | 55 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 tests/test_form_objects.py diff --git a/tests/test_form_objects.py b/tests/test_form_objects.py new file mode 100644 index 000000000..d4b88a112 --- /dev/null +++ b/tests/test_form_objects.py @@ -0,0 +1,55 @@ +import pytest +import re + +import impl.form_objects as form_objects + + +@pytest.mark.parametrize("string,expected",[ + ('2000', '2000'), + ('200', 
None), + ('2', None), + ('x', None), + ('yyyy', None), + ('', None), + (':unac', ':unac'), + (':unal', ':unal'), + (':unap', ':unap'), + (':unas', ':unas'), + (':unav', ':unav'), + (':unkn', ':unkn'), + (':none', ':none'), + (':tba', ':tba'), + (':tba', ':tba'), + (':tba', ':tba'), +]) +def test_regex_year(string, expected): + pattern = form_objects.REGEX_4DIGITYEAR + match = re.search(pattern, string) + if match: + assert match.group() == expected + +# REGEX_GEOPOINT = '-?(\d+(\.\d*)?|\.\d+)$' +@pytest.mark.parametrize("string,expected",[ + ('-123.456', '-123.456'), + ('123.456', '123.456'), + ('1.', '1.'), + ('-1.', '-1.'), + ('12.', '12.'), + ('.1', '.1'), + ('-.1', '-.1'), + ('.456', '.456'), + ('-.456', '-.456'), + ('.', None), + ('x', None), + ('yyyy', None), + ('', None), +]) +def test_regex_geopoint(string, expected): + pattern = form_objects.REGEX_GEOPOINT + match = re.search(pattern, string) + if match: + assert match.group() == expected + + + + From f5fdb80ba93fceaa89418ad5b5614a5bf35a05c4 Mon Sep 17 00:00:00 2001 From: sfisher Date: Wed, 18 Sep 2024 14:48:37 -0700 Subject: [PATCH 020/115] Use decorator to require a login for csv stats --- impl/ui_admin.py | 1 + 1 file changed, 1 insertion(+) diff --git a/impl/ui_admin.py b/impl/ui_admin.py index 39be899a9..85dec5f10 100644 --- a/impl/ui_admin.py +++ b/impl/ui_admin.py @@ -180,6 +180,7 @@ def _computeTotals(table): return totals +@impl.ui_common.user_login_required def csvStats(request): """Return all statistics to which a user is entitled as a CSV file 'requestor' is the user, and should be a User object. 
The statistics From 5dfd0b7b2b9110cfde0b621971c85c79d9e40092 Mon Sep 17 00:00:00 2001 From: sfisher Date: Wed, 18 Sep 2024 15:41:25 -0700 Subject: [PATCH 021/115] Insert hidden field in the month from and to so that the group/realm etc is maintained if resetting time frame --- templates/dashboard/_totalIdsByMonth.html | 2 ++ 1 file changed, 2 insertions(+) diff --git a/templates/dashboard/_totalIdsByMonth.html b/templates/dashboard/_totalIdsByMonth.html index 5304dde62..ab370ca41 100644 --- a/templates/dashboard/_totalIdsByMonth.html +++ b/templates/dashboard/_totalIdsByMonth.html @@ -5,6 +5,7 @@

{% trans "Total Identifiers Created By Month" %}
+ ARK (ark:/99999/fk4/... ) @@ -54,8 +53,7 @@

Create an identifier

- -
+
@@ -120,21 +118,19 @@

-
+
-
+
+
Choose an identifier type
-
Choose an identifier type
- - - - + -
+ +
diff --git a/dev/layout_home.html b/dev/layout_home.html index ba8f445ac..ae30ef75c 100755 --- a/dev/layout_home.html +++ b/dev/layout_home.html @@ -31,16 +31,15 @@

Home Layout With Login Modal

-

See how easy it is:

+

See how easy it is:

NOTE: This is just a demo. To create a real identifier, you must have an account.

-
- +
Choose an identifier type - + @@ -48,8 +47,7 @@

Home Layout With Login Modal

- -
+
diff --git a/templates/includes/simple_id_type.html b/templates/includes/simple_id_type.html index 1da3ebb65..c49da366b 100644 --- a/templates/includes/simple_id_type.html +++ b/templates/includes/simple_id_type.html @@ -1,14 +1,14 @@ {% load layout_extras %} {% load i18n %} -
+

{% trans "Choose an identifier type" %}

{% if calling_page == 'demo' %} {% help_icon "choose_id_demo" _("on choosing the type of identifier") %} {% else %} {% help_icon "choose_id" _("on choosing the type of identifier") %} {% endif %} -
+
{% if prefixes|duplicate_id_types %} {% comment %} class 'ays-ignore' is used by 'are-you-sure.js' which prevents users from accidentally leaving diff --git a/templates/index.html b/templates/index.html index 550e7a646..263cf3848 100644 --- a/templates/index.html +++ b/templates/index.html @@ -55,8 +55,7 @@

EZID: {% trans "Identifiers made easy" %}

-
- +
{% trans "Choose an identifier type" %} {% for p in prefixes %} @@ -65,8 +64,7 @@

EZID: {% trans "Identifiers made easy" %}

{% endfor %} {% help_icon "choose_id_demo" _("on choosing the type of identifier") %} - -
+
From 460974acfa15495397f57b4aba9f67dae3bcb0ca Mon Sep 17 00:00:00 2001 From: sfisher Date: Mon, 4 Nov 2024 16:34:15 -0800 Subject: [PATCH 047/115] Add to gulpfile to copy to the correct place so that the styles work and also updated stylesheet to get rid of extra borders and padding when I changed to fieldset. --- dev/scss/_fieldsets.scss | 3 +++ gulpfile.js | 9 ++++++- static_src/stylesheets/main2.min.css | 36 +++++++++++++++------------- 3 files changed, 31 insertions(+), 17 deletions(-) diff --git a/dev/scss/_fieldsets.scss b/dev/scss/_fieldsets.scss index 11356a0ec..1e922d5f1 100755 --- a/dev/scss/_fieldsets.scss +++ b/dev/scss/_fieldsets.scss @@ -26,6 +26,9 @@ $subfieldset-label-spacing: 0.3em; .fieldset-inline { + border: none; + padding: 0; + @include bp(screen1) { display: flex; flex-flow: row wrap; diff --git a/gulpfile.js b/gulpfile.js index 82c4e3821..02729a7c4 100644 --- a/gulpfile.js +++ b/gulpfile.js @@ -31,7 +31,7 @@ var ghPages = require('gulp-gh-pages'); exports.default = parallel(scss, start, watcher); // exports.build = series(clean, fonts, scsslint_legacy, scsslint, jslint, scss_legacy, scss, assemble, minifyCss, copyimages, fonts); -exports.build = series(clean, fonts, jslint, scss_legacy, scss, assemble, minifyCss, copyimages, fonts); +exports.build = series(clean, fonts, jslint, scss_legacy, scss, assemble, minifyCss, copyimages, fonts, copyCSS); exports.upload = githubpages; @@ -124,6 +124,13 @@ function copyimages(cb) { cb(); } +// Copy the minified css to the place it actually needs to go in order to function +function copyCSS(cb) { + return src('ui_library/css/main2.min.css') + .pipe(dest('static_src/stylesheets')); + cb(); +} + // Copy font files from dev/fonts/ into dev/ui_library/fonts/: function fonts(cb) { diff --git a/static_src/stylesheets/main2.min.css b/static_src/stylesheets/main2.min.css index f3274b753..c348c560a 100644 --- a/static_src/stylesheets/main2.min.css +++ b/static_src/stylesheets/main2.min.css @@ -3,9 +3,9 @@ * 
Copyright©2021, Regents of the University of California * http://creativecommons.org/licenses/BSD */ -/*! - * Copyright©2021, Regents of the University of California - * http://creativecommons.org/licenses/BSD +/*! + * Copyright©2021, Regents of the University of California + * http://creativecommons.org/licenses/BSD */ /* @@ -133,18 +133,21 @@ Selector pattern using above mixin: .fieldset-controls { margin: 0 0 1em; } -@media (min-width: 480px) { - .fieldset-inline { - display: -webkit-box; - display: -ms-flexbox; - display: flex; - -webkit-box-orient: horizontal; - -webkit-box-direction: normal; - -ms-flex-flow: row wrap; - flex-flow: row wrap; - -webkit-box-align: center; - -ms-flex-align: center; - align-items: center; } } +.fieldset-inline { + border: none; + padding: 0; } + @media (min-width: 480px) { + .fieldset-inline { + display: -webkit-box; + display: -ms-flexbox; + display: flex; + -webkit-box-orient: horizontal; + -webkit-box-direction: normal; + -ms-flex-flow: row wrap; + flex-flow: row wrap; + -webkit-box-align: center; + -ms-flex-align: center; + align-items: center; } } .fieldset-stacked__embedded-select-append-text, .fieldset-controls__legend, .fieldset-stacked__legend, .fieldset-inline__legend, .fieldset-stacked__legend-number-1, @@ -883,7 +886,8 @@ Selector pattern using above mixin: .literal-block, .pre-block { margin: 1em 0; padding: 0.5em; - border: 1px dotted; + border: 1px dotted #beb6af; + background-color: #edeae5; font-size: 0.85em; } .literal-block { From 3ace00aff6ef676b96088361403f18ea9651d78b Mon Sep 17 00:00:00 2001 From: sfisher Date: Wed, 6 Nov 2024 12:22:32 -0800 Subject: [PATCH 048/115] Most of the changes for #541 EXCEPT converting form switching into ajax without page reload --- dev/includes/home-banner.html | 6 +++--- dev/layout_home.html | 6 +++--- dev/legacy-scss/variables.scss | 4 ++-- static_src/stylesheets/main2.min.css | 8 ++++---- templates/index.html | 9 ++++++--- 5 files changed, 18 insertions(+), 15 deletions(-) 
diff --git a/dev/includes/home-banner.html b/dev/includes/home-banner.html index 202409104..33fadd61c 100755 --- a/dev/includes/home-banner.html +++ b/dev/includes/home-banner.html @@ -2,7 +2,7 @@

Create and manage long-term globally unique IDs for data and other sources using EZID

- Learn More + Learn more about EZID
@@ -10,7 +10,7 @@

Using Alternative Image

-

Create and manage long-term globally unique IDs for data and other sources using EZID

- Learn More +

Create and manage long-term globally unique IDs for data and other sources

+ Learn more about EZID
diff --git a/dev/layout_home.html b/dev/layout_home.html index ae30ef75c..bb263128a 100755 --- a/dev/layout_home.html +++ b/dev/layout_home.html @@ -23,8 +23,8 @@

Home Layout With Login Modal

-

Create and manage long-term globally unique IDs for data and other sources using EZID

- Learn More +

Create and manage long-term globally unique IDs for data and other sources

+ Learn more about EZID
@@ -32,7 +32,7 @@

Home Layout With Login Modal

See how easy it is:

-

NOTE: This is just a demo. To create a real identifier, you must have an account.

+

NOTE: This is just a demo. To create a real identifier, you must have an account.

diff --git a/dev/legacy-scss/variables.scss b/dev/legacy-scss/variables.scss index a6e747187..3462cb0c7 100644 --- a/dev/legacy-scss/variables.scss +++ b/dev/legacy-scss/variables.scss @@ -19,8 +19,8 @@ $text-link-color: rgb(176, 189, 202); // muted blue #B0BDCA $text-link-over-beige: rgb(0, 86, 149); // #005695 blue link over beige backgrounds $design-primary-color: rgb(0, 119, 138); // Teal Blue #00778b -$design-primary-gradient-top: rgb(22, 139, 153); // Lighter Teal Blue #168b99 was #17A2AC -$design-primary-gradient-bottom: rgb(5, 106, 123); // Slightly darker Teal Blue #056a7b +$design-primary-gradient-top: rgb(6, 80, 89); // Lighter Teal Blue #168b99 was #17A2AC +$design-primary-gradient-bottom: rgb(1, 64, 71); // Slightly darker Teal Blue #056a7b // Brown colors $design-secondary-gradient-top: rgb(198, 176, 130); // #C6B082 Used for subnav diff --git a/static_src/stylesheets/main2.min.css b/static_src/stylesheets/main2.min.css index c348c560a..11c7a4c53 100644 --- a/static_src/stylesheets/main2.min.css +++ b/static_src/stylesheets/main2.min.css @@ -61,8 +61,8 @@ Selector pattern using above mixin: border-radius: 3px; } .button__primary, .button__multline, .account__button-proxy-select, .customize-table__button-all, .customize-table__button-customize, .login-modal__submit, .dashboard__button-go, .home__button-primary, .manage__simple-search-button, .search__simple-button, .search__simple-button2, .search-results__simple-button { - background: -webkit-gradient(linear, left top, left bottom, from(#168b99), to(#056a7b)); - background: linear-gradient(to bottom, #168b99, #056a7b); } + background: -webkit-gradient(linear, left top, left bottom, from(#065059), to(#014047)); + background: linear-gradient(to bottom, #065059, #014047); } .button__multline { height: 46px; } @@ -742,8 +742,8 @@ Selector pattern using above mixin: .heading__primary-container { margin: 0 -10px; padding: 0 10px; - background: -webkit-gradient(linear, left top, left bottom, from(#168b99), 
to(#056a7b)); - background: linear-gradient(to bottom, #168b99, #056a7b); } + background: -webkit-gradient(linear, left top, left bottom, from(#065059), to(#014047)); + background: linear-gradient(to bottom, #065059, #014047); } .heading__primary-text { max-width: 1000px; diff --git a/templates/index.html b/templates/index.html index 263cf3848..df3f11e6e 100644 --- a/templates/index.html +++ b/templates/index.html @@ -24,8 +24,8 @@

EZID: {% trans "Identifiers made easy" %}

-

{% trans "Create and manage long-term globally unique IDs for data and other sources using EZID" %}

- {% trans "Learn More" %} +

{% trans "Create and manage long-term globally unique IDs for data and other sources" %}

+ {% trans "Learn more about EZID" %}
@@ -68,7 +68,10 @@

EZID: {% trans "Identifiers made easy" %}

-
{% trans "Describe the identified object" %}
+
+ {% trans "Describe the identified object" %} +
{{ form.non_field_errors }} From 4b7f3130abc295a9dbb8ac2ab9b0bb37b30193cb Mon Sep 17 00:00:00 2001 From: sfisher Date: Thu, 7 Nov 2024 16:02:25 -0800 Subject: [PATCH 049/115] This gets the homepage "try" creation for identifiers working, but something has messed up the layout in "create id" now. --- impl/ui_home.py | 18 +++++++++ settings/urls.py | 1 + static_src/javascripts/simple_create_ajax.js | 29 ++++++++++++++ templates/create/_home_demo_form.html | 40 +++++++++++++++++++ templates/index.html | 42 +++----------------- 5 files changed, 93 insertions(+), 37 deletions(-) create mode 100644 static_src/javascripts/simple_create_ajax.js create mode 100644 templates/create/_home_demo_form.html diff --git a/impl/ui_home.py b/impl/ui_home.py index aa31a6672..12d08f505 100644 --- a/impl/ui_home.py +++ b/impl/ui_home.py @@ -35,6 +35,24 @@ def index(request): "/id/" + urllib.parse.quote(result.split()[1], ":/") ) # ID Details page +def ajax_index_form(request): + if request.method not in ["GET"]: + return impl.ui_common.methodNotAllowed(request) + d = {'menu_item': 'ui_home.index'} + d['prefixes'] = sorted( + django.conf.settings.TEST_SHOULDER_DICT, key=lambda p: p['namespace'].lower() + ) + d['form_placeholder'] = True # is this necessary? 
+ d = impl.ui_create.simple_form(request, d) + result = d['id_gen_result'] + if result == 'edit_page': + # noinspection PyUnresolvedReferences + # return impl.ui_common.render(request, 'index', d) # ID Creation page + return impl.ui_common.render(request, 'create/_home_demo_form', d) + # return render(request, 'create/home_demo_form.html', d) + elif result == 'bad_request': + return impl.ui_common.badRequest(request) + def learn(request): if request.method != "GET": diff --git a/settings/urls.py b/settings/urls.py index 5d7c2917d..f3763377e 100644 --- a/settings/urls.py +++ b/settings/urls.py @@ -33,6 +33,7 @@ urlpatterns = [ # UI - RENDERED FROM TEMPLATES IN INFO REPOSITORY django.urls.re_path("^$", impl.ui_home.index, name="ui_home.index"), + django.urls.re_path("^home/ajax_index_form$", impl.ui_home.ajax_index_form, name="ui_home.ajax_index_form"), django.urls.re_path("^learn/$", impl.ui_home.learn, name="ui_home.learn"), django.urls.re_path("^learn/ark_open_faq$", impl.ui_home.ark_open_faq, name="ui_home.ark_open_faq"), django.urls.re_path("^learn/crossref_faq$", impl.ui_home.crossref_faq, name="ui_home.crossref_faq"), diff --git a/static_src/javascripts/simple_create_ajax.js b/static_src/javascripts/simple_create_ajax.js new file mode 100644 index 000000000..f371dc0b9 --- /dev/null +++ b/static_src/javascripts/simple_create_ajax.js @@ -0,0 +1,29 @@ +document.getElementById('form-shoulder').value = document.querySelectorAll('input[name=selshoulder]')[0].value; + +document.querySelectorAll('input[name="selshoulder"]').forEach(radio => { + radio.addEventListener('change', function () { + var profile; + if(this.value.startsWith('ark')) { + profile = 'erc'; + } else { + profile = 'datacite'; + } + + const form = document.querySelector('#create_form'); + const formData = new FormData(form); + formData.set('current_profile', profile); + + // Convert FormData to a query string + const queryString = new URLSearchParams(formData).toString(); + 
fetch(`/home/ajax_index_form?${queryString}`, { + headers: { + 'X-CSRFToken': document.querySelector('input[name="csrfmiddlewaretoken"]').value, + }, + }) + .then(response => response.text()) + .then(data => { + document.getElementById('form-container').innerHTML = data; // Replace form container HTML + document.getElementById('form-shoulder').value = this.value; // needs to happen after the form is replaced + }); + }); +}); \ No newline at end of file diff --git a/templates/create/_home_demo_form.html b/templates/create/_home_demo_form.html new file mode 100644 index 000000000..575c87cb6 --- /dev/null +++ b/templates/create/_home_demo_form.html @@ -0,0 +1,40 @@ +{% load i18n %} +{% load layout_extras %} + + + +{% csrf_token %} + +
+ +
+ {% trans "Describe the identified object" %} +
+ +
+ {{ form.non_field_errors }} + {% for field in form %} +
+
+ {% if field|fieldtype == "TextInput" %} + + {{ field|add_attributes:"fcontrol__text-field-stacked" }} + {% else %} + + {{ field|add_attributes:"fcontrol__select" }} + {% endif %} + {% if field.errors %} + {% for error in field.errors %}{{ error|escape }}{% endfor %} + {% endif %} +
+
+ {% endfor %} + +
+ +
+ +
+
+ \ No newline at end of file diff --git a/templates/index.html b/templates/index.html index df3f11e6e..227aedf7c 100644 --- a/templates/index.html +++ b/templates/index.html @@ -19,6 +19,7 @@ } {% endblock %} {% block content %} +{% csrf_token %}

EZID: {% trans "Identifiers made easy" %}

@@ -52,53 +53,20 @@

EZID: {% trans "Identifiers made easy" %}

-
-
{% trans "Choose an identifier type" %} {% for p in prefixes %} {% endfor %} {% help_icon "choose_id_demo" _("on choosing the type of identifier") %}
- -
- -
- {% trans "Describe the identified object" %} -
- -
- {{ form.non_field_errors }} - {% for field in form %} -
-
- {% if field|fieldtype == "TextInput" %} - - {{ field|add_attributes:"fcontrol__text-field-stacked" }} - {% else %} - - {{ field|add_attributes:"fcontrol__select" }} - {% endif %} - {% if field.errors %} - {% for error in field.errors %}{{ error|escape }}{% endfor %} - {% endif %} -
-
- {% endfor %} - -
- -
- -
+
+ {% include "create/_home_demo_form.html" %}
-
- + {% include "info/popup_help.html" %} From 7cb0ca165537f33b08ee7d529e321464f93c51ce Mon Sep 17 00:00:00 2001 From: sfisher Date: Fri, 8 Nov 2024 16:34:38 -0800 Subject: [PATCH 050/115] Need to change this fieldset back to a div since otherwise it destroys the whole layout of the form. --- templates/includes/simple_id_type.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/includes/simple_id_type.html b/templates/includes/simple_id_type.html index c49da366b..d7cc2876d 100644 --- a/templates/includes/simple_id_type.html +++ b/templates/includes/simple_id_type.html @@ -1,14 +1,14 @@ {% load layout_extras %} {% load i18n %} -
+

{% trans "Choose an identifier type" %}

{% if calling_page == 'demo' %} {% help_icon "choose_id_demo" _("on choosing the type of identifier") %} {% else %} {% help_icon "choose_id" _("on choosing the type of identifier") %} {% endif %} -
+
{% if prefixes|duplicate_id_types %} {% comment %} class 'ays-ignore' is used by 'are-you-sure.js' which prevents users from accidentally leaving From 532c2c562d208b6c11c29e16c09de8d8abeff6fe Mon Sep 17 00:00:00 2001 From: sfisher Date: Fri, 8 Nov 2024 16:34:38 -0800 Subject: [PATCH 051/115] Need to change this fieldset back to a div since otherwise it destroys the whole layout of the form. --- templates/includes/simple_id_type.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/includes/simple_id_type.html b/templates/includes/simple_id_type.html index c49da366b..d7cc2876d 100644 --- a/templates/includes/simple_id_type.html +++ b/templates/includes/simple_id_type.html @@ -1,14 +1,14 @@ {% load layout_extras %} {% load i18n %} -
+

{% trans "Choose an identifier type" %}

{% if calling_page == 'demo' %} {% help_icon "choose_id_demo" _("on choosing the type of identifier") %} {% else %} {% help_icon "choose_id" _("on choosing the type of identifier") %} {% endif %} -
+
{% if prefixes|duplicate_id_types %} {% comment %} class 'ays-ignore' is used by 'are-you-sure.js' which prevents users from accidentally leaving From fac439fa005666b0f457ebbb8304921f0bedaedb Mon Sep 17 00:00:00 2001 From: sfisher Date: Tue, 12 Nov 2024 17:34:25 -0800 Subject: [PATCH 052/115] Change this to a heading, I guess I missed this particular place. --- templates/index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/index.html b/templates/index.html index 227aedf7c..d2127354a 100644 --- a/templates/index.html +++ b/templates/index.html @@ -34,7 +34,7 @@

EZID: {% trans "Identifiers made easy" %}

-

{% trans "See how easy it is" %}:

+

{% trans "See how easy it is" %}:

{% comment %}Translators: Copy HTML tags over and only translate words outside of these tags i.e.:

TRANSLATE TEXT WRAPPED BY HTML TAGS

From a7ada1403f30cb88e3d1fa013ba04c0759f80988 Mon Sep 17 00:00:00 2001 From: jsjiang Date: Wed, 13 Nov 2024 08:56:09 -0800 Subject: [PATCH 053/115] backup proc-cleanup-async-queues.py to _v1 --- .../commands/proc-cleanup-async-queues_v1.py | 137 ++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 ezidapp/management/commands/proc-cleanup-async-queues_v1.py diff --git a/ezidapp/management/commands/proc-cleanup-async-queues_v1.py b/ezidapp/management/commands/proc-cleanup-async-queues_v1.py new file mode 100644 index 000000000..d9f578ff1 --- /dev/null +++ b/ezidapp/management/commands/proc-cleanup-async-queues_v1.py @@ -0,0 +1,137 @@ +#! /usr/bin/env python + +# Copyright©2021, Regents of the University of California +# http://creativecommons.org/licenses/BSD + +""" + +Clean up entries that are successfully completed or are a 'no-op' + +Identifier operation entries are retrieved by querying the database; +operations that successfully completed or are a no-op are deleted based on +pre-set interval. 
+ +""" + +import logging +import time + +import django.conf +import django.conf +import django.db +import django.db.transaction + +import ezidapp.management.commands.proc_base +import ezidapp.models.identifier +import ezidapp.models.shoulder +from django.db.models import Q + +log = logging.getLogger(__name__) + +class Command(ezidapp.management.commands.proc_base.AsyncProcessingCommand): + help = __doc__ + name = __name__ + + setting = 'DAEMONS_QUEUE_CLEANUP_ENABLED' + + queueType = { + 'crossref': ezidapp.models.async_queue.CrossrefQueue, + 'datacite': ezidapp.models.async_queue.DataciteQueue, + 'search': ezidapp.models.async_queue.SearchIndexerQueue + } + + refIdentifier = ezidapp.models.identifier.RefIdentifier + + def __init__(self): + super().__init__() + + + def run(self): + """ + Checks for the successfully processed identifier + + Args: + None + """ + # keep running until terminated + while not self.terminated(): + currentTime=int(time.time()) + timeDelta=django.conf.settings.DAEMONS_CHECK_IDENTIFIER_ASYNC_STATUS_TIMESTAMP + + # retrieve identifiers with update timestamp within a set range + refIdsQS = self.refIdentifier.objects.filter( + updateTime__lte=currentTime, + updateTime__gte=currentTime - timeDelta + ).order_by("-pk")[: django.conf.settings.DAEMONS_MAX_BATCH_SIZE] + + log.info("Checking ref Ids in the range: " + str(currentTime) + " - " + str(currentTime - timeDelta)) + + # iterate over query set to check each identifier status + for refId in refIdsQS: + + # set status for each handle system + identifierStatus = { + 'crossref' : False, + 'datacite' : False, + 'search' : False + } + + # check if the identifier is processed for each background job + for key, value in self.queueType.items(): + queue = value + + qs = queue.objects.filter( + Q(refIdentifier_id=refId.pk) + ) + + # if the identifier does not exist in the table + # mark as 'OK' to delete from the refIdentifier + if not qs: + identifierStatus[key] = True + continue + + for task_model in 
qs: + log.info('-' * 10) + log.info("Running job for identifier: " + refId.identifier + " in " + key + " queue") + + # delete identifier if the status is successfully synced or + # not applicable for this handle system + if (task_model.status==queue.SUCCESS or task_model.status==queue.IGNORED): + log.info( + "Delete identifier: " + refId.identifier + " in " + key + " queue") + identifierStatus[key] = True + self.deleteRecord(queue, task_model.pk, record_type=key, identifier=refId.identifier) + + # if the identifier is successfully processed for all the handle system + # delete it from the refIdentifier table + if all(i for i in identifierStatus.values()): + log.info( + "Delete identifier: " + refId.identifier + " from refIdentifier table.") + self.deleteRecord(self.refIdentifier, refId.pk, record_type='refId', identifier=refId.identifier) + + self.sleep(django.conf.settings.DAEMONS_BATCH_SLEEP) + + def deleteRecord(self, queue, primary_key, record_type=None, identifier=None): + """ + Deletes the identifier record that has been successfully completed + based on the record's primary key provided + + Args: + queue : async handle queue + primary_key (str): primary key of the record to be deleted. + record_type (str): . Defaults to None. + identifier (str): . Defaults to None. 
+ """ + try: + # check if the record to be deleted is a refIdentifier record + if (record_type is not None and record_type == 'refId'): + log.info(type(queue)) + log.info("Delete refId: " + str(primary_key)) + queue.objects.filter(id=primary_key).delete() + else: + log.info("Delete async entry: " + str(primary_key)) + queue.objects.filter(seq=primary_key).delete() + except Exception as e: + log.error("Exception occured while processing identifier '" + identifier + "' for '" + + record_type + "' table") + log.error(e) From 728a861cd95ee69213a93d67470935d1699dfea9 Mon Sep 17 00:00:00 2001 From: jsjiang Date: Wed, 13 Nov 2024 08:57:05 -0800 Subject: [PATCH 054/115] replace proc-cleanup-async-queues with _v2 --- .../commands/proc-cleanup-async-queues.py | 144 +++++++++++++++--- 1 file changed, 126 insertions(+), 18 deletions(-) diff --git a/ezidapp/management/commands/proc-cleanup-async-queues.py b/ezidapp/management/commands/proc-cleanup-async-queues.py index d9f578ff1..85640407f 100644 --- a/ezidapp/management/commands/proc-cleanup-async-queues.py +++ b/ezidapp/management/commands/proc-cleanup-async-queues.py @@ -15,19 +15,22 @@ import logging import time +from datetime import datetime +from dateutil.parser import parse -import django.conf import django.conf import django.db -import django.db.transaction +from django.db import transaction +from django.db.models import Q import ezidapp.management.commands.proc_base import ezidapp.models.identifier import ezidapp.models.shoulder -from django.db.models import Q + log = logging.getLogger(__name__) + class Command(ezidapp.management.commands.proc_base.AsyncProcessingCommand): help = __doc__ name = __name__ @@ -45,6 +48,29 @@ class Command(ezidapp.management.commands.proc_base.AsyncProcessingCommand): def __init__(self): super().__init__() + def add_arguments(self, parser): + super().add_arguments(parser) + parser.add_argument( + '--pagesize', help='Rows in each batch select.', type=int) + + parser.add_argument( + 
'--updated_range_from', type=str, + help = ( + 'Updated date range from - local date/time in ISO 8601 format without timezone \n' + 'YYYYMMDD, YYYYMMDDTHHMMSS, YYYY-MM-DD, YYYY-MM-DDTHH:MM:SS. \n' + 'Examples: 20241001, 20241001T131001, 2024-10-01, 2024-10-01T13:10:01 or 2024-10-01' + ) + ) + + parser.add_argument( + '--updated_range_to', type=str, + help = ( + 'Updated date range to - local date/time in ISO 8601 format without timezone \n' + 'YYYYMMDD, YYYYMMDDTHHMMSS, YYYY-MM-DD, YYYY-MM-DDTHH:MM:SS. \n' + 'Examples: 20241001, 20241001T131001, 2024-10-01, 2024-10-01T13:10:01 or 2024-10-01' + ) + ) + def run(self): """ @@ -53,21 +79,54 @@ def run(self): Args: None """ + ASYNC_CLEANUP_SLEEP = 60 * 10 + + BATCH_SIZE = self.opt.pagesize + if BATCH_SIZE is None: + BATCH_SIZE = 10000 + + updated_from = None + updated_to = None + updated_from_str = self.opt.updated_range_from + updated_to_str = self.opt.updated_range_to + if updated_from_str is not None: + try: + updated_from = self.date_to_seconds(updated_from_str) + except Exception as ex: + log.error(f"Input date/time error: {ex}") + exit() + if updated_to_str is not None: + try: + updated_to = self.date_to_seconds(updated_to_str) + except Exception as ex: + log.error(f"Input date/time error: {ex}") + exit() + + if updated_from is not None and updated_to is not None: + time_range = Q(updateTime__gte=updated_from) & Q(updateTime__lte=updated_to) + time_range_str = f"updated between: {updated_from_str} and {updated_to_str}" + elif updated_to is not None: + time_range = Q(updateTime__lte=updated_to) + time_range_str = f"updated before: {updated_to_str}" + else: + max_age_ts = int(time.time()) - django.conf.settings.DAEMONS_EXPUNGE_MAX_AGE_SEC + min_age_ts = max_age_ts - django.conf.settings.DAEMONS_EXPUNGE_MAX_AGE_SEC + time_range = Q(updateTime__gte=min_age_ts) & Q(updateTime__lte=max_age_ts) + time_range_str = f"updated between: {self.seconds_to_date(min_age_ts)} and {self.seconds_to_date(max_age_ts)}" + + last_id = 0 
# keep running until terminated while not self.terminated(): - currentTime=int(time.time()) - timeDelta=django.conf.settings.DAEMONS_CHECK_IDENTIFIER_ASYNC_STATUS_TIMESTAMP + # retrieve identifiers with update timestamp within a date range + filter = time_range & Q(id__gt=last_id) + refIdsQS = self.refIdentifier.objects.filter(filter).order_by("pk")[: BATCH_SIZE] - # retrieve identifiers with update timestamp within a set range - refIdsQS = self.refIdentifier.objects.filter( - updateTime__lte=currentTime, - updateTime__gte=currentTime - timeDelta - ).order_by("-pk")[: django.conf.settings.DAEMONS_MAX_BATCH_SIZE] - - log.info("Checking ref Ids in the range: " + str(currentTime) + " - " + str(currentTime - timeDelta)) + log.info(f"Checking ref Ids: {time_range_str}") + log.info(f"Checking ref Ids returned: {len(refIdsQS)} records") # iterate over query set to check each identifier status for refId in refIdsQS: + last_id = refId.pk # set status for each handle system identifierStatus = { @@ -109,7 +168,20 @@ def run(self): "Delete identifier: " + refId.identifier + " from refIdentifier table.") self.deleteRecord(self.refIdentifier, refId.pk, record_type='refId', identifier=refId.identifier) - self.sleep(django.conf.settings.DAEMONS_BATCH_SLEEP) + if len(refIdsQS) < BATCH_SIZE: + if updated_from is not None or updated_to is not None: + log.info(f"Finished - Checking ref Ids: {time_range_str}") + exit() + else: + log.info(f"Sleep {ASYNC_CLEANUP_SLEEP} seconds before processing next time range.") + self.sleep(ASYNC_CLEANUP_SLEEP) + last_id = 0 + min_age_ts = max_age_ts + max_age_ts = int(time.time()) - django.conf.settings.DAEMONS_EXPUNGE_MAX_AGE_SEC + time_range = Q(updateTime__gte=min_age_ts) & Q(updateTime__lte=max_age_ts) + time_range_str = f"updated between: {self.seconds_to_date(min_age_ts)} and {self.seconds_to_date(max_age_ts)}" + else: + self.sleep(django.conf.settings.DAEMONS_BATCH_SLEEP) def deleteRecord(self, queue, primary_key, record_type=None, 
identifier=None): """ @@ -125,13 +197,49 @@ def deleteRecord(self, queue, primary_key, record_type=None, identifier=None): try: # check if the record to be deleted is a refIdentifier record if (record_type is not None and record_type == 'refId'): - log.info(type(queue)) - log.info("Delete refId: " + str(primary_key)) - queue.objects.filter(id=primary_key).delete() + log.info(f"Delete from {queue.__name__} refId: " + str(primary_key)) + with transaction.atomic(): + obj = queue.objects.select_for_update().get(id=primary_key) + obj.delete() else: - log.info("Delete async entry: " + str(primary_key)) - queue.objects.filter(seq=primary_key).delete() + log.info(f"Delete async queue {queue.__name__} entry: " + str(primary_key)) + with transaction.atomic(): + obj = queue.objects.select_for_update().get(seq=primary_key) + obj.delete() except Exception as e: log.error("Exception occured while processing identifier '" + identifier + "' for '" + record_type + "' table") log.error(e) + + + def date_to_seconds(self, date_time_str: str) -> int: + """ + Convert date/time string to seconds since the Epotch. + For example: + 2024-01-01 00:00:00 => 1704096000 + 2024-10-10 00:00:00 => 1728543600 + + Parameter: + date_time_str: A date/time string in in ISO 8601 format without timezone. + For example: 'YYYYMMDD, YYYYMMDDTHHMMSS, YYYY-MM-DD, YYYY-MM-DDTHH:MM:SS. 
+ + Returns: + int: seconds since the Epotch + + """ + + # Parse the date and time string to a datetime object + dt_object = parse(date_time_str) + + # Convert the datetime object to seconds since the Epoch + seconds_since_epoch = int(dt_object.timestamp()) + + return seconds_since_epoch + + + def seconds_to_date(self, seconds_since_epoch: int) -> str: + dt_object = datetime.fromtimestamp(seconds_since_epoch) + + # Format the datetime object to a string in the desired format + formatted_time = dt_object.strftime("%Y-%m-%dT%H:%M:%S") + return formatted_time \ No newline at end of file From 4fc55b6feed6b9a29c03b7324431a1dc2824b3e6 Mon Sep 17 00:00:00 2001 From: sfisher Date: Wed, 13 Nov 2024 15:07:45 -0800 Subject: [PATCH 055/115] The X button was not showing up at all in Chrome. This fixes that problem by adding to styles and re-compiling the SVG. --- dev/images/icon_cross.svg | 17 +++-------------- dev/scss/_login-modal.scss | 1 + static_src/images/icon_cross.svg | 22 +++------------------- static_src/stylesheets/main2.min.css | 4 +++- 4 files changed, 10 insertions(+), 34 deletions(-) mode change 100755 => 100644 dev/images/icon_cross.svg diff --git a/dev/images/icon_cross.svg b/dev/images/icon_cross.svg old mode 100755 new mode 100644 index ba2b3cc1f..a037959bd --- a/dev/images/icon_cross.svg +++ b/dev/images/icon_cross.svg @@ -1,14 +1,3 @@ - - - - - - - - + + + diff --git a/dev/scss/_login-modal.scss b/dev/scss/_login-modal.scss index 245e7808e..7c204d6b3 100644 --- a/dev/scss/_login-modal.scss +++ b/dev/scss/_login-modal.scss @@ -33,6 +33,7 @@ padding: 2px; background-color: $design-white-color; cursor: pointer; + object-fit: contain; // Or "cover" depending on the fit you need } .login-modal__form { diff --git a/static_src/images/icon_cross.svg b/static_src/images/icon_cross.svg index 6de30c8bb..a037959bd 100644 --- a/static_src/images/icon_cross.svg +++ b/static_src/images/icon_cross.svg @@ -1,19 +1,3 @@ - - - - - - - - - - + + + diff --git 
a/static_src/stylesheets/main2.min.css b/static_src/stylesheets/main2.min.css index 11c7a4c53..2d946b0ed 100644 --- a/static_src/stylesheets/main2.min.css +++ b/static_src/stylesheets/main2.min.css @@ -1521,7 +1521,9 @@ Selector pattern using above mixin: height: 15px; padding: 2px; background-color: white; - cursor: pointer; } + cursor: pointer; + -o-object-fit: contain; + object-fit: contain; } .login-modal__form { display: -webkit-box; From 0f04f5993532f91b009313068f7918e70c3765d4 Mon Sep 17 00:00:00 2001 From: sfisher Date: Wed, 13 Nov 2024 15:13:16 -0800 Subject: [PATCH 056/115] Give the close image alt text to show "close" for screen readers --- templates/includes/login-modal.html | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/templates/includes/login-modal.html b/templates/includes/login-modal.html index a3fa07491..4329458e8 100644 --- a/templates/includes/login-modal.html +++ b/templates/includes/login-modal.html @@ -2,7 +2,9 @@ diff --git a/static_src/info/learnsub_documentation.html b/static_src/info/learnsub_documentation.html index d621684f2..ea6a8d9f6 100644 --- a/static_src/info/learnsub_documentation.html +++ b/static_src/info/learnsub_documentation.html @@ -131,7 +131,7 @@

Status Information

For information about the status of the EZID system, please consider the following options:

API inquiries

-

The CDL System Status Page

+

The CDL System Status Page

The EZID Status Blog

The RSS Feed from the Status Blog

diff --git a/static_src/info/learnsub_media.html b/static_src/info/learnsub_media.html deleted file mode 100644 index 42b812259..000000000 --- a/static_src/info/learnsub_media.html +++ /dev/null @@ -1,23 +0,0 @@ -
-

Webinars and Presentations

- -

2018

-

EZID and N2T at CDL Presentation at UC DLFx, February 27, 2018

-

Four Unsolved Problems (with Persistent Identifiers) Presentation at PIDapalooza, Palau de Congressos de Girona, Spain, January 23, 2018

-

Compact Identifiers for Data Citation in Biomedicine and Beyond Presentation at PIDapalooza, Palau de Congressos de Girona, Spain, January 23, 2018

-

DOI service providers: 4 factors to consider

- -

2017

-

Software Citation Principles Presentation at DataCite Strategy Meeting (slides), April 3, 2017

-

Identifiers and Citation: Frequently Asked Questions Presentation at California Data Librarians conference (slides), March 14, 2017

-

Identifiers and Citation Webinar for CENDI (slides), Feb. 15, 2017

- -

 

- -

Customizable materials

- -

EZID Postcard for STEM outreach

- -

EZID Postcard for Digital Humanities outreach

- -
diff --git a/templates/includes/top.html b/templates/includes/top.html index 74b12a353..6718122a5 100644 --- a/templates/includes/top.html +++ b/templates/includes/top.html @@ -31,8 +31,7 @@ What Is EZID? Current Users Documentation - FAQ - Media + FAQ API Guide EZID Demo
diff --git a/templates/learn.html b/templates/learn.html index 05dd349db..d9a12f6bc 100644 --- a/templates/learn.html +++ b/templates/learn.html @@ -33,17 +33,17 @@

Documentation

{% include "info/learnsub_documentation.html" %} -
-
-

FAQ

-{% include "info/learnsub_faq.html" %} -
-
-

Media

-{% include "info/learnsub_media.html" %}
+ +
+
+

FAQ

+
+
+
+
From 473665ea4d0c32c31b801c036723d0343985c509 Mon Sep 17 00:00:00 2001 From: sfisher Date: Wed, 15 Jan 2025 12:37:31 -0800 Subject: [PATCH 100/115] Removing arks in the open faq where I can see it. --- impl/ui_home.py | 8 --- settings/urls.py | 1 - static_src/info/learnsub_faq.html | 1 - static_src/locale/fr_CA/LC_MESSAGES/django.po | 60 ------------------- 4 files changed, 70 deletions(-) diff --git a/impl/ui_home.py b/impl/ui_home.py index 12d08f505..6d5307349 100644 --- a/impl/ui_home.py +++ b/impl/ui_home.py @@ -62,14 +62,6 @@ def learn(request): return impl.ui_common.render(request, 'learn', d) -def ark_open_faq(request): - if request.method != "GET": - return impl.ui_common.methodNotAllowed(request) - d = {'menu_item': 'ui_home.learn'} - # noinspection PyUnresolvedReferences - return impl.ui_common.render(request, 'info/ark_open_faq', d) - - def crossref_faq(request): if request.method != "GET": return impl.ui_common.methodNotAllowed(request) diff --git a/settings/urls.py b/settings/urls.py index f3763377e..6ce67e86e 100644 --- a/settings/urls.py +++ b/settings/urls.py @@ -35,7 +35,6 @@ django.urls.re_path("^$", impl.ui_home.index, name="ui_home.index"), django.urls.re_path("^home/ajax_index_form$", impl.ui_home.ajax_index_form, name="ui_home.ajax_index_form"), django.urls.re_path("^learn/$", impl.ui_home.learn, name="ui_home.learn"), - django.urls.re_path("^learn/ark_open_faq$", impl.ui_home.ark_open_faq, name="ui_home.ark_open_faq"), django.urls.re_path("^learn/crossref_faq$", impl.ui_home.crossref_faq, name="ui_home.crossref_faq"), django.urls.re_path("^learn/doi_services_faq$", impl.ui_home.doi_services_faq, name="ui_home.doi_services_faq",), django.urls.re_path("^learn/id_basics$", impl.ui_home.id_basics, name="ui_home.id_basics"), diff --git a/static_src/info/learnsub_faq.html b/static_src/info/learnsub_faq.html index 6b335ad70..6a92dd513 100644 --- a/static_src/info/learnsub_faq.html +++ b/static_src/info/learnsub_faq.html @@ -62,5 +62,4 @@

Are we eligible to participate in your Crossref service?

DOI Services FAQs

-

ARKs in the Open FAQs

diff --git a/static_src/locale/fr_CA/LC_MESSAGES/django.po b/static_src/locale/fr_CA/LC_MESSAGES/django.po index 2864121e4..e1e4e7a8f 100644 --- a/static_src/locale/fr_CA/LC_MESSAGES/django.po +++ b/static_src/locale/fr_CA/LC_MESSAGES/django.po @@ -2069,66 +2069,6 @@ msgid "" "will enable data sharing and publication.

" msgstr "" -#: templates/info/ark_open_faq.html:6 -msgid "ARKs in the Open FAQ" -msgstr "" - -#: templates/info/ark_open_faq.html:7 -msgid "ARKs in the Open: Frequently Asked Questions" -msgstr "" - -#. Translators: Copy HTML tags over and only translate words outside of these tags -#. i.e.:

TRANSLATE TEXT WRAPPED BY HTML TAGS

-#. i.e.: PLEASE PRESERVE HTML TAGS -#: templates/info/ark_open_faq.html:18 -msgid "" -"

What is ARKs in the Open?

ARKs in the Open is a " -"collaboration between CDL and DuraSpace aimed at building an open, international " -"community around Archival Resource Keys (ARKs) and their use as persistent identifiers " -"in the open scholarly ecosystem.

Why is CDL working on the " -"ARKs in the Open initiative

We (CDL) are launching this " -"pilot project as a first step toward ensuring the ongoing health and " -"development of the ARK infrastructure. By \"ARK infrastructure,\" we mean " -"open source software tools and systems, the ARK specification, and " -"production-grade resolver services. Community ownership of ARK identifier " -"assets has been under discussion for over a decade, and the project makes a " -"start by establishing an active ARK community group with the aims of:

" -"

1. Maintaining two key assets: the ARK specification and the NAAN " -"registry, and

2. Submitting the current ARK specification to " -"the IETF as in Internet Informational RFC.

What is " -"DuraSpace's role

Since 2001, CDL has served as the " -"\"incubator\" for ARKs. Now, to achieve long-term sustainability, ARKs and " -"the ARK infrastructure must emerge from CDL and mature in partnership with " -"multiple organizations and community participants to guide its future. To " -"jumpstart the process, CDL sought a collaboration with DuraSpace, an " -"independent not-for-profit organization providing leadership for open " -"technologies and communities such as Fedora, DSpace, and VIVO.

" -"

Does this mean EZID or CDL will stop offering ARK identifiers?

Not at all. EZID will continue to provide full services for " -"the creation and maintenance of ARKs and other identifiers. CDL expects to " -"play a prominent role in whatever community partnership emerges around ARKs " -"and ARK infrastructure.

Where can I get more detailed " -"information about ARKs in the Open?

The project's home " -"page: https://wiki.duraspace.org/display/DSP/ARKs+in" -"+the+Open+Project

How can I stay informed about ARKs in " -"the Open?

Regular updates will be posted to the ARK mailing " -"list. Join the list here: https://groups.google.com/forum/#!forum/arks-" -"forum

How can I get more involved?

" -"

Express your interest here: http://bit.ly/2C4fU8f

Or give us feedback here: https://wiki.duraspace.org/display/DSP/Project+Vision

" -"

I have more questions about ARKs. Where can I get answers?

All about ARKs: http://n2t.net/e/ark_ids.html

Or, contact the CDL ARKs in the Open " -"team

" -msgstr "" - #: templates/info/crossref_faq.html:6 msgid "Crossref FAQ" msgstr "" From 5a96abd009b7d04d9bf7df1067d107c2269a3f53 Mon Sep 17 00:00:00 2001 From: sfisher Date: Wed, 15 Jan 2025 12:40:31 -0800 Subject: [PATCH 101/115] I believe removes the rest of the ARKs in the Open FAQ things --- static_src/info/ark_open_faq.html | 60 ------------------------------- 1 file changed, 60 deletions(-) delete mode 100644 static_src/info/ark_open_faq.html diff --git a/static_src/info/ark_open_faq.html b/static_src/info/ark_open_faq.html deleted file mode 100644 index c5d47282f..000000000 --- a/static_src/info/ark_open_faq.html +++ /dev/null @@ -1,60 +0,0 @@ -{% extends "layouts/learn.html" %} -{% load layout_extras %} -{% load menus %} -{% load i18n %} - -{% block title %}{% block title-twitter %}{% trans "ARKs in the Open FAQ" %}{% endblock %}{% endblock %} -{% block heading %}{% content_heading _("ARKs in the Open: Frequently Asked Questions") %}{% endblock %} -{% block content %} - -{% comment %}Don't need to translate this{% endcomment %} -{% learn_breadcrumb 'ARKs in the Open FAQ' 'FAQ' '#05' %} -
-{% comment %}Translators: Copy HTML tags over and only translate words outside of these tags -i.e.:

TRANSLATE TEXT WRAPPED BY HTML TAGS

-i.e.: PLEASE PRESERVE HTML TAGS -{% endcomment %} -{% blocktrans trimmed %} -

What is ARKs in the Open?

- -

ARKs in the Open is a collaboration between CDL and DuraSpace aimed at building an open, international community around Archival Resource Keys (ARKs) and their use as persistent identifiers in the open scholarly ecosystem.

- - -

Why is CDL working on the ARKs in the Open initiative

- -

We (CDL) are launching this pilot project as a first step toward ensuring the ongoing health and development of the ARK infrastructure. By "ARK infrastructure," we mean open source software tools and systems, the ARK specification, and production-grade resolver services. Community ownership of ARK identifier assets has been under discussion for over a decade, and the project makes a start by establishing an active ARK community group with the aims of:

-

1. Maintaining two key assets: the ARK specification and the NAAN registry, and

-

2. Submitting the current ARK specification to the IETF as in Internet Informational RFC.

- -

What is DuraSpace's role

- -

Since 2001, CDL has served as the "incubator" for ARKs. Now, to achieve long-term sustainability, ARKs and the ARK infrastructure must emerge from CDL and mature in partnership with multiple organizations and community participants to guide its future. -To jumpstart the process, CDL sought a collaboration with DuraSpace, an independent not-for-profit organization providing leadership for open technologies and communities such as Fedora, DSpace, and VIVO.

- -

Does this mean EZID or CDL will stop offering ARK identifiers?

- -

Not at all. EZID will continue to provide full services for the creation and maintenance of ARKs and other identifiers. CDL expects to play a prominent role in whatever community partnership emerges around ARKs and ARK infrastructure. -

- -

Where can I get more detailed information about ARKs in the Open?

- -

The project's home page: https://wiki.duraspace.org/display/DSP/ARKs+in+the+Open+Project

- -

How can I stay informed about ARKs in the Open?

-

Regular updates will be posted to the ARK mailing list. Join the list here: https://groups.google.com/forum/#!forum/arks-forum

- -

How can I get more involved?

-

Express your interest here: http://bit.ly/2C4fU8f

-

Or give us feedback here: https://wiki.duraspace.org/display/DSP/Project+Vision

- -

I have more questions about ARKs. Where can I get answers?

-

All about ARKs: http://n2t.net/e/ark_ids.html

-

Or, contact the CDL ARKs in the Open team

- - - - - -{% endblocktrans %} -
-{% endblock %} From 09f62f8d2e98410dd294597d866d6ea9e36f1c48 Mon Sep 17 00:00:00 2001 From: sfisher Date: Wed, 15 Jan 2025 14:19:52 -0800 Subject: [PATCH 102/115] Fixing the FAQ for registration services --- static_src/info/doi_services_faq.html | 82 +++++++++++-------- static_src/info/learnsub_faq.html | 2 +- static_src/locale/fr_CA/LC_MESSAGES/django.po | 18 ++-- 3 files changed, 56 insertions(+), 46 deletions(-) diff --git a/static_src/info/doi_services_faq.html b/static_src/info/doi_services_faq.html index 32852f872..4277ea835 100644 --- a/static_src/info/doi_services_faq.html +++ b/static_src/info/doi_services_faq.html @@ -3,49 +3,63 @@ {% load menus %} {% load i18n %} -{% block title %}{% block title-twitter %}{% trans "DOI Services FAQ" %}{% endblock %}{% endblock %} -{% block heading %}{% content_heading _("DOI Services: Frequently Asked Questions") %}{% endblock %} +{% block title %}{% block title-twitter %}{% trans "Registration Services FAQ" %}{% endblock %}{% endblock %} +{% block heading %}{% content_heading _("Frequently Asked Questions") %}{% endblock %} {% block content %} {% comment %}Don't need to translate this{% endcomment %} -{% learn_breadcrumb 'DOI Services FAQ' 'FAQ' '#05' %} +{% learn_breadcrumb 'Registration Services FAQ' 'FAQ' '#05' %}
{% comment %}Translators: Copy HTML tags over and only translate words outside of these tags i.e.:

TRANSLATE TEXT WRAPPED BY HTML TAGS

i.e.: PLEASE PRESERVE HTML TAGS {% endcomment %} {% blocktrans trimmed %} -

Can I get a new DOI account?

- -

We are no longer able to issue new DOI service accounts to non-University of California entities. - -

Can I get a new ARK account?

- -

Yes. - -

Why are you changing EZID's service offering in this way?

- -

DOI service providers have evolved considerably over the last 5-10 years, and after careful consideration, we have adopted the following new strategy direction: to endorse and support DataCite's long-term sustainability, while maintaining our own distinct role in the persistent identifier (PID) space. - -

What alternatives do I have for DOI services?

- -

Option 1: Direct membership in DataCite. This is an especially good option if you seek a participatory role in the data citation and data curation community. In addition, we are working closely with DataCite to make sure popular EZID features (such as link checking and the usage dashboard) make their way into DataCite's feature set. If you'd like to know more, visit https://www.datacite.org/contact.html. - -

Option 2: Become a client of another DataCite member. Here is a list of current members: https://www.datacite.org/members.html. - -

Option 3: Use the services of a repository that provides DOIs, such as Dryad, Dataverse or figshare. This is a good option if you need only a very small number of DOIs. - -

Option 4: Become a client of Crossref. Crossref's annual member fee is based on the publishing revenue of the organization. For those organizations with less than $1M in publishing revenue the annual fee is $275. Crossref accepts data, text and preprints. You can explore Crossref's website for more information and when you are ready to apply for membership, complete this form: http://www.crossref.org/membership/join. - -

If you would like help assessing the differences between Crossref and DataCite services, please contact us - -

If I already have an EZID account, can I upgrade or change it?

- -

You can certainly add new ARK services. If you have other questions, please contact us.

- -

How does all of this affect University of California EZID account holders?

- -

University of California clients and prospective clients are not affected by this change. If you have any questions, please contact us.

+

How do I obtain a new account?

+ +

+ New accounts for ARK and DOI services are available to University of California users only, with access being + managed at the campus-level through the library. Please reach out to your campus library directly to obtain an account. +

+
+

What alternatives do I have for DOI and ARK registration services?

+ +

For DOI services:

+ +

Option 1: Use the services of a repository that provides DOIs, such as + Dryad, + Dataverse or + figshare. + This is a good option if you need only a very small number of DOIs. +

+ +

+ Option 2: Direct membership in DataCite. + See https://www.datacite.org for additional details. +

+ +

+ Option 3: Direct membership in Crossref. + See https://www.crossref.org for additional details. +

+ +

For ARK services:

+ +

+ The ARK Alliance offers many + resources to help users get started with ARKs, including + setting up your own infrastructure. See + https://arks.org for additional details. +

+ +
+

+ If I already have an EZID account, can I upgrade or change it? +

+ +

+ For assistance with changes to existing accounts, please contact us. +

{% endblocktrans %}
diff --git a/static_src/info/learnsub_faq.html b/static_src/info/learnsub_faq.html index 6a92dd513..9b031bf32 100644 --- a/static_src/info/learnsub_faq.html +++ b/static_src/info/learnsub_faq.html @@ -61,5 +61,5 @@

Are we eligible to participate in your Crossref service? -

DOI Services FAQs

+

Registration Services FAQs

diff --git a/static_src/locale/fr_CA/LC_MESSAGES/django.po b/static_src/locale/fr_CA/LC_MESSAGES/django.po index e1e4e7a8f..61eb997c4 100644 --- a/static_src/locale/fr_CA/LC_MESSAGES/django.po +++ b/static_src/locale/fr_CA/LC_MESSAGES/django.po @@ -2148,11 +2148,11 @@ msgid "" msgstr "" #: templates/info/doi_services_faq.html:6 -msgid "DOI Services FAQ" +msgid "Registration Services FAQ" msgstr "" #: templates/info/doi_services_faq.html:7 -msgid "DOI Services: Frequently Asked Questions" +msgid "Frequently Asked Questions" msgstr "" #. Translators: Copy HTML tags over and only translate words outside of these tags @@ -2160,15 +2160,11 @@ msgstr "" #. i.e.: PLEASE PRESERVE HTML TAGS #: templates/info/doi_services_faq.html:18 msgid "" -"

Can I get a new DOI account?

We are no longer " -"able to issue new DOI service accounts to non-University of California " -"entities.

Can I get a new ARK account?

Yes. " -"

Why are you changing EZID's service offering in this way?

DOI service providers have evolved considerably over the last " -"5-10 years, and after careful consideration, we have adopted the following " -"new strategy direction: to endorse and support DataCite's long-term " -"sustainability, while maintaining our own distinct role in the persistent " -"identifier (PID) space.

What alternatives do I have for DOI " +"

How do I obtain a new account?

New accounts for ARK " +"and DOI services are available to University of California users only, with " +"access being managed at the campus-level through the library. Please reach " +"out to your campus library directly to obtain an account.

" +"

What alternatives do I have for DOI " "services?

Option 1: Direct membership in DataCite. This is an especially " "good option if you seek a participatory role in the data citation and data " From 823f97e5c7b5bfa0429c8bc27f6f0a951e668daf Mon Sep 17 00:00:00 2001 From: sfisher Date: Wed, 15 Jan 2025 14:27:15 -0800 Subject: [PATCH 103/115] remove ezid services for hydra (Hydra is now Samvera, but I couldn't find the page about services for Samvera so removing) --- static_src/info/open_source.html | 2 -- 1 file changed, 2 deletions(-) diff --git a/static_src/info/open_source.html b/static_src/info/open_source.html index 368b54336..f18109599 100644 --- a/static_src/info/open_source.html +++ b/static_src/info/open_source.html @@ -24,8 +24,6 @@