Added pydub

midas-research · rohan220217 · Apr 20, 2024 · Apr 26, 2024 · May 6, 2024 · May 6, 2024
commit 2d290655982fb384970b0fe53d66d042db2203a6
diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py
@@ -22,6 +22,7 @@
 from django.conf import settings
 from django.db import transaction
 from django.db.models.query import Prefetch
+from cvat.apps.engine.models import Job, Label, AttributeSpec
 from django.utils import timezone
 from rest_framework.exceptions import ValidationError
 
@@ -993,6 +994,34 @@ def get_audio_job_export_data(job_id, dst_file, job, temp_dir_base, temp_dir):
     # All Annotations
     annotations = job.data["shapes"]
 
+    # Job detail
+
+    # Find labels of a particular job
+    job_details = Job.objects.get(id=job_id)
+    labels_queryset = job_details.get_labels()
+    labels_list = list(labels_queryset.values())
+
+    labels_mapping = {}
+
+    for label in labels_list:
+        labels_mapping[label["id"]] = label
+
+        label_attributes_queryset = AttributeSpec.objects.filter(label=label["id"])
+
+        attributes_list = list(label_attributes_queryset.values())
+
+        labels_mapping[label["id"]]["attributes"] = {}
+
+        for attribute in attributes_list:
+            labels_mapping[label["id"]]["attributes"][attribute["id"]] = attribute
+
+        slogger.glob.debug("JOB LABELS ATTRIBUTES")
+        slogger.glob.debug(json.dumps(attributes_list))
+
+
+    slogger.glob.debug("JOB LABELS")
+    slogger.glob.debug(json.dumps(labels_list))
+
     audio_file_path = os.path.join(temp_dir, str(job_id) + ".wav")
     with wave.open(audio_file_path, 'wb') as wave_file:
         wave_file.setnchannels(1)
@@ -1003,20 +1032,30 @@ def get_audio_job_export_data(job_id, dst_file, job, temp_dir_base, temp_dir):
     annotation_audio_chunk_file_paths = chunk_annotation_audio(audio_file_path, temp_dir, annotations)
 
     for i in range(0, len(annotation_audio_chunk_file_paths)):
-        final_data.append({"path" : os.path.basename(annotation_audio_chunk_file_paths[i]), "sentence" : annotations[i]["transcript"], "age" : annotations[i]["age"], "gender" : annotations[i]["gender"], "accents" : annotations[i]["accent"], "locale" : annotations[i]["locale"], "emotion" : annotations[i]["emotion"] })
-
+        annotation_attribute_id = annotations[i]["attributes"][0]["spec_id"]
+        label_attributes = labels_mapping[annotations[i]["label_id"]]["attributes"]
+        annotation_attribute = label_attributes[annotation_attribute_id]
+        attribute_name = annotation_attribute["name"]
+        attribute_val = annotations[i]["attributes"][0]["value"]
+
+        final_data.append({"path" : os.path.basename(annotation_audio_chunk_file_paths[i]), "sentence" : annotations[i]["transcript"], "age" : annotations[i]["age"], "gender" : annotations[i]["gender"], "accents" : annotations[i]["accent"], "locale" : annotations[i]["locale"], "emotion" : annotations[i]["emotion"], "label" : labels_mapping[annotations[i]["label_id"]]["name"], "attribute_name" : attribute_name, "attribute_value" : attribute_val })
+
+    slogger.glob.debug("JOB ANNOTATION DATA")
+    slogger.glob.debug(json.dumps(final_data))
+    slogger.glob.debug("All  ANNOTATIONs DATA")
+    slogger.glob.debug(json.dumps(annotations))
     return final_data, annotation_audio_chunk_file_paths
 
 def convert_annotation_data_format(data, format_name):
     if format_name == "Common Voice":
         return data
     elif format_name == "Librispeech":
-        data = list(map(lambda x: {"chapter_id" : "", "file" : x["path"], "id" : str(uuid.uuid4()), "speaker_id" : "", "text" : x["sentence"]}, data))
+        data = list(map(lambda x: {"chapter_id" : "", "file" : x["path"], "id" : str(uuid.uuid4()), "speaker_id" : "", "text" : x["sentence"], "label" : x["label"], "attribute_name" : x["attribute_name"], "attribute_value" : x["attribute_value"]}, data))
     elif format_name == "VoxPopuli":
         language_id_mapping = {"en" : 0}
-        data = list(map(lambda x: {"audio_id" : str(uuid.uuid4()), "language" : language_id_mapping[x["locale"]] if language_id_mapping.get(x["locale"]) else None, "audio_path" : x["path"], "raw_text" : x["sentence"], "normalized_text" : x["sentence"], "gender" : x["gender"], "speaker_id" : "", "is_gold_transcript" : False, "accent" : x["accent"]}, data))
+        data = list(map(lambda x: {"audio_id" : str(uuid.uuid4()), "language" : language_id_mapping.get(x["locale"]), "audio_path" : x["path"], "raw_text" : x["sentence"], "normalized_text" : x["sentence"], "gender" : x["gender"], "speaker_id" : "", "is_gold_transcript" : False, "accent" : x["accents"], "label" : x["label"], "attribute_name" : x["attribute_name"], "attribute_value" : x["attribute_value"]}, data))  # .get() keeps language id 0 for "en" (a truthiness test turned it into None); the export rows store the accent under "accents", not "accent"
     elif format_name == "Ted-Lium":
-        data = list(map(lambda x: {"file" : x["path"], "text" : x["sentence"], "gender" : x["gender"], "id" : str(uuid.uuid4()), "speaker_id" : ""}, data))
+        data = list(map(lambda x: {"file" : x["path"], "text" : x["sentence"], "gender" : x["gender"], "id" : str(uuid.uuid4()), "speaker_id" : "", "label" : x["label"], "attribute_name" : x["attribute_name"], "attribute_value" : x["attribute_value"]}, data))
 
     return data
 def export_audino_job(job_id, dst_file, format_name, server_url=None, save_images=False):

diff --git a/cvat/apps/engine/backup.py b/cvat/apps/engine/backup.py
@@ -232,6 +232,10 @@ def _prepare_annotations(self, annotations, label_mapping):
             'attributes',
             'shapes',
             'elements',
+            'gender',
+            'age',
+            'accent',
+            'transcript'
         }
 
         def _update_attribute(attribute, label):
@@ -470,7 +474,7 @@ def _export_task(self, zip_obj, target_dir=None):
         self._write_data(zip_obj, target_dir)
         self._write_task(zip_obj, target_dir)
         self._write_manifest(zip_obj, target_dir)
-        self._write_annotations(zip_obj, target_dir)
+        # self._write_annotations(zip_obj, target_dir)
         self._write_annotation_guide(zip_obj, target_dir)
 
     def export_to(self, file, target_dir=None):

diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py
@@ -1043,18 +1043,18 @@ def _is_zip(path):
         'mode': 'annotation',
         'unique': False,
     },
-    'video': {
-        'has_mime_type': _is_video,
-        'extractor': VideoReader,
-        'mode': 'interpolation',
-        'unique': True,
-    },
     'audio': {
         'has_mime_type': _is_audio,
         'extractor': AudioReader,
         'mode': 'interpolation',
         'unique': False,
     },
+    'video': {
+        'has_mime_type': _is_video,
+        'extractor': VideoReader,
+        'mode': 'interpolation',
+        'unique': True,
+    },
     'archive': {
         'has_mime_type': _is_archive,
         'extractor': ArchiveReader,

diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py
@@ -245,6 +245,9 @@ def _count_files(data):
     def count_files(file_mapping, counter):
         for rel_path, full_path in file_mapping.items():
             mime = get_mime(full_path)
+
+            slogger.glob.debug("Mimetype")
+            slogger.glob.debug(mime)
             if mime in counter:
                 counter[mime].append(rel_path)
             elif rel_path.endswith('.jsonl'):
@@ -686,6 +689,7 @@ def _create_thread(
 
     # count and validate uploaded files
     media = _count_files(data)
+
     media, task_mode = _validate_data(media, manifest_files)
 
     if job_file_mapping is not None and task_mode != 'annotation':

diff --git a/cvat/requirements/base.in b/cvat/requirements/base.in
@@ -53,4 +53,5 @@ rq==1.15.1
 rules>=3.3
 Shapely==1.7.1
 tensorflow==2.11.1 # Optional requirement of Datumaro. Use tensorflow-macos==2.8.0 for Mac M1
-soundfile==0.12.1
+soundfile==0.12.1
+pydub==0.25.1
diff --git a/cvat/requirements/base.txt b/cvat/requirements/base.txt
@@ -409,3 +409,4 @@ setuptools==68.2.2
     #   tensorflow
 
 soundfile==0.12.1
+pydub==0.25.1
diff --git a/cvat/requirements/development.in b/cvat/requirements/development.in
@@ -8,4 +8,5 @@ pylint-plugin-utils==0.7
 pylint==2.14.5
 rope==0.17.0
 snakeviz==2.1.0
-soundfile==0.12.1
+soundfile==0.12.1
+pydub==0.25.1
diff --git a/cvat/requirements/development.txt b/cvat/requirements/development.txt
@@ -62,5 +62,5 @@ tornado==6.3.3
     # via snakeviz
 
 soundfile==0.12.1
-
+pydub==0.25.1
 # The following packages are considered to be unsafe in a requirements file:
diff --git a/cvat/requirements/production.txt b/cvat/requirements/production.txt
@@ -29,4 +29,5 @@ watchfiles==0.20.0
 websockets==11.0.3
     # via uvicorn
 soundfile==0.12.1
+pydub==0.25.1
 # The following packages are considered to be unsafe in a requirements file:
diff --git a/cvat/settings/email_settings.py b/cvat/settings/email_settings.py
@@ -10,7 +10,7 @@
 ACCOUNT_AUTHENTICATION_METHOD = 'username_email'
 ACCOUNT_CONFIRM_EMAIL_ON_GET = True
 ACCOUNT_EMAIL_REQUIRED = True
-ACCOUNT_EMAIL_VERIFICATION = 'mandatory'
+ACCOUNT_EMAIL_VERIFICATION = 'none'
 
 # Email backend settings for Django
 EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'
Original file line number	Diff line number	Diff line change
Expand Up		@@ -409,3 +409,4 @@ setuptools==68.2.2
		# tensorflow

		soundfile==0.12.1
		pydub==0.25.1