Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/audino v2 #5

Closed
wants to merge 32 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
0e42dae
bug fix
kushalpoddar Apr 20, 2024
6589411
Email templating changes
kushalpoddar Apr 26, 2024
2d29065
Added pydub
kushalpoddar May 6, 2024
2da6517
alter col
kushalpoddar May 6, 2024
74e6743
added new audio total duration field
kushalpoddar May 6, 2024
6a602b6
Updated migrations
kushalpoddar May 8, 2024
97a158b
Audio duration finding using av
kushalpoddar May 9, 2024
bfaebcc
local
kushalpoddar May 9, 2024
531de53
Added UI Url
kushalpoddar May 9, 2024
44df236
Bug fix
kushalpoddar May 9, 2024
c696325
Bug fix
kushalpoddar May 10, 2024
5c8895a
readded pydub
kushalpoddar May 10, 2024
0799c54
bug fix for av
kushalpoddar May 10, 2024
9991d03
Voxpopuli bug fix and segment size rechanged
kushalpoddar May 21, 2024
16e0509
added email notification when annotation is done
ashish7515 May 26, 2024
f3beafa
Bug fixes
kushalpoddar May 29, 2024
111d2b6
resolve bug for large files
ashish7515 May 30, 2024
32a065b
merge conflicts
ashish7515 May 30, 2024
0bde3ff
update
ashish7515 May 30, 2024
62e94bb
bug fix
kushalpoddar May 30, 2024
0ff4e30
Encoding bug resolved for audio
kushalpoddar May 30, 2024
4cc294c
Added chardet to requirements
kushalpoddar May 30, 2024
e2b4c07
bug fix for audios
kushalpoddar May 30, 2024
9b7d1ac
Merge pull request #2 from midas-research/my-branch
rohan220217 Jun 3, 2024
030e245
start end in download csv, mp3 format, typo error
ashish7515 Jun 5, 2024
ebe46a5
comment removed
ashish7515 Jun 6, 2024
2cf5d5a
Merge pull request #3 from midas-research/my-branch
rohan220217 Jun 6, 2024
a52c010
Merge branch 'feat/audino-v2' of https://github.com/midas-research/cv…
ashish7515 Jun 15, 2024
f23cd60
conflict resolved
ashish7515 Jun 15, 2024
a628c20
Merge pull request #4 from midas-research/feat/ground_truth
rohan220217 Jun 15, 2024
95253ff
rebased
kushalpoddar Jun 16, 2024
6779ccf
Bug fixes
kushalpoddar Jun 16, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Added pydub
  • Loading branch information
kushalpoddar committed May 6, 2024
commit 2d290655982fb384970b0fe53d66d042db2203a6
49 changes: 44 additions & 5 deletions cvat/apps/dataset_manager/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from django.conf import settings
from django.db import transaction
from django.db.models.query import Prefetch
from cvat.apps.engine.models import Job, Label, AttributeSpec
from django.utils import timezone
from rest_framework.exceptions import ValidationError

Expand Down Expand Up @@ -993,6 +994,34 @@ def get_audio_job_export_data(job_id, dst_file, job, temp_dir_base, temp_dir):
# All Annotations
annotations = job.data["shapes"]

# Job detail

# Find labels of a particular job
job_details = Job.objects.get(id=job_id)
labels_queryset = job_details.get_labels()
labels_list = list(labels_queryset.values())

labels_mapping = {}

for label in labels_list:
labels_mapping[label["id"]] = label

label_attributes_queryset = AttributeSpec.objects.filter(label=label["id"])

attributes_list = list(label_attributes_queryset.values())

labels_mapping[label["id"]]["attributes"] = {}

for attribute in attributes_list:
labels_mapping[label["id"]]["attributes"][attribute["id"]] = attribute

slogger.glob.debug("JOB LABELS ATTRIBUTES")
slogger.glob.debug(json.dumps(attributes_list))


slogger.glob.debug("JOB LABELS")
slogger.glob.debug(json.dumps(labels_list))

audio_file_path = os.path.join(temp_dir, str(job_id) + ".wav")
with wave.open(audio_file_path, 'wb') as wave_file:
wave_file.setnchannels(1)
Expand All @@ -1003,20 +1032,30 @@ def get_audio_job_export_data(job_id, dst_file, job, temp_dir_base, temp_dir):
annotation_audio_chunk_file_paths = chunk_annotation_audio(audio_file_path, temp_dir, annotations)

for i in range(0, len(annotation_audio_chunk_file_paths)):
final_data.append({"path" : os.path.basename(annotation_audio_chunk_file_paths[i]), "sentence" : annotations[i]["transcript"], "age" : annotations[i]["age"], "gender" : annotations[i]["gender"], "accents" : annotations[i]["accent"], "locale" : annotations[i]["locale"], "emotion" : annotations[i]["emotion"] })

annotation_attribute_id = annotations[i]["attributes"][0]["spec_id"]
label_attributes = labels_mapping[annotations[i]["label_id"]]["attributes"]
annotation_attribute = label_attributes[annotation_attribute_id]
attribute_name = annotation_attribute["name"]
attribute_val = annotations[i]["attributes"][0]["value"]

final_data.append({"path" : os.path.basename(annotation_audio_chunk_file_paths[i]), "sentence" : annotations[i]["transcript"], "age" : annotations[i]["age"], "gender" : annotations[i]["gender"], "accents" : annotations[i]["accent"], "locale" : annotations[i]["locale"], "emotion" : annotations[i]["emotion"], "label" : labels_mapping[annotations[i]["label_id"]]["name"], "attribute_name" : attribute_name, "attribute_value" : attribute_val })

slogger.glob.debug("JOB ANNOTATION DATA")
slogger.glob.debug(json.dumps(final_data))
slogger.glob.debug("All ANNOTATIONs DATA")
slogger.glob.debug(json.dumps(annotations))
return final_data, annotation_audio_chunk_file_paths

def convert_annotation_data_format(data, format_name):
    """Reshape exported audio annotation records for a target dataset format.

    Args:
        data: list of dicts, one per annotated audio chunk. Each record is
            read by key: "path", "sentence", "gender", "locale", "label",
            "attribute_name", "attribute_value", plus the accent stored
            under "accents" (or the legacy "accent" key).
        format_name: one of "Common Voice", "Librispeech", "VoxPopuli" or
            "Ted-Lium". Any other name leaves the records untouched.

    Returns:
        A new list of dicts laid out for the requested format, or ``data``
        itself for "Common Voice" and for unrecognised format names.

    Raises:
        KeyError: if a record lacks a key required by the requested format.
    """
    if format_name == "Common Voice":
        # Records are already assembled in the Common Voice layout.
        return data

    if format_name == "Librispeech":
        return [
            {
                "chapter_id": "",
                "file": record["path"],
                "id": str(uuid.uuid4()),
                "speaker_id": "",
                "text": record["sentence"],
                "label": record["label"],
                "attribute_name": record["attribute_name"],
                "attribute_value": record["attribute_value"],
            }
            for record in data
        ]

    if format_name == "VoxPopuli":
        language_id_mapping = {"en": 0}

        def _accent(record):
            # The export records built in get_audio_job_export_data store the
            # accent under "accents"; fall back to "accent" for callers that
            # use the singular key. A record with neither still raises
            # KeyError.
            return record["accents"] if "accents" in record else record["accent"]

        return [
            {
                "audio_id": str(uuid.uuid4()),
                # Use .get() directly: the id for "en" is 0, so a truthiness
                # test on the looked-up value would wrongly turn it into None.
                "language": language_id_mapping.get(record["locale"]),
                "audio_path": record["path"],
                "raw_text": record["sentence"],
                "normalized_text": record["sentence"],
                "gender": record["gender"],
                "speaker_id": "",
                "is_gold_transcript": False,
                "accent": _accent(record),
                "label": record["label"],
                "attribute_name": record["attribute_name"],
                "attribute_value": record["attribute_value"],
            }
            for record in data
        ]

    if format_name == "Ted-Lium":
        return [
            {
                "file": record["path"],
                "text": record["sentence"],
                "gender": record["gender"],
                "id": str(uuid.uuid4()),
                "speaker_id": "",
                "label": record["label"],
                "attribute_name": record["attribute_name"],
                "attribute_value": record["attribute_value"],
            }
            for record in data
        ]

    # Unknown format name: hand the records back unchanged.
    return data
def export_audino_job(job_id, dst_file, format_name, server_url=None, save_images=False):
Expand Down
6 changes: 5 additions & 1 deletion cvat/apps/engine/backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,10 @@ def _prepare_annotations(self, annotations, label_mapping):
'attributes',
'shapes',
'elements',
'gender',
'age',
'accent',
'transcript'
}

def _update_attribute(attribute, label):
Expand Down Expand Up @@ -470,7 +474,7 @@ def _export_task(self, zip_obj, target_dir=None):
self._write_data(zip_obj, target_dir)
self._write_task(zip_obj, target_dir)
self._write_manifest(zip_obj, target_dir)
self._write_annotations(zip_obj, target_dir)
# self._write_annotations(zip_obj, target_dir)
self._write_annotation_guide(zip_obj, target_dir)

def export_to(self, file, target_dir=None):
Expand Down
12 changes: 6 additions & 6 deletions cvat/apps/engine/media_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1043,18 +1043,18 @@ def _is_zip(path):
'mode': 'annotation',
'unique': False,
},
'video': {
'has_mime_type': _is_video,
'extractor': VideoReader,
'mode': 'interpolation',
'unique': True,
},
'audio': {
'has_mime_type': _is_audio,
'extractor': AudioReader,
'mode': 'interpolation',
'unique': False,
},
'video': {
'has_mime_type': _is_video,
'extractor': VideoReader,
'mode': 'interpolation',
'unique': True,
},
'archive': {
'has_mime_type': _is_archive,
'extractor': ArchiveReader,
Expand Down
4 changes: 4 additions & 0 deletions cvat/apps/engine/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,9 @@ def _count_files(data):
def count_files(file_mapping, counter):
for rel_path, full_path in file_mapping.items():
mime = get_mime(full_path)

slogger.glob.debug("Mimetype")
slogger.glob.debug(mime)
if mime in counter:
counter[mime].append(rel_path)
elif rel_path.endswith('.jsonl'):
Expand Down Expand Up @@ -686,6 +689,7 @@ def _create_thread(

# count and validate uploaded files
media = _count_files(data)

media, task_mode = _validate_data(media, manifest_files)

if job_file_mapping is not None and task_mode != 'annotation':
Expand Down
3 changes: 2 additions & 1 deletion cvat/requirements/base.in
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,5 @@ rq==1.15.1
rules>=3.3
Shapely==1.7.1
tensorflow==2.11.1 # Optional requirement of Datumaro. Use tensorflow-macos==2.8.0 for Mac M1
soundfile==0.12.1
soundfile==0.12.1
pydub==0.25.1
1 change: 1 addition & 0 deletions cvat/requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -409,3 +409,4 @@ setuptools==68.2.2
# tensorflow

soundfile==0.12.1
pydub==0.25.1
3 changes: 2 additions & 1 deletion cvat/requirements/development.in
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ pylint-plugin-utils==0.7
pylint==2.14.5
rope==0.17.0
snakeviz==2.1.0
soundfile==0.12.1
soundfile==0.12.1
pydub==0.25.1
2 changes: 1 addition & 1 deletion cvat/requirements/development.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,5 +62,5 @@ tornado==6.3.3
# via snakeviz

soundfile==0.12.1

pydub==0.25.1
# The following packages are considered to be unsafe in a requirements file:
1 change: 1 addition & 0 deletions cvat/requirements/production.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,5 @@ watchfiles==0.20.0
websockets==11.0.3
# via uvicorn
soundfile==0.12.1
pydub==0.25.1
# The following packages are considered to be unsafe in a requirements file:
2 changes: 1 addition & 1 deletion cvat/settings/email_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
ACCOUNT_AUTHENTICATION_METHOD = 'username_email'
ACCOUNT_CONFIRM_EMAIL_ON_GET = True
ACCOUNT_EMAIL_REQUIRED = True
ACCOUNT_EMAIL_VERIFICATION = 'mandatory'
ACCOUNT_EMAIL_VERIFICATION = 'none'

# Email backend settings for Django
EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'