Skip to content

Commit

Permalink
ultralytics 8.0.217 HUB YAML path improvements (ultralytics#6556)
Browse files Browse the repository at this point in the history
Signed-off-by: Glenn Jocher <[email protected]>
  • Loading branch information
glenn-jocher authored Nov 24, 2023
1 parent 8f1c3f3 commit 40a349b
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 34 deletions.
25 changes: 12 additions & 13 deletions docs/en/guides/kfold-cross-validation.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,18 @@ Without further ado, let's dive in!

- This guide assumes that annotation files are locally available.

- For our demonstration, we use the [Fruit Detection](https://www.kaggle.com/datasets/lakshaytyagi01/fruit-detection/code) dataset.

- This dataset contains a total of 8479 images.
- It includes 6 class labels, each with its total instance count listed below.

| Class Label | Instance Count |
|:------------|:--------------:|
| Apple | 7049 |
| Grapes | 7202 |
| Pineapple | 1613 |
| Orange | 15549 |
| Banana | 3536 |
| Watermelon | 1976 |
- For our demonstration, we use the [Fruit Detection](https://www.kaggle.com/datasets/lakshaytyagi01/fruit-detection/code) dataset.
- This dataset contains a total of 8479 images.
- It includes 6 class labels, each with its total instance count listed below.

| Class Label | Instance Count |
|:------------|:--------------:|
| Apple | 7049 |
| Grapes | 7202 |
| Pineapple | 1613 |
| Orange | 15549 |
| Banana | 3536 |
| Watermelon | 1976 |

- Necessary Python packages include:

Expand Down
2 changes: 1 addition & 1 deletion docs/hi/models/sam.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ Segment Anything Model का उपयोग उपस्थित डेटा
| SAM का सबसे छोटा, SAM-b | 358 MB | 94.7 M | 51096 ms/im |
| [मोबाइल SAM](mobile-sam.md) | 40.7 MB | 10.1 M | 46122 ms/im |
| [अग्री सेगमेंटेशन वाली FastSAM-s, YOLOv8 बैकबोन सहित](fast-sam.md) | 23.7 MB | 11.8 M | 115 ms/im |
| Ultralytics [योलोवी8न-seg](../टास्क/सेगमेंट.md) | **6.7 MB** (53.4 गुना छोटा) | **3.4 M** (27.9x कम) | **59 ms/im** (866x तेज) |
| Ultralytics [योलोवी8न-seg](yolov8.md) | **6.7 MB** (53.4 गुना छोटा) | **3.4 M** (27.9x कम) | **59 ms/im** (866x तेज) |

यह तुलना मॉडल के आकार और गति में दस्तावेजीय अंतर दिखाती है। जहां SAM स्वचालित सेगमेंटेशन के लिए अद्वितीय क्षमताओं को प्रस्तुत करता है, वहीं Ultralytics विद्यमान सेगमेंटेशन मानदंडों के तुलनात्मक आकार, गति और संचालन क्षमता में समर्थन प्रदान करती है।

Expand Down
41 changes: 21 additions & 20 deletions ultralytics/data/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

from ultralytics.nn.autobackend import check_class_names
from ultralytics.utils import (DATASETS_DIR, LOGGER, NUM_THREADS, ROOT, SETTINGS_YAML, TQDM, clean_url, colorstr,
emojis, yaml_load)
emojis, yaml_load, yaml_save)
from ultralytics.utils.checks import check_file, check_font, is_ascii
from ultralytics.utils.downloads import download, safe_download, unzip_file
from ultralytics.utils.ops import segments2boxes
Expand Down Expand Up @@ -250,28 +250,26 @@ def check_det_dataset(dataset, autodownload=True):
(dict): Parsed dataset information and paths.
"""

data = check_file(dataset)
file = check_file(dataset)

# Download (optional)
extract_dir = ''
if isinstance(data, (str, Path)) and (zipfile.is_zipfile(data) or is_tarfile(data)):
new_dir = safe_download(data, dir=DATASETS_DIR, unzip=True, delete=False)
data = find_dataset_yaml(DATASETS_DIR / new_dir)
extract_dir, autodownload = data.parent, False
if zipfile.is_zipfile(file) or is_tarfile(file):
new_dir = safe_download(file, dir=DATASETS_DIR, unzip=True, delete=False)
file = find_dataset_yaml(DATASETS_DIR / new_dir)
extract_dir, autodownload = file.parent, False

# Read YAML (optional)
if isinstance(data, (str, Path)):
data = yaml_load(data, append_filename=True) # dictionary
# Read YAML
data = yaml_load(file, append_filename=True) # dictionary

# Checks
for k in 'train', 'val':
if k not in data:
if k == 'val' and 'validation' in data:
LOGGER.info("WARNING ⚠️ renaming data YAML 'validation' key to 'val' to match YOLO format.")
data['val'] = data.pop('validation') # replace 'validation' key with 'val' key
else:
if k != 'val' or 'validation' not in data:
raise SyntaxError(
emojis(f"{dataset} '{k}:' key missing ❌.\n'train' and 'val' are required in all data YAMLs."))
LOGGER.info("WARNING ⚠️ renaming data YAML 'validation' key to 'val' to match YOLO format.")
data['val'] = data.pop('validation') # replace 'validation' key with 'val' key
if 'names' not in data and 'nc' not in data:
raise SyntaxError(emojis(f"{dataset} key missing ❌.\n either 'names' or 'nc' are required in all data YAMLs."))
if 'names' in data and 'nc' in data and len(data['names']) != data['nc']:
Expand All @@ -285,9 +283,10 @@ def check_det_dataset(dataset, autodownload=True):

# Resolve paths
path = Path(extract_dir or data.get('path') or Path(data.get('yaml_file', '')).parent) # dataset root

if not path.is_absolute():
path = (DATASETS_DIR / path).resolve()

# Set paths
data['path'] = path # download scripts
for k in 'train', 'val', 'test':
if data.get(k): # prepend path
Expand Down Expand Up @@ -404,7 +403,7 @@ class HUBDatasetStats:
A class for generating HUB dataset JSON and `-hub` dataset directory.
Args:
path (str): Path to data.yaml or data.zip (with data.yaml inside data.zip). Default is 'coco128.yaml'.
path (str): Path to data.yaml or data.zip (with data.yaml inside data.zip). Default is 'coco8.yaml'.
task (str): Dataset task. Options are 'detect', 'segment', 'pose', 'classify'. Default is 'detect'.
autodownload (bool): Attempt to download dataset if not found locally. Default is False.
Expand All @@ -424,7 +423,7 @@ class HUBDatasetStats:
```
"""

def __init__(self, path='coco128.yaml', task='detect', autodownload=False):
def __init__(self, path='coco8.yaml', task='detect', autodownload=False):
"""Initialize class."""
path = Path(path).resolve()
LOGGER.info(f'Starting HUB dataset checks for {path}....')
Expand All @@ -437,10 +436,12 @@ def __init__(self, path='coco128.yaml', task='detect', autodownload=False):
else: # detect, segment, pose
zipped, data_dir, yaml_path = self._unzip(Path(path))
try:
# data = yaml_load(check_yaml(yaml_path)) # data dict
data = check_det_dataset(yaml_path, autodownload) # data dict
if zipped:
data['path'] = data_dir
# Load YAML with checks
data = yaml_load(yaml_path)
data['path'] = '' # strip path since YAML should be in dataset root for all HUB datasets
yaml_save(yaml_path, data)
data = check_det_dataset(yaml_path, autodownload) # dict
data['path'] = data_dir # YAML path should be set to '' (relative) or parent (absolute)
except Exception as e:
raise Exception('error/HUB/dataset_stats/init') from e

Expand Down

0 comments on commit 40a349b

Please sign in to comment.