Merge pull request HiPCTProject#38 from HiPCTProject/meta
add dataset metadata per level
dstansby authored Apr 29, 2024
2 parents 1a08a8c + 54aae1c commit b6ff628
Showing 2 changed files with 85 additions and 37 deletions.
44 changes: 32 additions & 12 deletions src/stack_to_chunk/main.py
@@ -178,20 +178,9 @@ def add_full_res_data(
             p.join()
 
         blosc.use_threads = blosc_use_threads
+        self._add_level_metadata(0)
         logger.info("Finished full resolution copy to zarr.")
 
-        multiscales = self._group.attrs["multiscales"]
-        multiscales[0]["datasets"].append(
-            {
-                "path": "0",
-                "coordinateTransformations": [
-                    {"type": "scale", "scale": list(self._voxel_size)}
-                ],
-            }
-        )
-
-        self._group.attrs["multiscales"] = multiscales
-
     def add_downsample_level(self, level: int) -> None:
         """
         Add a level of downsampling.
@@ -233,6 +222,37 @@ def add_downsample_level(self, level: int) -> None:
                 compressor=source_data.compressor,
             )
 
+    def _add_level_metadata(self, level: int = 0) -> None:
+        """
+        Add the required multiscale metadata for the corresponding level.
+
+        Parameters
+        ----------
+        level :
+            Level of downsampling. Level 0 corresponds to full resolution data.
+        """
+        # we assume that the scale factor is always 2 in each dimension
+        scale_factors = [float(s * 2**level) for s in self._voxel_size]
+        new_dataset = {
+            "path": str(level),
+            "coordinateTransformations": [
+                {
+                    "type": "scale",
+                    "scale": scale_factors,
+                }
+            ],
+        }
+
+        multiscales = self._group.attrs["multiscales"][0]
+        existing_dataset_paths = [d["path"] for d in multiscales["datasets"]]
+        if new_dataset["path"] in existing_dataset_paths:
+            msg = f"Level {level} already in multiscales metadata"
+            raise RuntimeError(msg)
+
+        multiscales["datasets"].append(new_dataset)
+        self._group.attrs["multiscales"] = [multiscales]
+
 
 def open_multiscale_group(path: Path) -> MultiScaleGroup:
     """
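The new helper assumes a fixed downsampling factor of 2 per dimension per level, so the scale it records for level n is voxel_size * 2**n. A minimal sketch of that arithmetic as a standalone function (hypothetical, not part of stack_to_chunk; the (3, 4, 5) voxel size matches the test below):

    # Hypothetical standalone version of the scale computation in
    # _add_level_metadata, assuming a fixed downsampling factor of 2 per level.
    def level_scale(voxel_size: tuple[float, float, float], level: int) -> list[float]:
        return [float(s * 2**level) for s in voxel_size]

    assert level_scale((3, 4, 5), 0) == [3.0, 4.0, 5.0]   # level 0: full resolution
    assert level_scale((3, 4, 5), 1) == [6.0, 8.0, 10.0]  # level 1: scale doubles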
78 changes: 53 additions & 25 deletions src/stack_to_chunk/tests/test_main.py
@@ -2,6 +2,7 @@
 
 import json
 from pathlib import Path
+from typing import Any
 
 import dask.array as da
 import numcodecs
@@ -12,6 +13,12 @@
 from stack_to_chunk import MultiScaleGroup, memory_per_process, open_multiscale_group
 
 
+def check_zattrs(zarr_path: Path, expected: dict[str, Any]) -> None:
+    with (zarr_path / ".zattrs").open() as f:
+        data = json.load(f)
+    assert data == expected
+
+
 @pytest.fixture()
 def arr() -> da.Array:
     shape = (583, 245, 156)
@@ -35,6 +42,26 @@ def test_workflow(tmp_path: Path, arr: da.Array) -> None:
     compressor = numcodecs.blosc.Blosc(cname="zstd", clevel=2, shuffle=2)
     chunk_size = 64
 
+    check_zattrs(
+        zarr_path,
+        {
+            "multiscales": [
+                {
+                    "axes": [
+                        {"name": "x", "type": "space", "unit": "centimeter"},
+                        {"name": "y", "type": "space", "unit": "centimeter"},
+                        {"name": "z", "type": "space", "unit": "centimeter"},
+                    ],
+                    "datasets": [],
+                    "metadata": {"description": "Downscaled using linear resampling"},
+                    "name": "my_zarr_group",
+                    "type": "linear",
+                    "version": "0.4",
+                }
+            ]
+        },
+    )
+
     assert memory_per_process(arr, chunk_size=chunk_size) == 18282880
     group.add_full_res_data(
         arr,
@@ -52,31 +79,32 @@ def test_workflow(tmp_path: Path, arr: da.Array) -> None:
     # Check that data is equal in dask array and zarr array
     np.testing.assert_equal(arr[:], zarr_arr[:])
     # Check metadata
-    with (zarr_path / ".zattrs").open() as f:
-        data = json.load(f)
-    assert data == {
-        "multiscales": [
-            {
-                "axes": [
-                    {"name": "x", "type": "space", "unit": "centimeter"},
-                    {"name": "y", "type": "space", "unit": "centimeter"},
-                    {"name": "z", "type": "space", "unit": "centimeter"},
-                ],
-                "datasets": [
-                    {
-                        "coordinateTransformations": [
-                            {"scale": [3, 4, 5], "type": "scale"}
-                        ],
-                        "path": "0",
-                    }
-                ],
-                "metadata": {"description": "Downscaled using linear resampling"},
-                "name": "my_zarr_group",
-                "type": "linear",
-                "version": "0.4",
-            }
-        ]
-    }
+    check_zattrs(
+        zarr_path,
+        {
+            "multiscales": [
+                {
+                    "axes": [
+                        {"name": "x", "type": "space", "unit": "centimeter"},
+                        {"name": "y", "type": "space", "unit": "centimeter"},
+                        {"name": "z", "type": "space", "unit": "centimeter"},
+                    ],
+                    "datasets": [
+                        {
+                            "coordinateTransformations": [
+                                {"scale": [3, 4, 5], "type": "scale"}
+                            ],
+                            "path": "0",
+                        }
+                    ],
+                    "metadata": {"description": "Downscaled using linear resampling"},
+                    "name": "my_zarr_group",
+                    "type": "linear",
+                    "version": "0.4",
+                }
+            ]
+        },
+    )
 
     with (zarr_path / ".zgroup").open() as f:
         data = json.load(f)
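The duplicate-path guard in _add_level_metadata is not exercised by test_workflow. A hedged sketch of a possible follow-up test, assuming a hypothetical group fixture that yields a freshly created MultiScaleGroup like the one in test_workflow:

    import pytest

    def test_duplicate_level_metadata(group: MultiScaleGroup) -> None:
        # The guard should refuse to register the same level twice.
        group._add_level_metadata(0)
        with pytest.raises(
            RuntimeError, match="Level 0 already in multiscales metadata"
        ):
            group._add_level_metadata(0)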
