Merge pull request HiPCTProject#38 from HiPCTProject/meta
add dataset metadata per level
dstansby authored Apr 29, 2024
2 parents 1a08a8c + 54aae1c commit b6ff628
Showing 2 changed files with 85 additions and 37 deletions.
44 changes: 32 additions & 12 deletions src/stack_to_chunk/main.py
@@ -178,20 +178,9 @@ def add_full_res_data(
             p.join()
 
         blosc.use_threads = blosc_use_threads
+        self._add_level_metadata(0)
         logger.info("Finished full resolution copy to zarr.")
 
-        multiscales = self._group.attrs["multiscales"]
-        multiscales[0]["datasets"].append(
-            {
-                "path": "0",
-                "coordinateTransformations": [
-                    {"type": "scale", "scale": list(self._voxel_size)}
-                ],
-            }
-        )
-
-        self._group.attrs["multiscales"] = multiscales
-
     def add_downsample_level(self, level: int) -> None:
         """
         Add a level of downsampling.
@@ -233,6 +222,37 @@ def add_downsample_level(self, level: int) -> None:
                 compressor=source_data.compressor,
             )
 
+    def _add_level_metadata(self, level: int = 0) -> None:
+        """
+        Add the required multiscale metadata for the corresponding level.
+
+        Parameters
+        ----------
+        level :
+            Level of downsampling. Level 0 corresponds to full resolution data.
+        """
+        # we assume that the scale factor is always 2 in each dimension
+        scale_factors = [float(s * 2**level) for s in self._voxel_size]
+        new_dataset = {
+            "path": str(level),
+            "coordinateTransformations": [
+                {
+                    "type": "scale",
+                    "scale": scale_factors,
+                }
+            ],
+        }
+
+        multiscales = self._group.attrs["multiscales"][0]
+        existing_dataset_paths = [d["path"] for d in multiscales["datasets"]]
+        if new_dataset["path"] in existing_dataset_paths:
+            msg = f"Level {level} already in multiscales metadata"
+            raise RuntimeError(msg)
+
+        multiscales["datasets"].append(new_dataset)
+        self._group.attrs["multiscales"] = [multiscales]
+
 
 def open_multiscale_group(path: Path) -> MultiScaleGroup:
     """
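The new helper assumes a fixed downsampling factor of 2 per dimension per level, so the scale it records for level n is voxel_size * 2**n. A minimal sketch of that arithmetic as a standalone function (hypothetical, not part of stack_to_chunk; the (3, 4, 5) voxel size matches the test below):

    # Hypothetical standalone version of the scale computation in
    # _add_level_metadata, assuming a fixed downsampling factor of 2 per level.
    def level_scale(voxel_size: tuple[float, float, float], level: int) -> list[float]:
        return [float(s * 2**level) for s in voxel_size]

    assert level_scale((3, 4, 5), 0) == [3.0, 4.0, 5.0]   # level 0: full resolution
    assert level_scale((3, 4, 5), 1) == [6.0, 8.0, 10.0]  # level 1: scale doubles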
78 changes: 53 additions & 25 deletions src/stack_to_chunk/tests/test_main.py
@@ -2,6 +2,7 @@
 
 import json
 from pathlib import Path
+from typing import Any
 
 import dask.array as da
 import numcodecs
@@ -12,6 +13,12 @@
 from stack_to_chunk import MultiScaleGroup, memory_per_process, open_multiscale_group
 
 
+def check_zattrs(zarr_path: Path, expected: dict[str, Any]) -> None:
+    with (zarr_path / ".zattrs").open() as f:
+        data = json.load(f)
+    assert data == expected
+
+
 @pytest.fixture()
 def arr() -> da.Array:
     shape = (583, 245, 156)
@@ -35,6 +42,26 @@ def test_workflow(tmp_path: Path, arr: da.Array) -> None:
     compressor = numcodecs.blosc.Blosc(cname="zstd", clevel=2, shuffle=2)
     chunk_size = 64
 
+    check_zattrs(
+        zarr_path,
+        {
+            "multiscales": [
+                {
+                    "axes": [
+                        {"name": "x", "type": "space", "unit": "centimeter"},
+                        {"name": "y", "type": "space", "unit": "centimeter"},
+                        {"name": "z", "type": "space", "unit": "centimeter"},
+                    ],
+                    "datasets": [],
+                    "metadata": {"description": "Downscaled using linear resampling"},
+                    "name": "my_zarr_group",
+                    "type": "linear",
+                    "version": "0.4",
+                }
+            ]
+        },
+    )
+
     assert memory_per_process(arr, chunk_size=chunk_size) == 18282880
     group.add_full_res_data(
         arr,
@@ -52,31 +79,32 @@ def test_workflow(tmp_path: Path, arr: da.Array) -> None:
     # Check that data is equal in dask array and zarr array
     np.testing.assert_equal(arr[:], zarr_arr[:])
     # Check metadata
-    with (zarr_path / ".zattrs").open() as f:
-        data = json.load(f)
-    assert data == {
-        "multiscales": [
-            {
-                "axes": [
-                    {"name": "x", "type": "space", "unit": "centimeter"},
-                    {"name": "y", "type": "space", "unit": "centimeter"},
-                    {"name": "z", "type": "space", "unit": "centimeter"},
-                ],
-                "datasets": [
-                    {
-                        "coordinateTransformations": [
-                            {"scale": [3, 4, 5], "type": "scale"}
-                        ],
-                        "path": "0",
-                    }
-                ],
-                "metadata": {"description": "Downscaled using linear resampling"},
-                "name": "my_zarr_group",
-                "type": "linear",
-                "version": "0.4",
-            }
-        ]
-    }
+    check_zattrs(
+        zarr_path,
+        {
+            "multiscales": [
+                {
+                    "axes": [
+                        {"name": "x", "type": "space", "unit": "centimeter"},
+                        {"name": "y", "type": "space", "unit": "centimeter"},
+                        {"name": "z", "type": "space", "unit": "centimeter"},
+                    ],
+                    "datasets": [
+                        {
+                            "coordinateTransformations": [
+                                {"scale": [3, 4, 5], "type": "scale"}
+                            ],
+                            "path": "0",
+                        }
+                    ],
+                    "metadata": {"description": "Downscaled using linear resampling"},
+                    "name": "my_zarr_group",
+                    "type": "linear",
+                    "version": "0.4",
+                }
+            ]
+        },
+    )
 
     with (zarr_path / ".zgroup").open() as f:
         data = json.load(f)
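The duplicate-path guard in _add_level_metadata is not exercised by test_workflow. A hedged sketch of a possible follow-up test, assuming a hypothetical group fixture that yields a freshly created MultiScaleGroup like the one in test_workflow:

    import pytest

    def test_duplicate_level_metadata(group: MultiScaleGroup) -> None:
        # The guard should refuse to register the same level twice.
        group._add_level_metadata(0)
        with pytest.raises(
            RuntimeError, match="Level 0 already in multiscales metadata"
        ):
            group._add_level_metadata(0)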
