Skip to content

Commit

Permalink
Merge pull request blaylockbk#233 from GabrielKS/main
Browse files Browse the repository at this point in the history
Fix blaylockbk#232 cURL off-by-one error, add tests, etc.
  • Loading branch information
blaylockbk authored Nov 7, 2023
2 parents 57ee92d + ceb36ff commit 0976006
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 3 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ env_pypi.yml
condaenv*
.venv*

.DS_Store

.pytest_cache/

*.idx
Expand Down
3 changes: 2 additions & 1 deletion herbie/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -908,7 +908,8 @@ def subset(searchString, outFile):
group_dfs = []
for i, group in enumerate(curl_groups):
_df = idx_df.loc[group]
curl_ranges.append(f"{_df.iloc[0].start_byte}-{_df.iloc[-1].end_byte}")
# cURL ranges are end-inclusive, so subtract one from our end-exclusive end_byte
curl_ranges.append(f"{_df.iloc[0].start_byte}-{_df.iloc[-1].end_byte-1}")
group_dfs.append(_df)

for i, (range, _df) in enumerate(zip(curl_ranges, group_dfs)):
Expand Down
4 changes: 2 additions & 2 deletions tests/test_ecmwf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
"""
Tests for downloading ECMWF model
"""
from datetime import datetime
from datetime import datetime, timedelta

from herbie import Herbie

now = datetime.now()
yesterday = datetime(now.year, now.month, now.day - 1)
yesterday = datetime(now.year, now.month, now.day) - timedelta(days=1)
today_str = yesterday.strftime("%Y-%m-%d %H:%M")
save_dir = "$TMPDIR/Herbie-Tests/"

Expand Down
41 changes: 41 additions & 0 deletions tests/test_hrrr.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

from herbie import Herbie, Path
import os
import requests
import pandas as pd

now = datetime.now()
today = datetime(now.year, now.month, now.day, now.hour) - timedelta(hours=6)
Expand Down Expand Up @@ -116,3 +118,42 @@ def test_create_idx_with_wgrib2():
H.download()
H.idx = None
assert len(H.index_as_dataframe) > 0

def _size_from_index(H, searchString=None):
    """Get the size that a file should be from its index, assuming a remote URL.

    Parameters
    ----------
    H : object
        Must provide ``inventory(searchString)`` returning a table with
        ``start_byte`` and ``end_byte`` columns, and a ``grib`` attribute
        holding the remote file URL (in these tests, a ``Herbie`` instance).
    searchString : str, optional
        Passed straight through to ``H.inventory``.

    Returns
    -------
    int
        Sum of ``end_byte - start_byte`` over all inventory rows.
    """
    inventory = H.inventory(searchString)
    ends = pd.to_numeric(inventory.end_byte, errors="coerce")

    # The last end_byte may be blank, in which case fill with the length of
    # the file. Only ask the server when a value is actually missing.
    if ends.isna().any():
        # stream=True avoids downloading the body just to read the headers;
        # the context manager releases the connection once we have them.
        with requests.get(H.grib, stream=True) as response:
            file_length = int(response.headers["Content-Length"])
        ends = ends.fillna(file_length)

    return (ends.astype(int) - inventory.start_byte.astype(int)).sum()

def _size_from_file(H, searchString=None):
    """Return the on-disk size of the local file that ``H`` resolves to.

    The path comes from ``H.get_localFilePath(searchString)``; its
    ``stat().st_size`` is returned unchanged.
    """
    local_path = H.get_localFilePath(searchString)
    file_stats = local_path.stat()
    return file_stats.st_size

def test_hrrr_file_size_full():
    """Check that the size implied by the index equals the size of the full download."""
    herbie_kwargs = dict(
        model="hrrr",
        product="sfc",
        save_dir=save_dir,
        overwrite=True,
    )
    H = Herbie(today, **herbie_kwargs)

    # Compute the expected size from the index before fetching the file itself.
    expected_size = _size_from_index(H)
    H.download()
    actual_size = _size_from_file(H)
    assert expected_size == actual_size

def test_hrrr_file_size_subset():
    """Check that the size implied by the index equals the size of a subset download."""
    search = ":.GRD:"
    herbie_kwargs = dict(
        model="hrrr",
        product="sfc",
        save_dir=save_dir,
        overwrite=True,
    )
    H = Herbie(today, **herbie_kwargs)

    # Compute the expected size of the matching messages before downloading them.
    expected_size = _size_from_index(H, search)
    H.download(search)
    actual_size = _size_from_file(H, search)
    assert expected_size == actual_size

0 comments on commit 0976006

Please sign in to comment.