Skip to content

Commit

Permalink
NOAA GML data (ARM-DOE#443)
Browse files Browse the repository at this point in the history
* Changed to pass kwargs to read_csv()

* Updated for SPLASH campaign specific files

* Fixing issue where string filenames were expected to be pathlib.Path objects. Also fixing incorrect reading of files with header=0

* Updating test values to match the correct values read from file now that the header=0 reading issue is fixed.

* Fixing documentation
  • Loading branch information
kenkehoe authored Apr 4, 2022
1 parent 0dd2bf3 commit e4ca90c
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 18 deletions.
27 changes: 21 additions & 6 deletions act/io/noaagml.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def read_gml(filename, datatype=None, remove_time_vars=True, convert_missing=Tru
convert_missing : bool
Convert missing value indicator in CSV to NaN in Xarray DataSet.
**kwargs : keywords
Keywords to pass through to read_gml_met() reading routine.
Keywords to pass through to instrument specific reading routine.
Returns
-------
Expand Down Expand Up @@ -238,7 +238,7 @@ def read_gml_halo(filename, **kwargs):
header += 1

ds = act.io.csvfiles.read_csv(
filename, sep=r'\s+', header=header, na_values=['Nan', 'NaN', 'nan', 'NAN']
filename, sep=r'\s+', header=header, na_values=['Nan', 'NaN', 'nan', 'NAN'], **kwargs
)
var_names = list(ds.data_vars)
year_name, month_name, day_name, hour_name, min_name = None, None, None, None, None
Expand Down Expand Up @@ -413,7 +413,7 @@ def read_gml_co2(filename=None, convert_missing=True, **kwargs):
with open(test_filename) as fc:
skiprows = int(fc.readline().strip().split()[-1]) - 1

ds = act.io.csvfiles.read_csv(filename, sep=r'\s+', skiprows=skiprows)
ds = act.io.csvfiles.read_csv(filename, sep=r'\s+', skiprows=skiprows, **kwargs)

timestamp = np.full(ds['year'].size, np.nan, dtype='datetime64[s]')
for ii in range(0, len(timestamp)):
Expand Down Expand Up @@ -532,7 +532,7 @@ def read_gml_ozone(filename=None, **kwargs):
pass
skiprows += 1

ds = act.io.csvfiles.read_csv(filename, sep=r'\s+', skiprows=skiprows)
ds = act.io.csvfiles.read_csv(filename, sep=r'\s+', skiprows=skiprows, **kwargs)
ds.attrs['station'] = str(ds['STN'].values[0]).lower()

timestamp = np.full(ds['YEAR'].size, np.nan, dtype='datetime64[s]')
Expand Down Expand Up @@ -730,6 +730,21 @@ def read_gml_radiation(filename=None, convert_missing=True, remove_time_vars=Tru
},
}

# Add additional column names for NOAA SPLASH campaign
if str(Path(filename).name).startswith('cbc') or str(Path(filename).name).startswith('ckp'):
column_names['SPN1_total'] = {
'units': 'W/m^2',
'long_name': 'SPN1 total average',
'_FillValue': -9999.9,
'__type': np.float32
}
column_names['SPN1_diffuse'] = {
'units': 'W/m^2',
'long_name': 'SPN1 diffuse average',
'_FillValue': -9999.9,
'__type': np.float32
}

names = list(column_names.keys())
skip_vars = [
'year',
Expand All @@ -748,7 +763,7 @@ def read_gml_radiation(filename=None, convert_missing=True, remove_time_vars=Tru
names.insert(ii + num, 'qc_' + name)
num += 1

ds = act.io.csvfiles.read_csv(filename, sep=r'\s+', header=0, skiprows=2, column_names=names)
ds = act.io.csvfiles.read_csv(filename, sep=r'\s+', header=None, skiprows=2, column_names=names, **kwargs)

if isinstance(filename, (list, tuple)):
filename = filename[0]
Expand Down Expand Up @@ -970,7 +985,7 @@ def read_gml_met(filename=None, convert_missing=True, **kwargs):
minutes = False
del column_names['minute']

ds = act.io.csvfiles.read_csv(filename, sep=r'\s+', header=0, column_names=column_names.keys())
ds = act.io.csvfiles.read_csv(filename, sep=r'\s+', header=None, column_names=column_names.keys(), **kwargs)

if ds is not None:

Expand Down
24 changes: 12 additions & 12 deletions act/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,8 @@ def test_io_mpldataset():
def test_read_gml():
# Test Radiation
ds = read_gml(sample_files.EXAMPLE_GML_RADIATION, datatype='RADIATION')
assert np.isclose(np.nansum(ds['solar_zenith_angle']), 1629.68)
assert np.isclose(np.nansum(ds['upwelling_infrared_case_temp']), 4185.73)
assert np.isclose(np.nansum(ds['solar_zenith_angle']), 1725.28)
assert np.isclose(np.nansum(ds['upwelling_infrared_case_temp']), 4431.88)
assert (
ds['upwelling_infrared_case_temp'].attrs['ancillary_variables']
== 'qc_upwelling_infrared_case_temp'
Expand All @@ -226,8 +226,8 @@ def test_read_gml():
assert ds['time'].values[-1] == np.datetime64('2021-01-01T00:17:00')

ds = read_gml(sample_files.EXAMPLE_GML_RADIATION, convert_missing=False)
assert np.isclose(np.nansum(ds['solar_zenith_angle']), 1629.68)
assert np.isclose(np.nansum(ds['upwelling_infrared_case_temp']), 4185.73)
assert np.isclose(np.nansum(ds['solar_zenith_angle']), 1725.28)
assert np.isclose(np.nansum(ds['upwelling_infrared_case_temp']), 4431.88)
assert (
ds['upwelling_infrared_case_temp'].attrs['ancillary_variables']
== 'qc_upwelling_infrared_case_temp'
Expand All @@ -247,20 +247,20 @@ def test_read_gml():

# Test MET
ds = read_gml(sample_files.EXAMPLE_GML_MET, datatype='MET')
assert np.isclose(np.nansum(ds['wind_speed'].values), 140.999)
assert np.isclose(np.nansum(ds['wind_speed'].values), 148.1)
assert ds['wind_speed'].attrs['units'] == 'm/s'
assert np.isnan(ds['wind_speed'].attrs['_FillValue'])
assert np.sum(np.isnan(ds['preciptation_intensity'].values)) == 19
assert np.sum(np.isnan(ds['preciptation_intensity'].values)) == 20
assert ds['preciptation_intensity'].attrs['units'] == 'mm/hour'
assert ds['time'].values[0] == np.datetime64('2020-01-01T01:00:00')
assert ds['time'].values[0] == np.datetime64('2020-01-01T00:00:00')

ds = read_gml(sample_files.EXAMPLE_GML_MET, convert_missing=False)
assert np.isclose(np.nansum(ds['wind_speed'].values), 140.999)
assert np.isclose(np.nansum(ds['wind_speed'].values), 148.1)
assert ds['wind_speed'].attrs['units'] == 'm/s'
assert np.isclose(ds['wind_speed'].attrs['_FillValue'], -999.9)
assert np.sum(ds['preciptation_intensity'].values) == -1881
assert np.sum(ds['preciptation_intensity'].values) == -1980
assert ds['preciptation_intensity'].attrs['units'] == 'mm/hour'
assert ds['time'].values[0] == np.datetime64('2020-01-01T01:00:00')
assert ds['time'].values[0] == np.datetime64('2020-01-01T00:00:00')

# Test Ozone
ds = read_gml(sample_files.EXAMPLE_GML_OZONE, datatype='OZONE')
Expand Down Expand Up @@ -301,14 +301,14 @@ def test_read_gml():

# Test Halocarbon
ds = read_gml(sample_files.EXAMPLE_GML_HALO, datatype='HALO')
assert np.isclose(np.nansum(ds['CCl4'].values), 1342.6499)
assert np.isclose(np.nansum(ds['CCl4'].values), 1342.65)
assert ds['CCl4'].attrs['units'] == 'ppt'
assert ds['CCl4'].attrs['long_name'] == 'Carbon Tetrachloride (CCl4) daily median'
assert np.isnan(ds['CCl4'].attrs['_FillValue'])
assert ds['time'].values[0] == np.datetime64('1998-06-16T00:00:00')

ds = read_gml(sample_files.EXAMPLE_GML_HALO)
assert np.isclose(np.nansum(ds['CCl4'].values), 1342.6499)
assert np.isclose(np.nansum(ds['CCl4'].values), 1342.65)
assert ds['CCl4'].attrs['units'] == 'ppt'
assert ds['CCl4'].attrs['long_name'] == 'Carbon Tetrachloride (CCl4) daily median'
assert np.isnan(ds['CCl4'].attrs['_FillValue'])
Expand Down

0 comments on commit e4ca90c

Please sign in to comment.