From c0f0d1bf76194e115d2de2be76bb922bd53775e8 Mon Sep 17 00:00:00 2001 From: "Adam R. Jensen" <39184289+AdamRJensen@users.noreply.github.com> Date: Fri, 9 Feb 2024 23:11:36 +0100 Subject: [PATCH 1/7] Remove junk --- pvlib/iotools/solrad.py | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/pvlib/iotools/solrad.py b/pvlib/iotools/solrad.py index e5bb05d709..31fc8c8625 100644 --- a/pvlib/iotools/solrad.py +++ b/pvlib/iotools/solrad.py @@ -49,8 +49,9 @@ def read_solrad(filename): """ - Read NOAA SOLRAD fixed-width file into pandas dataframe. The SOLRAD - network is described in [1]_ and [2]_. + Read NOAA SOLRAD fixed-width file into pandas dataframe. + + The SOLRAD network is described in [1]_ and [2]_. Parameters ---------- @@ -62,6 +63,8 @@ def read_solrad(filename): data: Dataframe A dataframe with DatetimeIndex and all of the variables in the file. + metadata : dict + Metadata. Notes ----- @@ -93,17 +96,7 @@ def read_solrad(filename): # read in data data = pd.read_fwf(filename, header=None, skiprows=2, names=names, - widths=widths, na_values=-9999.9) - - # loop here because dtype kwarg not supported in read_fwf until 0.20 - for (col, _dtype) in zip(data.columns, dtypes): - ser = data[col].astype(_dtype) - if _dtype == 'float64': - # older verions of pandas/numpy read '-9999.9' as - # -9999.8999999999996 and fail to set nan in read_fwf, - # so manually set nan - ser = ser.where(ser > -9999, other=np.nan) - data[col] = ser + widths=widths, na_values=-9999.9, dtypes=dtypes) # set index # columns do not have leading 0s, so must zfill(2) to comply @@ -114,10 +107,5 @@ def read_solrad(filename): data['year'].astype(str) + dts['month'] + dts['day'] + dts['hour'] + dts['minute'], format='%Y%m%d%H%M', utc=True) data = data.set_index(dtindex) - try: - # to_datetime(utc=True) does not work in older versions of pandas - data = data.tz_localize('UTC') - except TypeError: - pass return data From 752c234285936cda48d777277a36d6bb295693a6 Mon Sep 17 00:00:00 2001 From: "Adam R. Jensen" <39184289+AdamRJensen@users.noreply.github.com> Date: Fri, 9 Feb 2024 23:48:40 +0100 Subject: [PATCH 2/7] Add function --- pvlib/iotools/solrad.py | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/pvlib/iotools/solrad.py b/pvlib/iotools/solrad.py index 31fc8c8625..483bbc1779 100644 --- a/pvlib/iotools/solrad.py +++ b/pvlib/iotools/solrad.py @@ -1,8 +1,8 @@ -"""Functions to read data from the NOAA SOLRAD network. -""" +"""Functions to read data from the NOAA SOLRAD network.""" -import numpy as np import pandas as pd +import requests +import io # pvlib conventions BASE_HEADERS = ( @@ -53,6 +53,12 @@ def read_solrad(filename): The SOLRAD network is described in [1]_ and [2]_. + .. versionchanged:: 0.10.4 + The function now returns a tuple where the first element is a dataframe + and the second element is a dictionary containing metadata. Previous + versions of this function only returned a dataframe. + + Parameters ---------- filename: str @@ -94,8 +100,27 @@ def read_solrad(filename): widths = WIDTHS dtypes = DTYPES + meta = {} + + if str(filename).startswith('ftp') or str(filename).startswith('http'): + response = requests.get(filename) + file_buffer = io.StringIO(response.content.decode()) + else: + file_buffer = open(str(filename), 'r') + + # the first has the name of the station, and the second gives the + # station's latitude, longitude, elevation above mean sea level in meters, + # and the displacement in hours from local standard time. + meta['station_name'] = file_buffer.readline().strip() + + meta_line = file_buffer.readline().split() + meta['latitude'] = float(meta_line[0]) + meta['longitude'] = float(meta_line[1]) + meta['elevation'] = float(meta_line[2]) + meta['TZ'] = int(meta_line[3]) + # read in data - data = pd.read_fwf(filename, header=None, skiprows=2, names=names, + data = pd.read_fwf(file_buffer, header=None, names=names, widths=widths, na_values=-9999.9, dtypes=dtypes) # set index @@ -108,4 +133,4 @@ def read_solrad(filename): dts['minute'], format='%Y%m%d%H%M', utc=True) data = data.set_index(dtindex) - return data + return data, meta From 060b9fe3b493adf829ff50ae7dc37200df8df51c Mon Sep 17 00:00:00 2001 From: "Adam R. Jensen" <39184289+AdamRJensen@users.noreply.github.com> Date: Fri, 9 Feb 2024 23:48:49 +0100 Subject: [PATCH 3/7] Add tests --- pvlib/tests/iotools/test_solrad.py | 34 ++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/pvlib/tests/iotools/test_solrad.py b/pvlib/tests/iotools/test_solrad.py index f8f97af41f..020ced2136 100644 --- a/pvlib/tests/iotools/test_solrad.py +++ b/pvlib/tests/iotools/test_solrad.py @@ -5,12 +5,17 @@ import pytest from pvlib.iotools import solrad -from ..conftest import DATA_DIR, assert_frame_equal +from ..conftest import DATA_DIR, assert_frame_equal, RERUNS, RERUNS_DELAY testfile = DATA_DIR / 'abq19056.dat' testfile_mad = DATA_DIR / 'msn19056.dat' +https_testfile = ('https://gml.noaa.gov/aftp/data/radiation/solrad/abq/' + '2019/abq19056.dat') + +https_testfile = ('https://gml.noaa.gov/aftp/data/radiation/solrad/msn/' + '2019/msn19056.dat') columns = [ 'year', 'julian_day', 'month', 'day', 'hour', 'minute', 'decimal_time', @@ -87,15 +92,32 @@ 'int64', 'float64', 'int64', 'float64', 'int64', 'float64', 'int64', 'float64', 'int64', 'float64', 'float64', 'float64', 'float64', 'float64', 'float64', 'float64'] +meta = {'station_name': 'Albuquerque', 'latitude': 35.03796, + 'longitude': -106.62211, 'elevation': 1617, 'TZ': -7} +meta_mad = {'station_name': 'Madison', 'latitude': 43.07250, + 'longitude': -89.41133, 'elevation': 271, 'TZ': -6} -@pytest.mark.parametrize('testfile,index,columns,values,dtypes', [ - (testfile, index, columns, values, dtypes), - (testfile_mad, index, columns_mad, values_mad, dtypes_mad) +@pytest.mark.parametrize('testfile,index,columns,values,dtypes,meta', [ + (testfile, index, columns, values, dtypes, meta), + (testfile_mad, index, columns_mad, values_mad, dtypes_mad, meta_mad) ]) -def test_read_solrad(testfile, index, columns, values, dtypes): +def test_read_solrad(testfile, index, columns, values, dtypes, meta): expected = pd.DataFrame(values, columns=columns, index=index) for (col, _dtype) in zip(expected.columns, dtypes): expected[col] = expected[col].astype(_dtype) - out = solrad.read_solrad(testfile) + out, m = solrad.read_solrad(testfile) assert_frame_equal(out, expected) + assert m == meta + + +@pytest.mark.remote_data +@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) +def test_read_solrad_https(): + # Test reading of https files. + # If this test begins failing, SOLRAD's data structure or data + # archive may have changed. + local_data, _ = solrad.read_solrad(testfile_mad) + remote_data, _ = solrad.read_solrad(https_testfile) + # local file only contains four rows to save space + assert_frame_equal(local_data, remote_data.iloc[:4]) From f83e2d1a519e8dc8200d7c92af5ddafc16ced542 Mon Sep 17 00:00:00 2001 From: "Adam R. Jensen" <39184289+AdamRJensen@users.noreply.github.com> Date: Fri, 9 Feb 2024 23:54:33 +0100 Subject: [PATCH 4/7] Update v0.10.4.rst --- docs/sphinx/source/whatsnew/v0.10.4.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/sphinx/source/whatsnew/v0.10.4.rst b/docs/sphinx/source/whatsnew/v0.10.4.rst index 3cab3fc8ad..72e402c919 100644 --- a/docs/sphinx/source/whatsnew/v0.10.4.rst +++ b/docs/sphinx/source/whatsnew/v0.10.4.rst @@ -8,7 +8,8 @@ v0.10.4 (Anticipated March, 2024) Enhancements ~~~~~~~~~~~~ * Added the Huld PV model used by PVGIS (:pull:`1940`) - +* Added metadata parsing to :py:func:`~pvlib.iotools.read_solrad` to follow the standard iotools + convention of returning a tuple of (data, meta). Previously the function only returned a dataframe. (:pull:`1968`) Bug fixes ~~~~~~~~~ From e6a1300c93d2fdf3e7522b21279507a5b3b0e0f8 Mon Sep 17 00:00:00 2001 From: "Adam R. Jensen" <39184289+AdamRJensen@users.noreply.github.com> Date: Fri, 9 Feb 2024 23:57:51 +0100 Subject: [PATCH 5/7] Remove double testfile --- pvlib/tests/iotools/test_solrad.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pvlib/tests/iotools/test_solrad.py b/pvlib/tests/iotools/test_solrad.py index 020ced2136..c039ac72b7 100644 --- a/pvlib/tests/iotools/test_solrad.py +++ b/pvlib/tests/iotools/test_solrad.py @@ -10,10 +10,6 @@ testfile = DATA_DIR / 'abq19056.dat' testfile_mad = DATA_DIR / 'msn19056.dat' - -https_testfile = ('https://gml.noaa.gov/aftp/data/radiation/solrad/abq/' - '2019/abq19056.dat') - https_testfile = ('https://gml.noaa.gov/aftp/data/radiation/solrad/msn/' '2019/msn19056.dat') From 2134a18c25746a2f1ddbd46ce07930367a61d7fe Mon Sep 17 00:00:00 2001 From: "Adam R. Jensen" <39184289+AdamRJensen@users.noreply.github.com> Date: Sat, 10 Feb 2024 00:02:52 +0100 Subject: [PATCH 6/7] Change elevation to altitude --- pvlib/iotools/solrad.py | 4 ++-- pvlib/tests/iotools/test_solrad.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pvlib/iotools/solrad.py b/pvlib/iotools/solrad.py index 483bbc1779..f47e76b412 100644 --- a/pvlib/iotools/solrad.py +++ b/pvlib/iotools/solrad.py @@ -108,7 +108,7 @@ def read_solrad(filename): else: file_buffer = open(str(filename), 'r') - # the first has the name of the station, and the second gives the + # The first line has the name of the station, and the second gives the # station's latitude, longitude, elevation above mean sea level in meters, # and the displacement in hours from local standard time. meta['station_name'] = file_buffer.readline().strip() @@ -116,7 +116,7 @@ def read_solrad(filename): meta_line = file_buffer.readline().split() meta['latitude'] = float(meta_line[0]) meta['longitude'] = float(meta_line[1]) - meta['elevation'] = float(meta_line[2]) + meta['altitude'] = float(meta_line[2]) meta['TZ'] = int(meta_line[3]) # read in data diff --git a/pvlib/tests/iotools/test_solrad.py b/pvlib/tests/iotools/test_solrad.py index c039ac72b7..abfa5d6e31 100644 --- a/pvlib/tests/iotools/test_solrad.py +++ b/pvlib/tests/iotools/test_solrad.py @@ -89,9 +89,9 @@ 'float64', 'int64', 'float64', 'float64', 'float64', 'float64', 'float64', 'float64', 'float64'] meta = {'station_name': 'Albuquerque', 'latitude': 35.03796, - 'longitude': -106.62211, 'elevation': 1617, 'TZ': -7} + 'longitude': -106.62211, 'altitude': 1617, 'TZ': -7} meta_mad = {'station_name': 'Madison', 'latitude': 43.07250, - 'longitude': -89.41133, 'elevation': 271, 'TZ': -6} + 'longitude': -89.41133, 'altitude': 271, 'TZ': -6} @pytest.mark.parametrize('testfile,index,columns,values,dtypes,meta', [ From 44000e6bfecdb6dc43bce4ae1fc8d196d9ed0fe4 Mon Sep 17 00:00:00 2001 From: "Adam R. Jensen" <39184289+AdamRJensen@users.noreply.github.com> Date: Fri, 1 Mar 2024 14:35:54 +0100 Subject: [PATCH 7/7] use "with open" from code review --- pvlib/iotools/solrad.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pvlib/iotools/solrad.py b/pvlib/iotools/solrad.py index f47e76b412..6d9dde743a 100644 --- a/pvlib/iotools/solrad.py +++ b/pvlib/iotools/solrad.py @@ -106,7 +106,8 @@ def read_solrad(filename): response = requests.get(filename) file_buffer = io.StringIO(response.content.decode()) else: - file_buffer = open(str(filename), 'r') + with open(str(filename), 'r') as file_buffer: + file_buffer = io.StringIO(file_buffer.read()) # The first line has the name of the station, and the second gives the # station's latitude, longitude, elevation above mean sea level in meters,