"""
CAMS McClear HTTP retrieval helpers.
"""
import io
import pandas as pd
import requests
from bsrn.constants import MCCLEAR_INTEGRATED_COLUMNS, MCCLEAR_VARIABLE_MAP, MCCLEAR_API_HOST
def _parse_mcclear(raw_or_buffer):
    """
    Parse SoDa McClear CSV into the project DataFrame (used by ``_download_mcclear`` only).

    Metadata lines are skipped until the column-name line (the one starting
    with ``# Observation period``) is found; the remainder is read as
    ``;``-separated CSV. Columns listed in ``MCCLEAR_INTEGRATED_COLUMNS`` are
    divided by the interval length in hours, and columns are renamed through
    ``MCCLEAR_VARIABLE_MAP``.

    Parameters
    ----------
    raw_or_buffer : str or file-like
        Raw CAMS text or readable text buffer. Both LF and CRLF line endings
        are accepted.

    Returns
    -------
    data : pd.DataFrame
        Parsed time-series data, indexed by the UTC-aware start time of each
        observation period.

    Raises
    ------
    ValueError
        If the McClear header line is missing or the payload is invalid.

    References
    ----------
    .. [1] CAMS McClear service info. (n.d.). SoDa.
       http://www.soda-pro.com/web-services/radiation/cams-mcclear/info
    """
    if isinstance(raw_or_buffer, str):
        fbuf = io.StringIO(raw_or_buffer)
    else:
        fbuf = raw_or_buffer

    # Read metadata header lines until the column-names line.
    while True:
        line = fbuf.readline()
        if not line:
            # End of input reached without ever seeing the column header.
            raise ValueError("Invalid McClear payload: header not found.")
        # Strip "\r" as well as "\n": with CRLF payloads a leftover "\r" would
        # stick to the last column name, so that column would match neither
        # the integrated-column list nor the rename map.
        line = line.rstrip("\r\n")
        if line.startswith("# Observation period"):
            # Drop the leading "# " marker; the rest is the ";"-joined names.
            names = line.lstrip("# ").split(";")
            break

    data = pd.read_csv(fbuf, sep=";", comment="#", header=None, names=names)

    # Interval bounds ("start/end") from the first column.
    obs_period = data["Observation period"].str.split("/")
    # Label each row with the interval start time (floor-style labeling).
    data.index = pd.to_datetime(obs_period.str[0], utc=True)

    # Convert Wh/m^2 to W/m^2 using the interval duration in hours.
    integrated_cols = [c for c in MCCLEAR_INTEGRATED_COLUMNS if c in data.columns]
    time_delta = pd.to_datetime(obs_period.str[1]) - pd.to_datetime(obs_period.str[0])
    hours = time_delta.dt.total_seconds() / 3600.0
    # A plain list is used so the division is positional (row by row) rather
    # than aligned on the index, which was replaced above.
    data[integrated_cols] = data[integrated_cols].divide(hours.tolist(), axis="rows")

    data.index.name = None
    data = data.rename(columns=MCCLEAR_VARIABLE_MAP)
    return data
def _download_mcclear(latitude, longitude, start, end, email, elev=None,
                      timeout=30):
    """
    Download and parse CAMS McClear from SoDa (used by ``fetch_mcclear`` only).

    The request asks for 1-minute summarization in universal time and hands
    the CSV response to ``_parse_mcclear``.

    Parameters
    ----------
    latitude : float
        Latitude in decimal degrees. [degrees]
    longitude : float
        Longitude in decimal degrees. [degrees]
    start : datetime.datetime or pandas.Timestamp
        Start date (inclusive) of requested period. Timezone-aware values are
        converted to UTC before the calendar date is taken.
    end : datetime.datetime or pandas.Timestamp
        End date (inclusive) of requested period. Timezone-aware values are
        converted to UTC before the calendar date is taken.
    email : str
        SoDa account email.
    elev : float, optional
        Station elevation. [m] If None, ``-999`` is sent so that SoDa uses its
        default terrain lookup.
    timeout : int, default 30
        HTTP request timeout in seconds.

    Returns
    -------
    data : pd.DataFrame
        Parsed McClear data.

    Raises
    ------
    ValueError
        If the request starts before 2004-01-01 or the response is not valid CSV.
    requests.Timeout
        If the HTTP request exceeds *timeout*.
    requests.HTTPError
        If SoDa returns a non-success status after ``raise_for_status``.

    References
    ----------
    .. [1] Lefèvre, M., Oumbe, A., Blanc, P., Espinar, B., Gschwind, B., Qu, Z.,
       et al. (2013). McClear: A new model estimating downwelling solar
       radiation at ground level in clear-sky conditions. Atmospheric
       Measurement Techniques, 6(9), 2403–2418.
    .. [2] Gschwind, B., Wald, L., Blanc, P., Lefèvre, M., Schroedter-Homscheidt, M.,
       & Arola, A. (2019). Improving the McClear model estimating the downwelling
       solar radiation at ground level in cloud-free conditions – McClear-v3.
       Meteorologische Zeitschrift, 28(2).
    """
    def _as_utc(stamp):
        # Aware timestamps are expressed in UTC; naive ones pass through as-is.
        return stamp if stamp.tzinfo is None else stamp.tz_convert("UTC")

    altitude = -999 if elev is None else elev

    # The service is only defined from 2004-01-01 onward; compare on a naive
    # UTC timestamp so aware and naive inputs are handled alike.
    first = pd.Timestamp(start)
    first_utc = _as_utc(first)
    first_naive = first_utc if first.tzinfo is None else first_utc.tz_localize(None)
    if first_naive < pd.Timestamp("2004-01-01"):
        raise ValueError(
            "McClear data are only available from 2004-01-01 onward."
        )

    # Calendar dates (UTC for aware inputs) and the account name for the request.
    last = pd.Timestamp(end)
    date_begin = first_utc.strftime("%Y-%m-%d")
    date_end = _as_utc(last).strftime("%Y-%m-%d")
    # "@" is sent pre-encoded as "%2540", mirroring pvlib.iotools.get_cams.
    username = email.replace("@", "%2540")

    # WPS DataInputs payload: 1-minute summarization, universal time.
    wps_inputs = {
        "latitude": latitude,
        "longitude": longitude,
        "altitude": altitude,
        "date_begin": date_begin,
        "date_end": date_end,
        "time_ref": "UT",
        "summarization": "PT01M",
        "username": username,
        "verbose": "false",
    }
    data_inputs = ";".join(f"{name}={val}" for name, val in wps_inputs.items())
    query = {
        "Service": "WPS",
        "Request": "Execute",
        "Identifier": "get_mcclear",
        "version": "1.0.0",
        "RawDataOutput": "irradiation",
    }

    # Same HTTPS endpoint and request pattern as pvlib.iotools.get_cams, with
    # the host taken from the project constants. DataInputs is appended to the
    # URL by hand; the remaining parameters go through ``params``.
    base_url = f"https://{MCCLEAR_API_HOST}/service/wps"
    request_url = f"{base_url}?DataInputs={data_inputs}"
    try:
        response = requests.get(request_url, params=query, timeout=timeout)
    except requests.Timeout as exc:
        raise requests.Timeout(
            f"McClear request timed out for {base_url}: {exc}"
        ) from exc

    # On failure the server may answer with a PyWPS-style XML/HTML document
    # containing ows:ExceptionText; surface that text in the HTTP error.
    if not response.ok:
        body = response.text or ""
        if "ows:ExceptionText" in body:
            try:
                detail = body.split("ows:ExceptionText")[1][1:-2]
            except Exception:
                detail = body
            response.reason = f"{response.reason}: <{detail}>"
        response.raise_for_status()

    # Successful responses are CSV; parse straight from memory.
    return _parse_mcclear(io.StringIO(response.content.decode("utf-8")))
# Public API.
def fetch_mcclear(index, latitude, longitude, elev, email, timeout=30):
    """
    Retrieve and align McClear data to a target DatetimeIndex.

    The date range spanned by `index` is downloaded through
    :func:`_download_mcclear`, the downloaded index is normalised to UTC, and
    the result is reindexed onto `index`. Target timestamps with no matching
    McClear row are filled with NaN by the reindex.

    Parameters
    ----------
    index : pd.DatetimeIndex
        Target time index to align McClear outputs to. A timezone-naive index
        is interpreted as UTC for the lookup; the returned frame still
        carries the original (naive) labels.
    latitude : float
        Latitude in decimal degrees. [degrees]
    longitude : float
        Longitude in decimal degrees. [degrees]
    elev : float
        Site elevation. [m]
    email : str
        SoDa account email.
    timeout : int, default 30
        HTTP request timeout in seconds.

    Returns
    -------
    aligned : pd.DataFrame
        McClear data reindexed to `index`. Must contain
        `ghi_clear`, `bni_clear`, and `dhi_clear`.

    Raises
    ------
    ValueError
        If ``index`` is not a DatetimeIndex or holds no valid timestamp,
        McClear columns are missing, or the downloaded frame has an invalid
        index. Also propagated from :func:`_download_mcclear` when the period
        starts before 2004-01-01.
    requests.Timeout
        Propagated from :func:`_download_mcclear` when the HTTP call times out.
    requests.HTTPError
        Propagated from SoDa on HTTP failure.
    """
    if not isinstance(index, pd.DatetimeIndex):
        raise ValueError(
            "index must be a pandas DatetimeIndex."
        )

    # min()/max() yield NaT for an empty (or all-NaT) index; reject that here
    # instead of letting it fail later while formatting the request dates.
    first, last = index.min(), index.max()
    if first is pd.NaT or last is pd.NaT:
        raise ValueError(
            "index must contain at least one valid timestamp."
        )

    # Inclusive date range covered by the target index.
    start = pd.Timestamp(first).to_pydatetime()
    end = pd.Timestamp(last).to_pydatetime()
    data = _download_mcclear(
        latitude=latitude,
        longitude=longitude,
        start=start,
        end=end,
        email=email,
        elev=elev,
        timeout=timeout,
    )
    if not isinstance(data.index, pd.DatetimeIndex):
        raise ValueError(
            "McClear data index must be DatetimeIndex."
        )

    # Work on a copy so the downloaded frame is not modified in place.
    data = data.copy()
    if data.index.tz is None:
        data.index = data.index.tz_localize("UTC")
    else:
        data.index = data.index.tz_convert("UTC")

    required_cols = {"ghi_clear", "bni_clear", "dhi_clear"}
    missing = required_cols - set(data.columns)
    if missing:
        raise ValueError(
            f"McClear data missing required columns: {sorted(missing)}"
        )

    if index.tz is None:
        # A naive target never matches the tz-aware UTC data index, so a
        # direct reindex would return only NaN. Look the rows up as UTC and
        # then put the caller's original labels back.
        aligned = data.reindex(index.tz_localize("UTC"))
        aligned.index = index
    else:
        aligned = data.reindex(index)
    return aligned