"""
Clear-sky detection (CSD) results: one page per day, four rows per page (one CSD model per row).
"""
import numpy as np
import pandas as pd
from plotnine import (
ggplot,
aes,
geom_line,
geom_point,
geom_ribbon,
facet_grid,
theme_minimal,
theme,
element_text,
labs,
scale_x_datetime,
)
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt
from bsrn.constants import BSRN_STATIONS, WONG_PALETTE
from bsrn.dataset import BSRNDataset
from bsrn.modeling.clear_sky import add_clearsky_columns
from bsrn.physics.geometry import get_solar_position, get_ghi_extra
from bsrn.utils import reno_csd, ineichen_csd, lefevre_csd, brightsun_csd
# Display order for the four CSD methods (rows top to bottom).
CSD_METHOD_ORDER = ("Reno", "Ineichen", "Lefevre", "BrightSun")
[docs]
def plot_csd_booklet(file_path, output_file, station_code, df=None, title=None):
"""
Generate a PDF: one page per day, four rows (CSD methods) × three columns (GHI, BNI, DHI).
Clear-sky detection is driven by GHI (and each method's required inputs). The same
``is_clearsky`` flag is overlaid on the GHI, BNI, and DHI time series in each row.
Parameters
----------
file_path : str
Path to the BSRN station-to-archive file (one month, e.g. .dat.gz).
Ignored if ``df`` is provided.
output_file : str
Path to the output PDF file.
station_code : str
Station abbreviation (e.g. "QIQ") for clear-sky and site location.
df : pd.DataFrame, optional
If provided, use this DataFrame instead of reading from ``file_path``.
Must have one-month DatetimeIndex and columns ghi, bni, dhi.
title : str, optional
Same title on every PDF page. If None (default), no plot title.
Returns
-------
None
Saves the booklet to the given PDF path.
"""
# Load data and ensure December only.
if df is None:
df = BSRNDataset.from_file(file_path).data()
df = df.sort_index()
unique_months = df.index.to_period("M").unique()
if len(unique_months) != 1:
raise ValueError(f"File must contain exactly one month. Found: {unique_months}")
stn = BSRN_STATIONS.get(station_code, {"lat": 0, "lon": 0, "elev": 0})
lat, lon, elev = stn["lat"], stn["lon"], stn["elev"]
# Solar position and extraterrestrial GHI
solpos = get_solar_position(df.index, lat, lon, elev)
zenith = solpos["zenith"].values
ghi_extra = np.asarray(get_ghi_extra(df.index, zenith), dtype=float)
# Clear-sky columns (required for Reno, BrightSun, and reference curves)
df = add_clearsky_columns(df, station_code)
ghi = df["ghi"].values
bni = df["bni"].values
dhi = df["dhi"].values
ghi_clear = df["ghi_clear"].values
bni_clear = df["bni_clear"].values
dhi_clear = df["dhi_clear"].values
times = df.index
# Run all four CSD methods (inputs are GHI and method-specific: ghi_clear, ghi_extra, zenith, dhi, etc.).
out_reno = reno_csd(ghi, ghi_clear, times=times)
out_ineichen = ineichen_csd(ghi, ghi_extra, zenith, times=times)
out_lefevre = lefevre_csd(ghi, dhi, ghi_extra, zenith, times=times)
out_brightsun = brightsun_csd(zenith, ghi, ghi_clear, dhi, dhi_clear, times)
# Build merged frame: time, ghi/bni/dhi, clear-sky cols, and one column per method.
merged = pd.DataFrame(
index=times,
data={
"ghi": ghi,
"bni": bni,
"dhi": dhi,
"ghi_clear": ghi_clear,
"bni_clear": bni_clear,
"dhi_clear": dhi_clear,
"is_reno": out_reno["is_clearsky"],
"is_ineichen": out_ineichen["is_clearsky"],
"is_lefevre": out_lefevre["is_clearsky"],
"is_brightsun": out_brightsun["is_clearsky"],
},
)
merged.index.name = "time"
merged = merged.reset_index()
# Long format: one row per (time, method), with ghi, bni, dhi and clear-sky, is_clearsky.
long_list = []
for method in CSD_METHOD_ORDER:
col = f"is_{method.lower()}"
sub = merged[
["time", "ghi", "bni", "dhi", "ghi_clear", "bni_clear", "dhi_clear", col]
].copy()
sub = sub.rename(columns={col: "is_clearsky"})
sub["method"] = method
long_list.append(sub)
long_df = pd.concat(long_list, ignore_index=True)
long_df["method"] = pd.Categorical(
long_df["method"], categories=list(CSD_METHOD_ORDER), ordered=True
)
# Long format for plotting: GHI / BNI / DHI time series; overlay is_clearsky points on all three.
m1 = long_df.melt(
id_vars=["time", "method", "is_clearsky"],
value_vars=["ghi", "bni", "dhi"],
var_name="param",
value_name="measured",
)
m2 = long_df.melt(
id_vars=["time", "method"],
value_vars=["ghi_clear", "bni_clear", "dhi_clear"],
var_name="param_clear",
value_name="clearsky",
)
param_map = {"ghi": "GHI", "bni": "BNI", "dhi": "DHI"}
m1["parameter"] = m1["param"].map(param_map)
m2["parameter"] = m2["param_clear"].str.replace("_clear", "").map(param_map)
plot_df = m1[["time", "method", "parameter", "measured", "is_clearsky"]].merge(
m2[["time", "method", "parameter", "clearsky"]],
on=["time", "method", "parameter"],
)
plot_df["parameter"] = pd.Categorical(
plot_df["parameter"], categories=["GHI", "BNI", "DHI"], ordered=True
)
clear_only = plot_df.loc[plot_df["is_clearsky"] == True].copy()
# Figure size: 160 mm wide, height for 4 facet rows × 3 columns
width_inch = 160 / 25.4
height_inch = width_inch / 3 * 4 / 1.4
print(f"Generating CSD booklet: {output_file} ...")
with PdfPages(output_file) as pdf:
for date, day_df in plot_df.groupby(pd.to_datetime(plot_df["time"]).dt.date):
day_clear = clear_only.loc[
pd.to_datetime(clear_only["time"]).dt.date == date
]
# Wong: [1] sky blue (ribbon/clear curve), [0] orange (measured), [2] bluish green (CSD points)
ribbon_color = WONG_PALETTE[1]
_lab = {"x": "Time (UTC)", "y": "[W/m²]"}
if title is not None:
_lab["title"] = title
p = (
ggplot(day_df, aes(x="time"))
+ geom_ribbon(
mapping=aes(ymin=0, ymax="clearsky"),
fill=ribbon_color,
alpha=0.25,
)
+ geom_line(
aes(y="clearsky"), color=ribbon_color, size=0.2, alpha=0.7, linetype="dashed"
)
+ geom_line(aes(y="measured"), color=WONG_PALETTE[0], size=0.5)
+ geom_point(
data=day_clear,
mapping=aes(y="measured"),
color=WONG_PALETTE[2],
size=0.3,
alpha=0.7,
)
+ facet_grid("method ~ parameter", scales="free_y")
+ labs(**_lab)
+ theme_minimal()
+ theme(
text=element_text(family="Times New Roman", size=9),
axis_title=element_text(size=9),
axis_text=element_text(size=9),
plot_title=element_text(size=9, margin={"b": 10}),
figure_size=(width_inch, height_inch),
strip_text=element_text(size=9),
)
+ scale_x_datetime(date_labels="%H:%M")
)
fig = p.draw()
pdf.savefig(fig, bbox_inches="tight")
plt.close(fig)
print("Done.")