"""
Clear-sky detection (CSD) results: one page per day, four rows per page (one per CSD model).
晴空检测 (CSD) 结果:每天一页,每页四行(每行对应一种 CSD 模型)。
"""
import numpy as np
import pandas as pd
from plotnine import (
ggplot,
aes,
geom_line,
geom_point,
geom_ribbon,
facet_grid,
theme_minimal,
theme,
element_text,
labs,
scale_x_datetime,
)
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt
from bsrn.constants import BSRN_STATIONS, WONG_PALETTE
from bsrn.dataset import BSRNDataset
from bsrn.modeling.clear_sky import add_clearsky_columns
from bsrn.physics.geometry import get_solar_position, get_ghi_extra
from bsrn.utils import reno_csd, ineichen_csd, lefevre_csd, brightsun_csd
# Display order for the four CSD methods (rows top to bottom).
CSD_METHOD_ORDER = ("Reno", "Ineichen", "Lefevre", "BrightSun")
[docs]
def plot_csd_booklet(file_path, output_file, station_code, df=None, title=None):
"""
Generate a PDF: one page per day, four rows (CSD methods) x three columns (GHI, BNI, DHI).
生成 PDF:每天一页,四行(CSD 方法)× 三列(GHI、BNI、DHI)。
Clear-sky detection is driven by GHI (and each method's required inputs). The same
is_clearsky flag is overlaid on the GHI, BNI, and DHI time series in each row.
晴空检测以 GHI(及各方法所需输入)为输入;同一 is_clearsky 标记叠加到该行的 GHI、BNI、DHI 时间序列上。
Parameters
----------
file_path : str
Path to the BSRN station-to-archive file (one month, e.g. .dat.gz).
Ignored if `df` is provided.
BSRN 站点存档文件路径(单月,如 .dat.gz);提供 df 时忽略。
output_file : str
Path to the output PDF file.
输出 PDF 路径。
station_code : str
Station abbreviation (e.g. "QIQ") for clear-sky and site location.
站点缩写(如 "QIQ"),用于晴空与站点位置。
df : pd.DataFrame, optional
If provided, use this DataFrame instead of reading from ``file_path``.
Must have one-month DatetimeIndex and columns ghi, bni, dhi.
若提供,则使用此 DataFrame 而非从文件读取;须为单月且含 ghi, bni, dhi。
title : str, optional
Same title on every PDF page. If None (default), no plot title.
每页共用标题;默认 None 不显示。
Returns
-------
None
Saves the booklet to the given PDF path.
将手册保存到指定 PDF。
"""
# Load data and ensure December only.
# 加载数据并确保仅包含十二月。
if df is None:
df = BSRNDataset.from_file(file_path).data()
df = df.sort_index()
unique_months = df.index.to_period("M").unique()
if len(unique_months) != 1:
raise ValueError(f"File must contain exactly one month. Found: {unique_months}")
stn = BSRN_STATIONS.get(station_code, {"lat": 0, "lon": 0, "elev": 0})
lat, lon, elev = stn["lat"], stn["lon"], stn["elev"]
# Solar position and extraterrestrial GHI.
# 太阳位置和地外水平辐照度。
solpos = get_solar_position(df.index, lat, lon, elev)
zenith = solpos["zenith"].values
ghi_extra = np.asarray(get_ghi_extra(df.index, zenith), dtype=float)
# Clear-sky columns (required for Reno, BrightSun, and for reference).
# 晴空列(Reno、BrightSun 所需,以及参考)。
df = add_clearsky_columns(df, station_code)
ghi = df["ghi"].values
bni = df["bni"].values
dhi = df["dhi"].values
ghi_clear = df["ghi_clear"].values
bni_clear = df["bni_clear"].values
dhi_clear = df["dhi_clear"].values
times = df.index
# Run all four CSD methods (inputs are GHI and method-specific: ghi_clear, ghi_extra, zenith, dhi, etc.).
# 运行四种 CSD 方法(输入为 GHI 及各方法所需:ghi_clear、ghi_extra、zenith、dhi 等)。
out_reno = reno_csd(ghi, ghi_clear, times=times)
out_ineichen = ineichen_csd(ghi, ghi_extra, zenith, times=times)
out_lefevre = lefevre_csd(ghi, dhi, ghi_extra, zenith, times=times)
out_brightsun = brightsun_csd(zenith, ghi, ghi_clear, dhi, dhi_clear, times)
# Build merged frame: time, ghi/bni/dhi, clear-sky cols, and one column per method.
# 构建合并框架:时间、GHI/BNI/DHI、晴空列、每种方法一列。
merged = pd.DataFrame(
index=times,
data={
"ghi": ghi,
"bni": bni,
"dhi": dhi,
"ghi_clear": ghi_clear,
"bni_clear": bni_clear,
"dhi_clear": dhi_clear,
"is_reno": out_reno["is_clearsky"],
"is_ineichen": out_ineichen["is_clearsky"],
"is_lefevre": out_lefevre["is_clearsky"],
"is_brightsun": out_brightsun["is_clearsky"],
},
)
merged.index.name = "time"
merged = merged.reset_index()
# Long format: one row per (time, method), with ghi, bni, dhi and clear-sky, is_clearsky.
# 长格式:每行对应 (time, method),包含 ghi、bni、dhi 和晴空、is_clearsky。
long_list = []
for method in CSD_METHOD_ORDER:
col = f"is_{method.lower()}"
sub = merged[
["time", "ghi", "bni", "dhi", "ghi_clear", "bni_clear", "dhi_clear", col]
].copy()
sub = sub.rename(columns={col: "is_clearsky"})
sub["method"] = method
long_list.append(sub)
long_df = pd.concat(long_list, ignore_index=True)
long_df["method"] = pd.Categorical(
long_df["method"], categories=list(CSD_METHOD_ORDER), ordered=True
)
# Long format for plotting: GHI / BNI / DHI time series; overlay is_clearsky points on all three.
# 绘图用长表:GHI / BNI / DHI 时间序列;在同一行的三个面板上叠加 is_clearsky 点。
m1 = long_df.melt(
id_vars=["time", "method", "is_clearsky"],
value_vars=["ghi", "bni", "dhi"],
var_name="param",
value_name="measured",
)
m2 = long_df.melt(
id_vars=["time", "method"],
value_vars=["ghi_clear", "bni_clear", "dhi_clear"],
var_name="param_clear",
value_name="clearsky",
)
param_map = {"ghi": "GHI", "bni": "BNI", "dhi": "DHI"}
m1["parameter"] = m1["param"].map(param_map)
m2["parameter"] = m2["param_clear"].str.replace("_clear", "").map(param_map)
plot_df = m1[["time", "method", "parameter", "measured", "is_clearsky"]].merge(
m2[["time", "method", "parameter", "clearsky"]],
on=["time", "method", "parameter"],
)
plot_df["parameter"] = pd.Categorical(
plot_df["parameter"], categories=["GHI", "BNI", "DHI"], ordered=True
)
clear_only = plot_df.loc[plot_df["is_clearsky"] == True].copy()
# Figure size: 160 mm width, height for 4 rows x 3 cols.
# 图宽 160 mm,高为 4 行 × 3 列。
width_inch = 160 / 25.4
height_inch = width_inch / 3 * 4 / 1.4
print(f"Generating CSD booklet: {output_file} ...")
with PdfPages(output_file) as pdf:
for date, day_df in plot_df.groupby(pd.to_datetime(plot_df["time"]).dt.date):
day_clear = clear_only.loc[
pd.to_datetime(clear_only["time"]).dt.date == date
]
# Wong palette: [1] Sky Blue (clearsky/ribbon), [0] Orange (measured), [2] Bluish Green (CSD points)
# Wong 配色方案:[1] 天蓝色(晴空/色带),[0] 橙色(实测),[2] 蓝绿色(CSD 点)
ribbon_color = WONG_PALETTE[1]
_lab = {"x": "Time (UTC)", "y": "[W/m²]"}
if title is not None:
_lab["title"] = title
p = (
ggplot(day_df, aes(x="time"))
+ geom_ribbon(
mapping=aes(ymin=0, ymax="clearsky"),
fill=ribbon_color,
alpha=0.25,
)
+ geom_line(
aes(y="clearsky"), color=ribbon_color, size=0.2, alpha=0.7, linetype="dashed"
)
+ geom_line(aes(y="measured"), color=WONG_PALETTE[0], size=0.5)
+ geom_point(
data=day_clear,
mapping=aes(y="measured"),
color=WONG_PALETTE[2],
size=0.3,
alpha=0.7,
)
+ facet_grid("method ~ parameter", scales="free_y")
+ labs(**_lab)
+ theme_minimal()
+ theme(
text=element_text(family="Times New Roman", size=9),
axis_title=element_text(size=9),
axis_text=element_text(size=9),
plot_title=element_text(size=9, margin={"b": 10}),
figure_size=(width_inch, height_inch),
strip_text=element_text(size=9),
)
+ scale_x_datetime(date_labels="%H:%M")
)
fig = p.draw()
pdf.savefig(fig, bbox_inches="tight")
plt.close(fig)
print("Done.")