Skip to content

Pipeline evaluation

Cross-validated stage-over-stage comparison with persisted, rerun-safe results.

featurely.pipeline

add_pipeline_step(results_df, label, x, y, color=None, color_map=None, cv=10, results_path=None)

Run cross-validation for one pipeline step and upsert by stage name.

When results_path is provided, prior results are loaded from disk before the update and saved back after the update. This supports sequential notebook runs without duplicated stage rows.

Parameters:

Name Type Description Default
results_df DataFrame | None

Prior results frame, or None to start fresh or load from disk.

required
label str

Stage name; an existing row with this name is replaced.

required
x DataFrame

Feature matrix for this stage.

required
y Series

Target series.

required
color str | None

Optional explicit plot color for this stage.

None
color_map dict[str, str] | None

Optional stage-name-to-color mapping. When omitted, no package palette is applied and plotting uses Matplotlib defaults.

None
cv int

Number of cross-validation folds.

10
results_path str | Path | None

Optional pickle path for persisted, rerun-safe results.

None

Returns:

Type Description
DataFrame

The updated results frame with recomputed percent-vs-raw values.

Source code in src/featurely/pipeline.py
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
def add_pipeline_step(
    results_df: pd.DataFrame | None,
    label: str,
    x: pd.DataFrame,
    y: pd.Series,
    color: str | None = None,
    color_map: dict[str, str] | None = None,
    cv: int = 10,
    results_path: str | Path | None = None,
) -> pd.DataFrame:
    """Cross-validate one pipeline stage and insert or replace its result row.

    A ``LinearRegression`` is scored with R2 over ``cv`` folds. The stage is
    keyed by ``label``: if a row with that stage name already exists it is
    overwritten in place, otherwise a new row is appended. When
    ``results_path`` is given, prior results are loaded from disk before the
    update and written back afterwards, so re-running a notebook cell does
    not pile up duplicate stage rows.

    Args:
        results_df: Prior results frame, or None to start fresh or load from disk.
        label: Stage name; an existing row with this name is replaced.
        x: Feature matrix for this stage.
        y: Target series.
        color: Optional explicit plot color for this stage.
        color_map: Optional stage-name-to-color mapping, consulted only when
            ``color`` is None. When omitted, no package palette is applied
            and plotting uses Matplotlib defaults.
        cv: Number of cross-validation folds.
        results_path: Optional pickle path for persisted, rerun-safe results.

    Returns:
        The updated results frame with recomputed percent-vs-raw values.
    """

    frame = _load_pipeline_results(results_df, results_path)
    fold_scores = cross_val_score(LinearRegression(), x, y, cv=cv, scoring="r2")

    # An explicitly passed color wins; the mapping is only a fallback.
    if color is None and color_map is not None:
        color = color_map.get(label)

    # Look up the baseline ("raw") mean R2, defaulting to 0.0 when absent.
    raw_mean = 0.0
    if not frame.empty:
        raw_means = frame.loc[frame["stage"] == "raw", "mean_r2"]
        if len(raw_means) > 0:
            raw_mean = float(raw_means.iloc[0])

    # The baseline itself, or a missing/zero baseline, has no meaningful
    # relative change, so report 0.0 rather than dividing by zero.
    if label == "raw" or raw_mean == 0.0:
        pct_vs_raw = 0.0
    else:
        pct_vs_raw = (fold_scores.mean() - raw_mean) / abs(raw_mean) * 100

    record = {
        "stage": label,
        "mean_r2": fold_scores.mean(),
        "std_r2": fold_scores.std(),
        "pct_vs_raw": pct_vs_raw,
        "color": color,
        "scores": fold_scores,
    }

    if not frame.empty:
        # Earlier notebook runs may have left duplicate stage names behind;
        # keep only the most recent entry per stage and renumber the rows.
        frame = frame.drop_duplicates(subset=["stage"], keep="last").reset_index(drop=True)

    matching = frame.index[frame["stage"] == label].tolist()

    if not matching:
        # New stage: append at the next positional label.
        frame.loc[len(frame)] = record
    else:
        # Existing stage: overwrite cell by cell so the array-valued
        # "scores" entry is stored as a single object.
        target = matching[0]
        for column, value in record.items():
            frame.at[target, column] = value

    frame = _recompute_pct_vs_raw(frame)
    _save_pipeline_results(frame, results_path)

    return frame

plot_pipeline_steps(results_df, title='CV R2 pipeline steps', results_path=None)

Draw stage-wise cross-validation boxplots and print a text summary.

When results_path is provided, results are loaded from disk before plotting.

Parameters:

Name Type Description Default
results_df DataFrame | None

Results frame from add_pipeline_step, or None when loading from results_path.

required
title str

Plot title.

'CV R2 pipeline steps'
results_path str | Path | None

Optional pickle path to load persisted results from.

None

Raises:

Type Description
ValueError

If no results are available to plot.

Source code in src/featurely/pipeline.py
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
def plot_pipeline_steps(
    results_df: pd.DataFrame | None,
    title: str = "CV R2 pipeline steps",
    results_path: str | Path | None = None,
) -> None:
    """Draw stage-wise cross-validation boxplots and print a text summary.

    When ``results_path`` is provided, results are loaded from disk before plotting.

    One box is drawn per stage from that stage's per-fold R2 scores. The
    y-axis label reports the fold count taken from the stored score arrays
    (e.g. ``"R2 score (5-fold CV)"``); when stages were evaluated with
    differing fold counts, a generic ``"R2 score (CV)"`` label is used.

    Args:
        results_df: Results frame from ``add_pipeline_step``, or None when
            loading from ``results_path``.
        title: Plot title.
        results_path: Optional pickle path to load persisted results from.

    Raises:
        ValueError: If no results are available to plot.
    """

    results_df = _load_pipeline_results(results_df, results_path)

    if results_df.empty:
        raise ValueError("No pipeline results available to plot.")

    # Keep summaries consistent even when loading results created before pct_vs_raw existed.
    results_df = _recompute_pct_vs_raw(results_df.copy())

    labels = results_df["stage"].tolist()
    all_scores = results_df["scores"].tolist()

    explicit_colors = results_df["color"].tolist()

    # Fall back to Matplotlib's default property cycle when callers do not
    # provide a color for a stage.
    cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"]
    colors = [color if pd.notna(color) else cycle[i % len(cycle)] for i, color in enumerate(explicit_colors)]

    # Derive the fold count from the data instead of assuming 10: the
    # results may have been produced with any ``cv`` value. Entries without
    # a length are ignored so a malformed cell cannot break labeling.
    fold_counts = {len(scores) for scores in all_scores if hasattr(scores, "__len__")}
    if len(fold_counts) == 1:
        ylabel = f"R2 score ({next(iter(fold_counts))}-fold CV)"
    else:
        ylabel = "R2 score (CV)"

    _, ax = plt.subplots(figsize=(max(5, 2 * len(labels)), 4))
    bp = ax.boxplot(all_scores, tick_labels=labels, patch_artist=True)

    for patch, color in zip(bp["boxes"], colors, strict=False):
        patch.set_facecolor(color)
        patch.set_alpha(0.7)

    for median in bp["medians"]:
        median.set(color="black", linewidth=1.5)

    ax.set_title(title)
    ax.set_ylabel(ylabel)
    plt.xticks(rotation=15, ha="right")
    plt.tight_layout()
    show_figure()

    # Text summary mirroring the plot; a missing pct_vs_raw is shown as 0.
    for _, row in results_df.iterrows():
        pct_vs_raw = float(row["pct_vs_raw"]) if pd.notna(row["pct_vs_raw"]) else 0.0
        print(f"{row['stage']:>25}: mean R2 = {row['mean_r2']:.4f} ± {row['std_r2']:.4f}  ({pct_vs_raw:+.2f}% vs raw)")