Pipeline evaluation

Cross-validated stage-over-stage comparison with persisted, rerun-safe results.

`featurely.pipeline`

`add_pipeline_step(results_df, label, x, y, color=None, color_map=None, cv=10, results_path=None)`

Run cross-validation for one pipeline step and upsert by stage name.

When results_path is provided, prior results are loaded from disk before the update and saved back after the update. This supports sequential notebook runs without duplicated stage rows.

Parameters:

Name	Type	Description	Default
`results_df`	`DataFrame \| None`	Prior results frame, or None to start fresh or load from disk.	required
`label`	`str`	Stage name; an existing row with this name is replaced.	required
`x`	`DataFrame`	Feature matrix for this stage.	required
`y`	`Series`	Target series.	required
`color`	`str \| None`	Optional explicit plot color for this stage.	`None`
`color_map`	`dict[str, str] \| None`	Optional stage-name-to-color mapping. When omitted, no package palette is applied and plotting uses Matplotlib defaults.	`None`
`cv`	`int`	Number of cross-validation folds.	`10`
`results_path`	`str \| Path \| None`	Optional pickle path for persisted, rerun-safe results.	`None`

Returns:

Type	Description
`DataFrame`	The updated results frame with recomputed percent-vs-raw values.

Source code in src/featurely/pipeline.py

def add_pipeline_step(
    results_df: pd.DataFrame | None,
    label: str,
    x: pd.DataFrame,
    y: pd.Series,
    color: str | None = None,
    color_map: dict[str, str] | None = None,
    cv: int = 10,
    results_path: str | Path | None = None,
) -> pd.DataFrame:
    """Run cross-validation for one pipeline step and upsert by stage name.

    A ``LinearRegression`` model is scored with ``cross_val_score`` using the
    ``"r2"`` metric. The resulting row either replaces the existing row whose
    ``stage`` equals ``label`` or is appended as a new row.

    When ``results_path`` is provided, prior results are loaded from disk before
    the update and saved back after the update. This supports sequential notebook
    runs without duplicated stage rows.

    Args:
        results_df: Prior results frame, or None to start fresh or load from disk.
        label: Stage name; an existing row with this name is replaced. The
            stage named ``"raw"`` is the baseline for ``pct_vs_raw``.
        x: Feature matrix for this stage.
        y: Target series.
        color: Optional explicit plot color for this stage.
        color_map: Optional stage-name-to-color mapping, consulted only when
            ``color`` is None. When omitted, no package palette is applied and
            plotting uses Matplotlib defaults.
        cv: Number of cross-validation folds.
        results_path: Optional pickle path for persisted, rerun-safe results.

    Returns:
        The updated results frame with recomputed percent-vs-raw values.
    """

    updated = _load_pipeline_results(results_df, results_path)
    scores = cross_val_score(LinearRegression(), x, y, cv=cv, scoring="r2")
    mean_r2 = scores.mean()

    if color is None and color_map is not None:
        color = color_map.get(label)

    if not updated.empty:
        # Keep latest entry when prior notebook runs created duplicate stage names.
        # This must happen BEFORE the raw baseline is read below; otherwise a
        # stale duplicate "raw" row (about to be dropped) would be used as the
        # baseline for this stage's pct_vs_raw.
        updated = updated.drop_duplicates(subset=["stage"], keep="last").reset_index(drop=True)
        raw_rows = updated.loc[updated["stage"] == "raw", "mean_r2"]
    else:
        raw_rows = pd.Series(dtype=float)

    # A missing or zero baseline yields 0.0 rather than a division by zero.
    raw_mean = float(raw_rows.iloc[0]) if len(raw_rows) > 0 else 0.0
    if label == "raw" or raw_mean == 0.0:
        pct_vs_raw = 0.0
    else:
        pct_vs_raw = (mean_r2 - raw_mean) / abs(raw_mean) * 100

    row = {
        "stage": label,
        "mean_r2": mean_r2,
        "std_r2": scores.std(),
        "pct_vs_raw": pct_vs_raw,
        "color": color,
        "scores": scores,
    }

    stage_matches = updated.index[updated["stage"] == label].tolist()

    if stage_matches:
        # Upsert: overwrite the existing row cell by cell so its position
        # (and therefore plot order) is preserved.
        idx = stage_matches[0]

        for col, value in row.items():
            updated.at[idx, col] = value

    else:
        # The index is 0..n-1 here (reset above, or the frame is empty), so
        # len(updated) is the next free label.
        updated.loc[len(updated)] = row

    # Refresh pct_vs_raw via the shared helper after the upsert (presumably for
    # every row, so re-running the "raw" stage updates later stages too).
    updated = _recompute_pct_vs_raw(updated)
    _save_pipeline_results(updated, results_path)

    return updated

`plot_pipeline_steps(results_df, title='CV R2 pipeline steps', results_path=None)`

Draw stage-wise cross-validation boxplots and print a text summary.

When results_path is provided, results are loaded from disk before plotting.

Parameters:

Name	Type	Description	Default
`results_df`	`DataFrame \| None`	Results frame from `add_pipeline_step`, or None when loading from `results_path`.	required
`title`	`str`	Plot title.	`'CV R2 pipeline steps'`
`results_path`	`str \| Path \| None`	Optional pickle path to load persisted results from.	`None`

Raises:

Type	Description
`ValueError`	If no results are available to plot.

Source code in src/featurely/pipeline.py

def plot_pipeline_steps(
    results_df: pd.DataFrame | None,
    title: str = "CV R2 pipeline steps",
    results_path: str | Path | None = None,
) -> None:
    """Draw stage-wise cross-validation boxplots and print a text summary.

    When ``results_path`` is provided, results are loaded from disk before plotting.

    One box is drawn per row of the results frame, using that row's ``scores``.
    The y-axis label reports the fold count derived from the stored scores
    (e.g. ``10-fold CV``); if stages were evaluated with differing fold counts
    a generic ``CV`` label is used instead.

    Args:
        results_df: Results frame from ``add_pipeline_step``, or None when
            loading from ``results_path``.
        title: Plot title.
        results_path: Optional pickle path to load persisted results from.

    Raises:
        ValueError: If no results are available to plot.
    """

    results_df = _load_pipeline_results(results_df, results_path)

    if results_df.empty:
        raise ValueError("No pipeline results available to plot.")

    # Keep summaries consistent even when loading results created before pct_vs_raw existed.
    # Work on a copy so the caller's frame is not modified by the recompute.
    results_df = _recompute_pct_vs_raw(results_df.copy())

    labels = results_df["stage"].tolist()
    all_scores = results_df["scores"].tolist()

    explicit_colors = results_df["color"].tolist()

    # Fall back to Matplotlib's default property cycle when callers do not
    # provide a color for a stage.
    cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"]
    colors = [color if pd.notna(color) else cycle[i % len(cycle)] for i, color in enumerate(explicit_colors)]

    _, ax = plt.subplots(figsize=(max(5, 2 * len(labels)), 4))
    bp = ax.boxplot(all_scores, tick_labels=labels, patch_artist=True)

    for patch, color in zip(bp["boxes"], colors, strict=False):
        patch.set_facecolor(color)
        patch.set_alpha(0.7)

    for median in bp["medians"]:
        median.set(color="black", linewidth=1.5)

    # Derive the fold count from the data instead of assuming the default
    # cv=10: add_pipeline_step accepts any ``cv``, so a fixed "10-fold" label
    # would be wrong for other settings.
    fold_counts = {len(stage_scores) for stage_scores in all_scores if hasattr(stage_scores, "__len__")}
    if len(fold_counts) == 1:
        y_label = f"R2 score ({next(iter(fold_counts))}-fold CV)"
    else:
        # Mixed (or undeterminable) fold counts: avoid claiming a specific number.
        y_label = "R2 score (CV)"

    ax.set_title(title)
    ax.set_ylabel(y_label)
    plt.xticks(rotation=15, ha="right")
    plt.tight_layout()
    show_figure()

    for _, row in results_df.iterrows():
        # Treat a missing percentage as 0.0 so the formatted summary never prints "nan".
        pct_vs_raw = float(row["pct_vs_raw"]) if pd.notna(row["pct_vs_raw"]) else 0.0
        print(f"{row['stage']:>25}: mean R2 = {row['mean_r2']:.4f} ± {row['std_r2']:.4f}  ({pct_vs_raw:+.2f}% vs raw)")