Skip to content

Transforms

Scaling and monotonic transforms for distribution shaping.

featurely.transforms

apply_standard_scale(df, feature_cols)

Return a copy with standard scaling applied to selected feature columns.

Parameters:

Name Type Description Default
df DataFrame

Input frame; not modified.

required
feature_cols list[str]

Columns to scale to zero mean and unit variance.

required

Returns:

Type Description
DataFrame

A copy of df with the selected columns scaled.

Source code in src/featurely/transforms.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
def apply_standard_scale(df: pd.DataFrame, feature_cols: list[str]) -> pd.DataFrame:
    """Standard-scale the chosen feature columns on a copy of the frame.

    Args:
        df: Source frame; it is never modified because all work happens on a
            copy.
        feature_cols: Names of the columns passed through ``StandardScaler``
            (zero mean, unit variance).

    Returns:
        A new frame equal to ``df`` except that ``feature_cols`` hold the
        scaled values.
    """
    scaled_frame = df.copy()
    scaler = StandardScaler()

    # The scaler is fitted on, and applied to, only the selected columns.
    scaled_values = scaler.fit_transform(scaled_frame[feature_cols])
    scaled_frame[feature_cols] = scaled_values

    return scaled_frame

apply_log1p(df, feature_cols)

Return a copy with log1p transform (with non-negative shift) then scaling.

Parameters:

Name Type Description Default
df DataFrame

Input frame; not modified.

required
feature_cols list[str]

Columns to transform; any column whose minimum is negative is shifted so that its minimum becomes zero before log1p is applied.

required

Returns:

Type Description
DataFrame

A copy of df with the selected columns transformed and scaled.

Source code in src/featurely/transforms.py
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
def apply_log1p(df: pd.DataFrame, feature_cols: list[str]) -> pd.DataFrame:
    """Apply ``log1p`` to the chosen columns of a copy, then standard-scale them.

    Args:
        df: Source frame; it is never modified because all work happens on a
            copy.
        feature_cols: Names of the columns to transform. A column whose
            minimum is negative is first shifted up by that amount so its
            minimum becomes zero; other columns are used as they are.

    Returns:
        A new frame equal to ``df`` except that ``feature_cols`` hold the
        log-transformed and then standard-scaled values.
    """
    transformed = df.copy()
    # Read from a snapshot so every minimum comes from the untransformed data.
    originals = transformed[feature_cols].copy()

    for name in feature_cols:
        values = originals[name]
        lowest = values.min()

        offset = 0
        if lowest < 0:
            # Lift the column so that its smallest value sits at zero.
            offset = -lowest

        transformed[name] = np.log1p(values + offset)

    scaler = StandardScaler()
    transformed[feature_cols] = scaler.fit_transform(transformed[feature_cols])

    return transformed

apply_sqrt(df, feature_cols)

Return a copy with square-root transform (with non-negative shift) then scaling.

Parameters:

Name Type Description Default
df DataFrame

Input frame; not modified.

required
feature_cols list[str]

Columns to transform; any column whose minimum is negative is shifted so that its minimum becomes zero before the square root is taken.

required

Returns:

Type Description
DataFrame

A copy of df with the selected columns transformed and scaled.

Source code in src/featurely/transforms.py
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def apply_sqrt(df: pd.DataFrame, feature_cols: list[str]) -> pd.DataFrame:
    """Apply a square root to the chosen columns of a copy, then standard-scale them.

    Args:
        df: Source frame; it is never modified because all work happens on a
            copy.
        feature_cols: Names of the columns to transform. A column whose
            minimum is negative is first shifted up by that amount so its
            minimum becomes zero; other columns are used as they are.

    Returns:
        A new frame equal to ``df`` except that ``feature_cols`` hold the
        square-rooted and then standard-scaled values.
    """
    transformed = df.copy()
    # Read from a snapshot so every minimum comes from the untransformed data.
    originals = transformed[feature_cols].copy()

    for name in feature_cols:
        values = originals[name]
        lowest = values.min()

        offset = 0
        if lowest < 0:
            # Lift the column so that its smallest value sits at zero.
            offset = -lowest

        transformed[name] = np.sqrt(values + offset)

    scaler = StandardScaler()
    transformed[feature_cols] = scaler.fit_transform(transformed[feature_cols])

    return transformed

apply_yeo_johnson(df, feature_cols)

Return a copy with Yeo-Johnson transform applied to selected columns.

Yeo-Johnson fits a Box-Cox-style power parameter per column and handles negative values natively.

Parameters:

Name Type Description Default
df DataFrame

Input frame; not modified.

required
feature_cols list[str]

Columns to transform.

required

Returns:

Type Description
DataFrame

A copy of df with the selected columns transformed.

Source code in src/featurely/transforms.py
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
def apply_yeo_johnson(df: pd.DataFrame, feature_cols: list[str]) -> pd.DataFrame:
    """Run a Yeo-Johnson power transform over the chosen columns of a copy.

    The transform fits a Box-Cox-style power parameter for each column and
    accepts negative values directly, so no shift is applied beforehand.

    Args:
        df: Source frame; it is never modified because all work happens on a
            copy.
        feature_cols: Names of the columns to transform.

    Returns:
        A new frame equal to ``df`` except that ``feature_cols`` hold the
        transformed values.
    """
    transformed = df.copy()
    power = PowerTransformer(method="yeo-johnson")

    # Fit on, and transform, only the selected columns.
    selected = transformed[feature_cols]
    transformed[feature_cols] = power.fit_transform(selected)

    return transformed

apply_quantile_normal(df, feature_cols, random_state=315)

Return a copy with quantile-to-normal transform applied to selected columns.

Maps each column's empirical CDF onto a standard normal distribution, which erases outliers and skew but distorts within-column spacing.

Parameters:

Name Type Description Default
df DataFrame

Input frame; not modified.

required
feature_cols list[str]

Columns to transform.

required
random_state int

Seed for the quantile transformer's subsampling.

315

Returns:

Type Description
DataFrame

A copy of df with the selected columns transformed.

Source code in src/featurely/transforms.py
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
def apply_quantile_normal(df: pd.DataFrame, feature_cols: list[str], random_state: int = 315) -> pd.DataFrame:
    """Map the chosen columns of a copy onto a normal distribution by quantile.

    Each column's empirical CDF is mapped onto a standard normal, which
    removes outliers and skew at the cost of distorting the spacing between
    values within a column.

    Args:
        df: Source frame; it is never modified because all work happens on a
            copy.
        feature_cols: Names of the columns to transform.
        random_state: Seed forwarded to ``QuantileTransformer`` as its
            ``random_state`` (used for its subsampling).

    Returns:
        A new frame equal to ``df`` except that ``feature_cols`` hold the
        transformed values.
    """
    transformed = df.copy()
    mapper = QuantileTransformer(output_distribution="normal", random_state=random_state)

    # Fit on, and transform, only the selected columns.
    selected = transformed[feature_cols]
    transformed[feature_cols] = mapper.fit_transform(selected)

    return transformed