API reference

Feature Effect Methods

effector.global_effect_ale.ALEBase

Bases: GlobalEffectBase

Source code in effector/global_effect_ale.py
class ALEBase(GlobalEffectBase):

    def __init__(
            self,
            data: np.ndarray,
            model: callable,
            nof_instances: Union[int, str] = "all",
            axis_limits: Optional[np.ndarray] = None,
            avg_output: Optional[float] = None,
            feature_names: Optional[List] = None,
            target_name: Optional[str] = None,
            method_name: str = "ALE",
    ):
        self.method_name = method_name
        super(ALEBase, self).__init__(
            method_name,
            data,
            model,
            nof_instances,
            axis_limits,
            avg_output,
            feature_names,
            target_name
        )

    @abstractmethod
    def _fit_feature(self,
                     feature: int,
                     binning_method: typing.Union[str, bm.DynamicProgramming, bm.Greedy, bm.Fixed] = "greedy"
                     ) -> typing.Dict:
        raise NotImplementedError

    @abstractmethod
    def fit(self,
            features: typing.Union[int, str, list] = "all",
            **kwargs) -> None:
        raise NotImplementedError

    def _compute_norm_const(
        self, feature: int, method: str = "zero_integral", nof_points: int = 100
    ) -> float:
        """Compute the normalization constant."""
        assert method in ["zero_integral", "zero_start"]

        def create_partial_eval(feat):
            return lambda x: self._eval_unnorm(feat, x, heterogeneity=False)

        partial_eval = create_partial_eval(feature)
        start = self.axis_limits[0, feature]
        stop = self.axis_limits[1, feature]

        if method == "zero_integral":
            z = utils_integrate.mean_1d_linspace(partial_eval, start, stop, nof_points)
        else:
            z = partial_eval(np.array([start])).item()
        return z

    def _fit_loop(self, features, binning_method, centering):
        features = helpers.prep_features(features, self.dim)
        centering = helpers.prep_centering(centering)
        for s in features:
            # compute all information required for plotting and evaluating the feature effect
            self.feature_effect["feature_" + str(s)] = self._fit_feature(
                s, binning_method
            )

            # append the "norm_const" to the feature effect if centering is not False
            if centering is not False:
                self.feature_effect["feature_" + str(s)]["norm_const"] = self._compute_norm_const(s, method=centering)
            else:
                self.feature_effect["feature_" + str(s)]["norm_const"] = self.empty_symbol

            self.is_fitted[s] = True
            self.method_args["feature_" + str(s)] = {
                "centering": centering,
            }

    def _eval_unnorm(self, feature: int, x: np.ndarray, heterogeneity: bool = False):
        params = self.feature_effect["feature_" + str(feature)]
        y = utils.compute_accumulated_effect(
            x, limits=params["limits"], bin_effect=params["bin_effect"], dx=params["dx"]
        )
        if heterogeneity:
            std = utils.compute_accumulated_effect(
                x,
                limits=params["limits"],
                bin_effect=np.sqrt(params["bin_variance"]),
                dx=params["dx"],
            )

            return y, std
        else:
            return y

    def eval(
        self,
        feature: int,
        xs: np.ndarray,
        heterogeneity: bool = False,
        centering: typing.Union[bool, str] = False,
    ) -> typing.Union[np.ndarray, typing.Tuple[np.ndarray, np.ndarray]]:
        """Evalueate the (RH)ALE feature effect of feature `feature` at points `xs`.

        Notes:
            This is a common method inherited by both ALE and RHALE.

        Args:
            feature: index of feature of interest
            xs: the points along the s-th axis to evaluate the FE plot
              - `np.ndarray` of shape `(T, )`
            heterogeneity: whether to return heterogeneity:

                  - `False`, returns the mean effect `y` at the given `xs`
                  - `True`, returns a tuple `(y, H)` of two `ndarrays`; `y` is the mean effect and `H` is the
                  heterogeneity evaluated at `xs`

            centering: whether to center the plot:

                - `False` means no centering
                - `True` or `zero_integral` centers around the `y` axis.
                - `zero_start` starts the plot from `y=0`.
        Returns:
            the mean effect `y`, if `heterogeneity=False` (default) or a tuple `(y, std)` otherwise

        """
        centering = helpers.prep_centering(centering)

        if self.refit(feature, centering):
            self.fit(features=feature, centering=centering)

        # Check if the lower bound is less than the upper bound
        assert self.axis_limits[0, feature] < self.axis_limits[1, feature]

        # Evaluate the feature
        yy = self._eval_unnorm(feature, xs, heterogeneity=heterogeneity)
        y, std = yy if heterogeneity else (yy, None)

        # Center if asked
        y = (
            y - self.feature_effect["feature_" + str(feature)]["norm_const"]
            if centering
            else y
        )

        return (y, std) if heterogeneity is not False else y

    def plot(
            self,
            feature: int,
            heterogeneity: bool = False,
            centering: Union[bool, str] = False,
            scale_x: Optional[dict] = None,
            scale_y: Optional[dict] = None,
            show_avg_output: bool = False,
            y_limits: Optional[List] = None,
            dy_limits: Optional[List] = None
    ):
        """
        Plot the (RH)ALE feature effect of feature `feature`.

        Notes:
            This is a common method inherited by both ALE and RHALE.

        Parameters:
            feature: the feature to plot
            heterogeneity: whether to plot the heterogeneity

                  - `False`, plots only the mean effect
                  - `True`, the std of the bin-effects will be plotted using a red vertical bar

            centering: whether to center the plot:

                - `False` means no centering
                - `True` or `zero_integral` centers around the `y` axis.
                - `zero_start` starts the plot from `y=0`.

            scale_x: None or Dict with keys ['std', 'mean']

                - If set to None, no scaling will be applied.
                - If set to a dict, the x-axis will be scaled by the standard deviation and the mean.
            scale_y: None or Dict with keys ['std', 'mean']

                - If set to None, no scaling will be applied.
                - If set to a dict, the y-axis will be scaled by the standard deviation and the mean.
            show_avg_output: if True, the average output will be shown as a horizontal line.
            y_limits: None or tuple, the limits of the y-axis

                - If set to None, the limits of the y-axis are set automatically
                - If set to a tuple, the limits are manually set

            dy_limits: None or tuple, the limits of the dy-axis

                - If set to None, the limits of the dy-axis are set automatically
                - If set to a tuple, the limits are manually set
        """
        heterogeneity = helpers.prep_confidence_interval(heterogeneity)
        centering = helpers.prep_centering(centering)

        # hack to fit the feature if not fitted
        self.eval(
            feature, np.array([self.axis_limits[0, feature]]), centering=centering
        )

        if show_avg_output:
            avg_output = helpers.prep_avg_output(self.data, self.model, self.avg_output, scale_y)
        else:
            avg_output = None

        vis.ale_plot(
            self.feature_effect["feature_" + str(feature)],
            self.eval,
            feature,
            centering=centering,
            error=heterogeneity,
            scale_x=scale_x,
            scale_y=scale_y,
            title=self.method_name + " plot",
            avg_output=avg_output,
            feature_names=self.feature_names,
            target_name=self.target_name,
            y_limits=y_limits,
            dy_limits=dy_limits
        )

eval(feature, xs, heterogeneity=False, centering=False)

Evaluate the (RH)ALE feature effect of the given feature at points xs.

Notes

This is a common method inherited by both ALE and RHALE.

Parameters:

  feature (int, required): index of feature of interest

  xs (np.ndarray, required): the points along the s-th axis to evaluate the FE plot
    • np.ndarray of shape (T, )

  heterogeneity (bool, default=False): whether to return heterogeneity:
    • False, returns the mean effect y at the given xs
    • True, returns a tuple (y, H) of two ndarrays; y is the mean effect and H is the heterogeneity evaluated at xs

  centering (typing.Union[bool, str], default=False): whether to center the plot:
    • False means no centering
    • True or zero_integral centers around the y axis.
    • zero_start starts the plot from y=0.

Returns:

  typing.Union[np.ndarray, typing.Tuple[np.ndarray, np.ndarray]]: the mean effect y, if heterogeneity=False (default) or a tuple (y, std) otherwise

Source code in effector/global_effect_ale.py
def eval(
    self,
    feature: int,
    xs: np.ndarray,
    heterogeneity: bool = False,
    centering: typing.Union[bool, str] = False,
) -> typing.Union[np.ndarray, typing.Tuple[np.ndarray, np.ndarray]]:
    """Evalueate the (RH)ALE feature effect of feature `feature` at points `xs`.

    Notes:
        This is a common method inherited by both ALE and RHALE.

    Args:
        feature: index of feature of interest
        xs: the points along the s-th axis to evaluate the FE plot
          - `np.ndarray` of shape `(T, )`
        heterogeneity: whether to return heterogeneity:

              - `False`, returns the mean effect `y` at the given `xs`
              - `True`, returns a tuple `(y, H)` of two `ndarrays`; `y` is the mean effect and `H` is the
              heterogeneity evaluated at `xs`

        centering: whether to center the plot:

            - `False` means no centering
            - `True` or `zero_integral` centers around the `y` axis.
            - `zero_start` starts the plot from `y=0`.
    Returns:
        the mean effect `y`, if `heterogeneity=False` (default) or a tuple `(y, std)` otherwise

    """
    centering = helpers.prep_centering(centering)

    if self.refit(feature, centering):
        self.fit(features=feature, centering=centering)

    # Check if the lower bound is less than the upper bound
    assert self.axis_limits[0, feature] < self.axis_limits[1, feature]

    # Evaluate the feature
    yy = self._eval_unnorm(feature, xs, heterogeneity=heterogeneity)
    y, std = yy if heterogeneity else (yy, None)

    # Center if asked
    y = (
        y - self.feature_effect["feature_" + str(feature)]["norm_const"]
        if centering
        else y
    )

    return (y, std) if heterogeneity is not False else y
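
For illustration, a minimal usage sketch of eval follows. It uses the ALE subclass documented below; the toy dataset and model are assumptions for the example, not part of the API:

import numpy as np
from effector.global_effect_ale import ALE

# hypothetical data and black-box model
X = np.random.uniform(-1, 1, size=(1000, 2))
model = lambda x: x[:, 0] ** 2 + x[:, 0] * x[:, 1]

ale = ALE(data=X, model=model)
xs = np.linspace(-1, 1, 100)

y = ale.eval(feature=0, xs=xs)                            # mean effect only
y, het = ale.eval(feature=0, xs=xs, heterogeneity=True)   # mean effect and heterogeneity
y0 = ale.eval(feature=0, xs=xs, centering="zero_start")   # effect starting from y=0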

plot(feature, heterogeneity=False, centering=False, scale_x=None, scale_y=None, show_avg_output=False, y_limits=None, dy_limits=None)

Plot the (RH)ALE feature effect of the given feature.

Notes

This is a common method inherited by both ALE and RHALE.

Parameters:

  feature (int, required): the feature to plot

  heterogeneity (bool, default=False): whether to plot the heterogeneity
    • False, plots only the mean effect
    • True, the std of the bin-effects will be plotted using a red vertical bar

  centering (Union[bool, str], default=False): whether to center the plot:
    • False means no centering
    • True or zero_integral centers around the y axis.
    • zero_start starts the plot from y=0.

  scale_x (Optional[dict], default=None): None or dict with keys ['std', 'mean']
    • If set to None, no scaling will be applied.
    • If set to a dict, the x-axis will be scaled by the standard deviation and the mean.

  scale_y (Optional[dict], default=None): None or dict with keys ['std', 'mean']
    • If set to None, no scaling will be applied.
    • If set to a dict, the y-axis will be scaled by the standard deviation and the mean.

  show_avg_output (bool, default=False): if True, the average output will be shown as a horizontal line.

  y_limits (Optional[List], default=None): None or tuple, the limits of the y-axis
    • If set to None, the limits of the y-axis are set automatically
    • If set to a tuple, the limits are manually set

  dy_limits (Optional[List], default=None): None or tuple, the limits of the dy-axis
    • If set to None, the limits of the dy-axis are set automatically
    • If set to a tuple, the limits are manually set
Source code in effector/global_effect_ale.py
def plot(
        self,
        feature: int,
        heterogeneity: bool = False,
        centering: Union[bool, str] = False,
        scale_x: Optional[dict] = None,
        scale_y: Optional[dict] = None,
        show_avg_output: bool = False,
        y_limits: Optional[List] = None,
        dy_limits: Optional[List] = None
):
    """
    Plot the (RH)ALE feature effect of feature `feature`.

    Notes:
        This is a common method inherited by both ALE and RHALE.

    Parameters:
        feature: the feature to plot
        heterogeneity: whether to plot the heterogeneity

              - `False`, plots only the mean effect
              - `True`, the std of the bin-effects will be plotted using a red vertical bar

        centering: whether to center the plot:

            - `False` means no centering
            - `True` or `zero_integral` centers around the `y` axis.
            - `zero_start` starts the plot from `y=0`.

        scale_x: None or Dict with keys ['std', 'mean']

            - If set to None, no scaling will be applied.
            - If set to a dict, the x-axis will be scaled by the standard deviation and the mean.
        scale_y: None or Dict with keys ['std', 'mean']

            - If set to None, no scaling will be applied.
            - If set to a dict, the y-axis will be scaled by the standard deviation and the mean.
        show_avg_output: if True, the average output will be shown as a horizontal line.
        y_limits: None or tuple, the limits of the y-axis

            - If set to None, the limits of the y-axis are set automatically
            - If set to a tuple, the limits are manually set

        dy_limits: None or tuple, the limits of the dy-axis

            - If set to None, the limits of the dy-axis are set automatically
            - If set to a tuple, the limits are manually set
    """
    heterogeneity = helpers.prep_confidence_interval(heterogeneity)
    centering = helpers.prep_centering(centering)

    # hack to fit the feature if not fitted
    self.eval(
        feature, np.array([self.axis_limits[0, feature]]), centering=centering
    )

    if show_avg_output:
        avg_output = helpers.prep_avg_output(self.data, self.model, self.avg_output, scale_y)
    else:
        avg_output = None

    vis.ale_plot(
        self.feature_effect["feature_" + str(feature)],
        self.eval,
        feature,
        centering=centering,
        error=heterogeneity,
        scale_x=scale_x,
        scale_y=scale_y,
        title=self.method_name + " plot",
        avg_output=avg_output,
        feature_names=self.feature_names,
        target_name=self.target_name,
        y_limits=y_limits,
        dy_limits=dy_limits
    )
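
Continuing the sketch above, a typical plot call might look as follows (all argument values are illustrative):

ale.plot(
    feature=0,
    heterogeneity=True,     # draw the std of the bin-effects as vertical bars
    centering=True,         # center the curve around the y axis
    show_avg_output=True,   # add a horizontal line at the average model output
)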

effector.global_effect_ale.ALE

Bases: ALEBase

Source code in effector/global_effect_ale.py
class ALE(ALEBase):
    def __init__(
            self,
            data: np.ndarray,
            model: callable,
            nof_instances: Union[int, str] = "all",
            axis_limits: Optional[np.ndarray] = None,
            avg_output: Optional[float] = None,
            feature_names: Optional[List] = None,
            target_name: Optional[str] = None,
    ):
        """
        Constructor for the ALE plot.

        Definition:
            ALE is defined as:
            $$
            \hat{f}^{ALE}(x_s) = TODO
            $$

            The heterogeneity is:
            $$
            TODO
            $$

            The std of the bin-effects is:
            $$
            TODO
            $$

        Notes:
            - The required parameters are `data` and `model`. The rest are optional.

        Args:
            data: the design matrix

                - shape: `(N,D)`
            model: the black-box model. Must be a `Callable` with:

                - input: `ndarray` of shape `(N, D)`
                - output: `ndarray` of shape `(N, )`

            nof_instances: the number of instances to use for the explanation

                - use an `int`, to specify the number of instances
                - use `"all"`, to use all the instances

            axis_limits: The limits of the feature effect plot along each axis

                - use a `ndarray` of shape `(2, D)`, to specify them manually
                - use `None`, to be inferred from the data

            avg_output: the average output of the model on the data

                - use a `float`, to specify it manually
                - use `None`, to be inferred as `np.mean(model(data))`

            feature_names: The names of the features

                - use a `list` of `str`, to specify the names manually. For example: `["age", "weight", ...]`
                - use `None`, to keep the default names: `["x_0", "x_1", ...]`

            target_name: The name of the target variable

                - use a `str`, to specify its name manually. For example: `"price"`
                - use `None`, to keep the default name: `"y"`
        """
        super(ALE, self).__init__(
            data, model, nof_instances, axis_limits, avg_output, feature_names, target_name, "ALE"
        )

    def _fit_feature(self, feature: int, binning_method="fixed") -> typing.Dict:

        # drop points outside of limits
        ind = np.logical_and(
            self.data[:, feature] >= self.axis_limits[0, feature],
            self.data[:, feature] <= self.axis_limits[1, feature],
        )
        data = self.data[ind, :]

        # assertion
        assert binning_method == "fixed" or isinstance(
            binning_method, bm.Fixed
        ), "ALE can work only with the fixed binning method!"

        if isinstance(binning_method, str):
            binning_method = bm.Fixed(nof_bins=20, min_points_per_bin=0)
        bin_est = bm.find_limits(data, None, feature, self.axis_limits, binning_method)
        bin_name = bin_est.__class__.__name__

        # assert bins can be computed else raise error
        assert bin_est.limits is not False, (
            "Impossible to compute bins with enough points for feature "
            + str(feature + 1)
            + " and binning strategy: "
            + bin_name
            + ". Change bin strategy or "
            "the parameters of the method"
        )

        # compute data effect on bin limits
        data_effect = utils.compute_local_effects(
            data, self.model, bin_est.limits, feature
        )

        # compute the bin effect
        dale_params = utils.compute_ale_params(
            data[:, feature], data_effect, bin_est.limits
        )
        dale_params["alg_params"] = "fixed"
        return dale_params

    def fit(
        self,
        features: typing.Union[int, str, list] = "all",
        binning_method: typing.Union[str, bm.Fixed] = "fixed",
        centering: typing.Union[bool, str] = "zero_integral",
    ) -> None:
        """Fit the ALE plot.

        Args:
            features: the features to fit. If set to "all", all the features will be fitted.

            binning_method:

                - If set to `"fixed"`, the ALE plot will be computed with the default values, which are
                `20` bins with at least `10` points per bin; the feature is considered categorical if it has
                fewer than `15` unique values.
                - If you want to change the parameters of the method, you pass an instance of the
                class `effector.binning_methods.Fixed` with the desired parameters.
                For example: `Fixed(nof_bins=20, min_points_per_bin=0, cat_limit=10)`

            centering: whether to compute the normalization constant for centering the plot:

                - `False` means no centering
                - `True` or `zero_integral` centers around the `y` axis.
                - `zero_start` starts the plot from `y=0`.
        """
        assert binning_method == "fixed" or isinstance(
            binning_method, bm.Fixed
        ), "ALE can work only with the fixed binning method!"

        self._fit_loop(features, binning_method, centering)

__init__(data, model, nof_instances='all', axis_limits=None, avg_output=None, feature_names=None, target_name=None)

Constructor for the ALE plot.

Definition

ALE is defined as: $$ \hat{f}^{ALE}(x_s) = TODO $$

The heterogeneity is: $$ TODO $$

The std of the bin-effects is: $$ TODO $$
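
The formulas above are still marked TODO in the docstring. For reference, the standard (uncentered) ALE definition of Apley and Zhu (2016), which this class approximates, is

$$ \hat{f}^{ALE}(x_s) = \sum_{k=1}^{k_{x_s}} \frac{1}{|\mathcal{S}_k|} \sum_{i: x_s^{(i)} \in \mathcal{S}_k} \left[ f(z_k, x_c^{(i)}) - f(z_{k-1}, x_c^{(i)}) \right] $$

where the axis of feature $x_s$ is split into bins $\mathcal{S}_k = (z_{k-1}, z_k]$, $k_{x_s}$ is the bin containing $x_s$, and $x_c$ denotes the remaining features.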

Notes
  • The required parameters are data and model. The rest are optional.

Parameters:

  data (np.ndarray, required): the design matrix
    • shape: (N,D)

  model (callable, required): the black-box model. Must be a Callable with:
    • input: ndarray of shape (N, D)
    • output: ndarray of shape (N, )

  nof_instances (Union[int, str], default='all'): the number of instances to use for the explanation
    • use an int, to specify the number of instances
    • use "all", to use all the instances

  axis_limits (Optional[np.ndarray], default=None): the limits of the feature effect plot along each axis
    • use a ndarray of shape (2, D), to specify them manually
    • use None, to be inferred from the data

  avg_output (Optional[float], default=None): the average output of the model on the data
    • use a float, to specify it manually
    • use None, to be inferred as np.mean(model(data))

  feature_names (Optional[List], default=None): the names of the features
    • use a list of str, to specify the names manually. For example: ["age", "weight", ...]
    • use None, to keep the default names: ["x_0", "x_1", ...]

  target_name (Optional[str], default=None): the name of the target variable
    • use a str, to specify its name manually. For example: "price"
    • use None, to keep the default name: "y"
Source code in effector/global_effect_ale.py
def __init__(
        self,
        data: np.ndarray,
        model: callable,
        nof_instances: Union[int, str] = "all",
        axis_limits: Optional[np.ndarray] = None,
        avg_output: Optional[float] = None,
        feature_names: Optional[List] = None,
        target_name: Optional[str] = None,
):
    """
    Constructor for the ALE plot.

    Definition:
        ALE is defined as:
        $$
        \hat{f}^{ALE}(x_s) = TODO
        $$

        The heterogeneity is:
        $$
        TODO
        $$

        The std of the bin-effects is:
        $$
        TODO
        $$

    Notes:
        - The required parameters are `data` and `model`. The rest are optional.

    Args:
        data: the design matrix

            - shape: `(N,D)`
        model: the black-box model. Must be a `Callable` with:

            - input: `ndarray` of shape `(N, D)`
            - output: `ndarray` of shape `(N, )`

        nof_instances: the number of instances to use for the explanation

            - use an `int`, to specify the number of instances
            - use `"all"`, to use all the instances

        axis_limits: The limits of the feature effect plot along each axis

            - use a `ndarray` of shape `(2, D)`, to specify them manually
            - use `None`, to be inferred from the data

        avg_output: the average output of the model on the data

            - use a `float`, to specify it manually
            - use `None`, to be inferred as `np.mean(model(data))`

        feature_names: The names of the features

            - use a `list` of `str`, to specify the names manually. For example: `["age", "weight", ...]`
            - use `None`, to keep the default names: `["x_0", "x_1", ...]`

        target_name: The name of the target variable

            - use a `str`, to specify its name manually. For example: `"price"`
            - use `None`, to keep the default name: `"y"`
    """
    super(ALE, self).__init__(
        data, model, nof_instances, axis_limits, avg_output, feature_names, target_name, "ALE"
    )
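
A hedged construction example (the toy data and model are assumptions; the feature and target names reuse the docstring examples):

import numpy as np
from effector.global_effect_ale import ALE

X = np.random.uniform(-1, 1, size=(1000, 2))
model = lambda x: x[:, 0] ** 2 + x[:, 0] * x[:, 1]

ale = ALE(
    data=X,
    model=model,
    axis_limits=np.array([[-1.0, -1.0], [1.0, 1.0]]),  # shape (2, D): lower/upper bound per feature
    feature_names=["age", "weight"],
    target_name="price",
)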

fit(features='all', binning_method='fixed', centering='zero_integral')

Fit the ALE plot.

Parameters:

  features (typing.Union[int, str, list], default='all'): the features to fit. If set to "all", all the features will be fitted.

  binning_method (typing.Union[str, bm.Fixed], default='fixed'):
    • If set to "fixed", the ALE plot will be computed with the default values, which are 20 bins with at least 10 points per bin; the feature is considered categorical if it has fewer than 15 unique values.
    • To change the parameters of the method, pass an instance of the class effector.binning_methods.Fixed with the desired parameters. For example: Fixed(nof_bins=20, min_points_per_bin=0, cat_limit=10)

  centering (typing.Union[bool, str], default='zero_integral'): whether to compute the normalization constant for centering the plot:
    • False means no centering
    • True or zero_integral centers around the y axis.
    • zero_start starts the plot from y=0.
Source code in effector/global_effect_ale.py
def fit(
    self,
    features: typing.Union[int, str, list] = "all",
    binning_method: typing.Union[str, bm.Fixed] = "fixed",
    centering: typing.Union[bool, str] = "zero_integral",
) -> None:
    """Fit the ALE plot.

    Args:
        features: the features to fit. If set to "all", all the features will be fitted.

        binning_method:

            - If set to `"fixed"`, the ALE plot will be computed with the default values, which are
            `20` bins with at least `10` points per bin; the feature is considered categorical if it has
            fewer than `15` unique values.
            - If you want to change the parameters of the method, you pass an instance of the
            class `effector.binning_methods.Fixed` with the desired parameters.
            For example: `Fixed(nof_bins=20, min_points_per_bin=0, cat_limit=10)`

        centering: whether to compute the normalization constant for centering the plot:

            - `False` means no centering
            - `True` or `zero_integral` centers around the `y` axis.
            - `zero_start` starts the plot from `y=0`.
    """
    assert binning_method == "fixed" or isinstance(
        binning_method, bm.Fixed
    ), "ALE can work only with the fixed binning method!"

    self._fit_loop(features, binning_method, centering)
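
For example, continuing the toy setup above (a sketch; the Fixed parameters reuse the docstring example):

from effector import binning_methods

ale.fit(features="all", binning_method="fixed", centering="zero_integral")

# or with custom fixed-binning parameters:
ale.fit(
    features=0,
    binning_method=binning_methods.Fixed(nof_bins=20, min_points_per_bin=0, cat_limit=10),
)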

effector.global_effect_ale.RHALE

Bases: ALEBase

Source code in effector/global_effect_ale.py
class RHALE(ALEBase):
    def __init__(
            self,
            data: np.ndarray,
            model: callable,
            model_jac: typing.Union[None, callable] = None,
            nof_instances: typing.Union[int, str] = "all",
            axis_limits: typing.Optional[np.ndarray] = None,
            data_effect: typing.Optional[np.ndarray] = None,
            avg_output: typing.Optional[float] = None,
            feature_names: typing.Optional[list] = None,
            target_name: typing.Optional[str] = None,
    ):
        """
        Constructor for RHALE.

        Definition:
            RHALE is defined as:
            $$
            \hat{f}^{RHALE}(x_s) = TODO
            $$

            The heterogeneity is:
            $$
            TODO
            $$

            The std of the bin-effects is:
            $$
            TODO
            $$

        Notes:
            The required parameters are `data` and `model`. The rest are optional.

        Args:
            data: the design matrix

                - shape: `(N,D)`
            model: the black-box model. Must be a `Callable` with:

                - input: `ndarray` of shape `(N, D)`
                - output: `ndarray` of shape `(N, )`

            model_jac: the Jacobian of the model. Must be a `Callable` with:

                - input: `ndarray` of shape `(N, D)`
                - output: `ndarray` of shape `(N, D)`

            nof_instances: the number of instances to use for the explanation

                - use an `int`, to specify the number of instances
                - use `"all"`, to use all the instances

            axis_limits: The limits of the feature effect plot along each axis

                - use a `ndarray` of shape `(2, D)`, to specify them manually
                - use `None`, to be inferred from the data

            data_effect:
                - if np.ndarray, the model Jacobian computed on the `data`
                - if None, the Jacobian will be computed using model_jac

            avg_output: the average output of the model on the data

                - use a `float`, to specify it manually
                - use `None`, to be inferred as `np.mean(model(data))`

            feature_names: The names of the features

                - use a `list` of `str`, to specify the names manually. For example: `["age", "weight", ...]`
                - use `None`, to keep the default names: `["x_0", "x_1", ...]`

            target_name: The name of the target variable

                - use a `str`, to specify its name manually. For example: `"price"`
                - use `None`, to keep the default name: `"y"`
        """
        self.model_jac = model_jac

        # select nof_instances from the data
        nof_instances, indices = helpers.prep_nof_instances(nof_instances, data.shape[0])
        data = data[indices, :]
        data_effect = data_effect[indices, :] if data_effect is not None else None
        self.data_effect = data_effect

        super(RHALE, self).__init__(
            data, model, "all", axis_limits, avg_output, feature_names, target_name, "RHALE"
        )

    def compile(self):
        """Prepare everything for fitting, i.e., compute the gradients on data points.
        """
        if self.data_effect is None and self.model_jac is not None:
            self.data_effect = self.model_jac(self.data)
        elif self.data_effect is None and self.model_jac is None:
            self.data_effect = utils.compute_jacobian_numerically(self.model, self.data)

    def _fit_feature(
            self,
            feature: int,
            binning_method: Union[str, bm.DynamicProgramming, bm.Greedy, bm.Fixed] = "greedy"
    ) -> typing.Dict:
        if self.data_effect is None:
            self.compile()

        # drop points outside of limits
        ind = np.logical_and(
            self.data[:, feature] >= self.axis_limits[0, feature],
            self.data[:, feature] <= self.axis_limits[1, feature],
        )
        data = self.data[ind, :]
        data_effect = self.data_effect[ind, :]

        # bin estimation
        bin_est = bm.find_limits(
            data, data_effect, feature, self.axis_limits, binning_method
        )
        bin_name = bin_est.__class__.__name__

        # assert bins can be computed else raise error
        assert bin_est.limits is not False, (
            "Impossible to compute bins with enough points for feature "
            + str(feature + 1)
            + " and binning strategy: "
            + bin_name
            + ". Change bin strategy or "
            "the parameters of the method"
        )

        # compute the bin effect
        dale_params = utils.compute_ale_params(
            data[:, feature], data_effect[:, feature], bin_est.limits
        )
        dale_params["alg_params"] = binning_method
        return dale_params

    def fit(
        self,
        features: typing.Union[int, str, list] = "all",
        binning_method: typing.Union[str, bm.DynamicProgramming, bm.Greedy, bm.Fixed] = "greedy",
        centering: typing.Union[bool, str] = False,
    ) -> None:
        """Fit the model.

        Args:
            features (int, str, list): the features to fit.

                - If set to "all", all the features will be fitted.

            binning_method (str): the binning method to use.

                - Use `"greedy"` for using the Greedy binning solution with the default parameters.
                  For custom parameters initialize a `binning_methods.Greedy` object
                - Use `"dynamic"` for using a Dynamic Programming binning solution with the default parameters.
                  For custom parameters initialize a `binning_methods.DynamicProgramming` object
                - Use `"fixed"` for using a Fixed binning solution with the default parameters.
                  For custom parameters initialize a `binning_methods.Fixed` object

            centering: whether to compute the normalization constant for centering the plot:

                - `False` means no centering
                - `True` or `zero_integral` centers around the `y` axis
                - `zero_start` starts the plot from `y=0`
        """
        assert binning_method in [
            "greedy",
            "dynamic",
            "fixed"
        ] or isinstance(
            binning_method, bm.Greedy
        ) or isinstance(
            binning_method, bm.DynamicProgramming
        ) or isinstance(
            binning_method, bm.Fixed
        ), "Unknown binning method!"

        self._fit_loop(features, binning_method, centering)

__init__(data, model, model_jac=None, nof_instances='all', axis_limits=None, data_effect=None, avg_output=None, feature_names=None, target_name=None)

Constructor for RHALE.

Definition

RHALE is defined as: $$ \hat{f}^{RHALE}(x_s) = TODO $$

The heterogeneity is: $$ TODO $$

The std of the bin-effects is: $$ TODO $$
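
These formulas are also still marked TODO. Judging from the implementation below, RHALE (Gkolemis et al., 2023) accumulates bin-averaged derivatives instead of finite differences; a hedged statement of the mean effect is

$$ \hat{f}^{RHALE}(x_s) = \sum_{k=1}^{k_{x_s}} (z_k - z_{k-1}) \, \hat{\mu}_k, \qquad \hat{\mu}_k = \frac{1}{|\mathcal{S}_k|} \sum_{i: x_s^{(i)} \in \mathcal{S}_k} \frac{\partial f}{\partial x_s}(x^{(i)}) $$

where $\hat{\mu}_k$ is the mean of the Jacobian entries inside bin $\mathcal{S}_k = (z_{k-1}, z_k]$, and the heterogeneity of each bin is measured by the variance of those derivatives.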

Notes

The required parameters are data and model. The rest are optional.

Parameters:

  data (np.ndarray, required): the design matrix
    • shape: (N,D)

  model (callable, required): the black-box model. Must be a Callable with:
    • input: ndarray of shape (N, D)
    • output: ndarray of shape (N, )

  model_jac (typing.Union[None, callable], default=None): the Jacobian of the model. Must be a Callable with:
    • input: ndarray of shape (N, D)
    • output: ndarray of shape (N, D)

  nof_instances (typing.Union[int, str], default='all'): the number of instances to use for the explanation
    • use an int, to specify the number of instances
    • use "all", to use all the instances

  axis_limits (typing.Optional[np.ndarray], default=None): the limits of the feature effect plot along each axis
    • use a ndarray of shape (2, D), to specify them manually
    • use None, to be inferred from the data

  data_effect (typing.Optional[np.ndarray], default=None):
    • if np.ndarray, the model Jacobian computed on the data
    • if None, the Jacobian will be computed using model_jac

  avg_output (typing.Optional[float], default=None): the average output of the model on the data
    • use a float, to specify it manually
    • use None, to be inferred as np.mean(model(data))

  feature_names (typing.Optional[list], default=None): the names of the features
    • use a list of str, to specify the names manually. For example: ["age", "weight", ...]
    • use None, to keep the default names: ["x_0", "x_1", ...]

  target_name (typing.Optional[str], default=None): the name of the target variable
    • use a str, to specify its name manually. For example: "price"
    • use None, to keep the default name: "y"
Source code in effector/global_effect_ale.py
def __init__(
        self,
        data: np.ndarray,
        model: callable,
        model_jac: typing.Union[None, callable] = None,
        nof_instances: typing.Union[int, str] = "all",
        axis_limits: typing.Optional[np.ndarray] = None,
        data_effect: typing.Optional[np.ndarray] = None,
        avg_output: typing.Optional[float] = None,
        feature_names: typing.Optional[list] = None,
        target_name: typing.Optional[str] = None,
):
    """
    Constructor for RHALE.

    Definition:
        RHALE is defined as:
        $$
        \hat{f}^{RHALE}(x_s) = TODO
        $$

        The heterogeneity is:
        $$
        TODO
        $$

        The std of the bin-effects is:
        $$
        TODO
        $$

    Notes:
        The required parameters are `data` and `model`. The rest are optional.

    Args:
        data: the design matrix

            - shape: `(N,D)`
        model: the black-box model. Must be a `Callable` with:

            - input: `ndarray` of shape `(N, D)`
            - output: `ndarray` of shape `(N, )`

        model_jac: the Jacobian of the model. Must be a `Callable` with:

            - input: `ndarray` of shape `(N, D)`
            - output: `ndarray` of shape `(N, D)`

        nof_instances: the number of instances to use for the explanation

            - use an `int`, to specify the number of instances
            - use `"all"`, to use all the instances

        axis_limits: The limits of the feature effect plot along each axis

            - use a `ndarray` of shape `(2, D)`, to specify them manually
            - use `None`, to be inferred from the data

        data_effect:
            - if np.ndarray, the model Jacobian computed on the `data`
            - if None, the Jacobian will be computed using model_jac

        avg_output: the average output of the model on the data

            - use a `float`, to specify it manually
            - use `None`, to be inferred as `np.mean(model(data))`

        feature_names: The names of the features

            - use a `list` of `str`, to specify the names manually. For example: `["age", "weight", ...]`
            - use `None`, to keep the default names: `["x_0", "x_1", ...]`

        target_name: The name of the target variable

            - use a `str`, to specify its name manually. For example: `"price"`
            - use `None`, to keep the default name: `"y"`
    """
    self.model_jac = model_jac

    # select nof_instances from the data
    nof_instances, indices = helpers.prep_nof_instances(nof_instances, data.shape[0])
    data = data[indices, :]
    data_effect = data_effect[indices, :] if data_effect is not None else None
    self.data_effect = data_effect

    super(RHALE, self).__init__(
        data, model, "all", axis_limits, avg_output, feature_names, target_name, "RHALE"
    )
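
A hedged construction sketch with an analytic Jacobian for a toy model (all names and values are illustrative):

import numpy as np
from effector.global_effect_ale import RHALE

X = np.random.uniform(-1, 1, size=(1000, 2))

def model(x):
    return x[:, 0] ** 2 + x[:, 0] * x[:, 1]

def model_jac(x):
    # analytic Jacobian of the toy model above, shape (N, D)
    return np.stack([2 * x[:, 0] + x[:, 1], x[:, 0]], axis=1)

rhale = RHALE(data=X, model=model, model_jac=model_jac)
# if model_jac is omitted, the Jacobian is approximated numerically
# on the first fit (see compile in the class source above)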

fit(features='all', binning_method='greedy', centering=False)

Fit the model.

Parameters:

  features (int, str, or list, default='all'): the features to fit.
    • If set to "all", all the features will be fitted.

  binning_method (str or binning-method object, default='greedy'): the binning method to use.
    • Use "greedy" for the Greedy binning solution with the default parameters. For custom parameters initialize a binning_methods.Greedy object
    • Use "dynamic" for the Dynamic Programming binning solution with the default parameters. For custom parameters initialize a binning_methods.DynamicProgramming object
    • Use "fixed" for the Fixed binning solution with the default parameters. For custom parameters initialize a binning_methods.Fixed object

  centering (typing.Union[bool, str], default=False): whether to compute the normalization constant for centering the plot:
    • False means no centering
    • True or zero_integral centers around the y axis
    • zero_start starts the plot from y=0
Source code in effector/global_effect_ale.py
def fit(
    self,
    features: typing.Union[int, str, list] = "all",
    binning_method: typing.Union[str, bm.DynamicProgramming, bm.Greedy, bm.Fixed] = "greedy",
    centering: typing.Union[bool, str] = False,
) -> None:
    """Fit the model.

    Args:
        features (int, str, list): the features to fit.

            - If set to "all", all the features will be fitted.

        binning_method (str): the binning method to use.

            - Use `"greedy"` for using the Greedy binning solution with the default parameters.
              For custom parameters initialize a `binning_methods.Greedy` object
            - Use `"dynamic"` for using a Dynamic Programming binning solution with the default parameters.
              For custom parameters initialize a `binning_methods.DynamicProgramming` object
            - Use `"fixed"` for using a Fixed binning solution with the default parameters.
              For custom parameters initialize a `binning_methods.Fixed` object

        centering: whether to compute the normalization constant for centering the plot:

            - `False` means no centering
            - `True` or `zero_integral` centers around the `y` axis
            - `zero_start` starts the plot from `y=0`
    """
    assert binning_method in [
        "greedy",
        "dynamic",
        "fixed"
    ] or isinstance(
        binning_method, bm.Greedy
    ) or isinstance(
        binning_method, bm.DynamicProgramming
    ) or isinstance(
        binning_method, bm.Fixed
    ), "Unknown binning method!"

    self._fit_loop(features, binning_method, centering)
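
For example, continuing the rhale object above (a sketch; the strings follow the assert in the source):

rhale.fit(features="all", binning_method="greedy", centering=True)

# or with the dynamic-programming binning:
rhale.fit(features=0, binning_method="dynamic")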

effector.global_effect_pdp.PDPBase

Bases: GlobalEffectBase

Source code in effector/global_effect_pdp.py
class PDPBase(GlobalEffectBase):
    def __init__(
        self,
        data: np.ndarray,
        model: Callable,
        model_jac: Optional[Callable] = None,
        axis_limits: Optional[np.ndarray] = None,
        avg_output: Optional[float] = None,
        nof_instances: Union[int, str] = 300,
        feature_names: Optional[List] = None,
        target_name: Optional[str] = None,
        method_name: str = "PDP",
    ):
        """
        Constructor of the PDPBase class.
        """

        self.model_jac = model_jac

        super(PDPBase, self).__init__(
            method_name,
            data,
            model, nof_instances, axis_limits, avg_output, feature_names, target_name
        )

    def _predict(self, data, xx, feature):
        if self.method_name == "pdp":
            y = pdp_1d_vectorized(
                self.model, data, xx, feature, False, False, True
            )
        else:
            if self.model_jac is not None:
                y = pdp_1d_vectorized(self.model_jac, self.data, xx, feature, False, True, True)
            else:
                y = pdp_1d_vectorized(self.model, self.data, xx, feature, False, False, True, True)
        return y

    def _fit_feature(
        self,
        feature: int,
        centering: Union[bool, str] = False,
        points_for_centering: int = 100,
    ) -> dict:

        # drop points outside of limits
        self.data = self.data[self.data[:, feature] >= self.axis_limits[0, feature]]
        self.data = self.data[self.data[:, feature] <= self.axis_limits[1, feature]]
        data = self.data

        if centering is True or centering == "zero_integral":
            xx = np.linspace(
                self.axis_limits[0, feature],
                self.axis_limits[1, feature],
                points_for_centering,
            )
            y = self._predict(data, xx, feature)
            norm_const = np.mean(y, axis=0)
            fe = {"norm_const": norm_const}
        elif centering == "zero_start":
            xx = self.axis_limits[0, feature, np.newaxis]
            y = self._predict(data, xx, feature)
            fe = {"norm_const": y[0]}
        else:
            fe = {"norm_const": helpers.EMPTY_SYMBOL}
        return fe

    def fit(
        self,
        features: Union[int, str, list] = "all",
        centering: Union[bool, str] = True,
        points_for_centering: int = 100,
    ):
        """
        Fit the PDP or d-PDP.

        Notes:
            You can use `.eval` or `.plot` without calling `.fit` explicitly.
            The only thing `.fit` does is compute the normalization constant for centering the PDP and ICE plots,
            and this happens automatically on the first call to `.eval` or `.plot`.

        Args:
            features: the features to fit.
                - If set to "all", all the features will be fitted.

            centering: whether to center the plot:

                - `False` means no centering
                - `True` or `zero_integral` centers around the `y` axis.
                - `zero_start` starts the plot from `y=0`.

            points_for_centering: number of linspaced points along the feature axis used for centering.

                - If set to `"all"`, all the dataset points will be used.

        """
        centering = helpers.prep_centering(centering)
        features = helpers.prep_features(features, self.dim)

        for s in features:
            self.feature_effect["feature_" + str(s)] = self._fit_feature(
                s, centering, points_for_centering
            )
            self.is_fitted[s] = True
            self.method_args["feature_" + str(s)] = {
                "centering": centering,
                "points_for_centering": points_for_centering,
            }

    def eval(
        self,
        feature: int,
        xs: np.ndarray,
        heterogeneity: bool = False,
        centering: typing.Union[bool, str] = False,
        return_all: bool = False,
    ) -> typing.Union[np.ndarray, typing.Tuple[np.ndarray, np.ndarray]]:
        """Evaluate the effect of the s-th feature at positions `xs`.

        Args:
            feature: index of feature of interest
            xs: the points along the s-th axis to evaluate the FE plot

              - `np.ndarray` of shape `(T, )`

            heterogeneity: whether to return the heterogeneity measures.

                  - if `heterogeneity=False`, the function returns the mean effect at the given `xs`
                  - If `heterogeneity=True`, the function returns `(y, std)` where `y` is the mean effect and `std` is the standard deviation of the mean effect

            centering: whether to center the PDP

                - If `centering` is `False`, the PDP is not centered
                - If `centering` is `True` or `zero_integral`, the PDP is centered around the `y` axis.
                - If `centering` is `zero_start`, the PDP starts from `y=0`.

            return_all: whether to return PDP and ICE plots evaluated at `xs`

                - If `return_all=False`, the function returns the mean effect at the given `xs`
                - If `return_all=True`, the function returns a `ndarray` of shape `(T, N)` with the `N` ICE plots evaluated at `xs`

        Returns:
            the mean effect `y`, if `heterogeneity=False` (default) or a tuple `(y, std)` otherwise

        """
        centering = helpers.prep_centering(centering)

        if self.refit(feature, centering):
            self.fit(features=feature, centering=centering)

        # Check if the lower bound is less than the upper bound
        assert self.axis_limits[0, feature] < self.axis_limits[1, feature]

        # new implementation
        yy = self._predict(self.data, xs, feature)

        if centering:
            norm_consts = np.expand_dims(
                self.feature_effect["feature_" + str(feature)]["norm_const"], axis=0
            )
            yy = yy - norm_consts

        y_pdp = np.mean(yy, axis=1)

        if return_all:
            return yy

        if heterogeneity:
            std = np.std(yy, axis=1)
            return y_pdp, std
        else:
            return y_pdp

    def plot(
        self,
        feature: int,
        heterogeneity: Union[bool, str] = False,
        centering: Union[bool, str] = False,
        nof_points: int = 30,
        scale_x: Optional[dict] = None,
        scale_y: Optional[dict] = None,
        nof_ice: Union[int, str] = "all",
        show_avg_output: bool = False,
        y_limits: Optional[List] = None,
    ):
        """
        Plot the PDP or d-PDP.

        Args:
            feature: index of the plotted feature
            heterogeneity: whether to plot the heterogeneity

                - If `heterogeneity` is `False`, no heterogeneity is plotted
                - If `heterogeneity` is `True` or `"std"`, the standard deviation of the ICE plots is plotted
                - If `heterogeneity` is `"ice"`, the ICE plots are plotted

            centering: whether to center the PDP

                - If `centering` is `False`, the PDP is not centered
                - If `centering` is `True` or `zero_integral`, the PDP is centered around the `y` axis.
                - If `centering` is `zero_start`, the PDP starts from `y=0`.

            nof_points: number of points used to evaluate the PDP plot
            scale_x: dictionary with keys "mean" and "std" for scaling the x-axis
            scale_y: dictionary with keys "mean" and "std" for scaling the y-axis
            nof_ice: number of ICE plots to show on top of the PDP curve
            show_avg_output: whether to show the average output of the model
            y_limits: limits of the y-axis
        """
        heterogeneity = helpers.prep_confidence_interval(heterogeneity)
        centering = helpers.prep_centering(centering)

        x = np.linspace(
            self.axis_limits[0, feature], self.axis_limits[1, feature], nof_points
        )

        yy = self.eval(
            feature, x, heterogeneity=False, centering=centering, return_all=True
        )

        if show_avg_output:
            avg_output = helpers.prep_avg_output(self.data, self.model, self.avg_output, scale_y)
        else:
            avg_output = None

        title = "PDP" if self.method_name == "pdp" else "d-PDP"
        vis.plot_pdp_ice(
            x,
            feature,
            yy=yy,
            title=title,
            confidence_interval=heterogeneity,
            y_pdp_label="PDP" if self.method_name == "pdp" else "d-PDP",
            y_ice_label="ICE" if self.method_name == "pdp" else "d-ICE",
            scale_x=scale_x,
            scale_y=scale_y,
            avg_output=avg_output,
            feature_names=self.feature_names,
            target_name=self.target_name,
            nof_ice=nof_ice,
            y_limits=y_limits,
        )

fit(features='all', centering=True, points_for_centering=100)

Fit the PDP or d-PDP.

Notes

You can use .eval or .plot without calling .fit explicitly. The only thing .fit does is compute the normalization constant for centering the PDP and ICE plots, and this happens automatically on the first call to .eval or .plot.

Parameters:

  features (Union[int, str, list], default='all'): the features to fit.
    • If set to "all", all the features will be fitted.

  centering (Union[bool, str], default=True): whether to center the plot:
    • False means no centering
    • True or zero_integral centers around the y axis.
    • zero_start starts the plot from y=0.

  points_for_centering (int, default=100): number of linspaced points along the feature axis used for centering.
    • If set to "all", all the dataset points will be used.
Source code in effector/global_effect_pdp.py
def fit(
    self,
    features: Union[int, str, list] = "all",
    centering: Union[bool, str] = True,
    points_for_centering: int = 100,
):
    """
    Fit the PDP or d-PDP.

    Notes:
        You can use `.eval` or `.plot` without calling `.fit` explicitly.
        The only thing `.fit` does is compute the normalization constant for centering the PDP and ICE plots,
        and this happens automatically on the first call to `.eval` or `.plot`.

    Args:
        features: the features to fit.
            - If set to "all", all the features will be fitted.

        centering: whether to center the plot:

            - `False` means no centering
            - `True` or `zero_integral` centers around the `y` axis.
            - `zero_start` starts the plot from `y=0`.

        points_for_centering: number of linspaced points along the feature axis used for centering.

            - If set to `"all"`, all the dataset points will be used.

    """
    centering = helpers.prep_centering(centering)
    features = helpers.prep_features(features, self.dim)

    for s in features:
        self.feature_effect["feature_" + str(s)] = self._fit_feature(
            s, centering, points_for_centering
        )
        self.is_fitted[s] = True
        self.method_args["feature_" + str(s)] = {
            "centering": centering,
            "points_for_centering": points_for_centering,
        }
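
For orientation, a minimal usage sketch under hypothetical toy data and model (the dataset, seed, and model below are illustrative, not part of the library):

import numpy as np
from effector.global_effect_pdp import PDP

np.random.seed(21)
data = np.random.uniform(-1, 1, size=(500, 2))      # hypothetical design matrix, shape (N, D)
model = lambda x: x[:, 0] ** 2 + x[:, 0] * x[:, 1]  # hypothetical black box, (N, D) -> (N,)

pdp = PDP(data=data, model=model)
pdp.fit(features="all", centering=True, points_for_centering=100)  # precompute the norm constants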

eval(feature, xs, heterogeneity=False, centering=False, return_all=False)

Evaluate the effect of the s-th feature at positions xs.

Parameters:

  feature (int, required)
      index of feature of interest

  xs (np.ndarray, required)
      the points along the s-th axis to evaluate the FE plot
        • np.ndarray of shape (T, )

  heterogeneity (bool, default: False)
      whether to return the heterogeneity measures.
        • if heterogeneity=False, the function returns the mean effect at the given xs
        • If heterogeneity=True, the function returns (y, std) where y is the mean effect and std is the standard deviation of the mean effect

  centering (typing.Union[bool, str], default: False)
      whether to center the PDP
        • If centering is False, the PDP is not centered
        • If centering is True or zero_integral, the PDP is centered around the y axis.
        • If centering is zero_start, the PDP starts from y=0.

  return_all (bool, default: False)
      whether to return the PDP and ICE plots evaluated at xs
        • If return_all=False, the function returns the mean effect at the given xs
        • If return_all=True, the function returns a ndarray of shape (T, N) with the N ICE plots evaluated at xs

Returns:

  typing.Union[np.ndarray, typing.Tuple[np.ndarray, np.ndarray]]
      the mean effect y, if heterogeneity=False (default) or a tuple (y, std) otherwise

Source code in /home/runner/work/effector/effector/effector/global_effect_pdp.py
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
def eval(
    self,
    feature: int,
    xs: np.ndarray,
    heterogeneity: bool = False,
    centering: typing.Union[bool, str] = False,
    return_all: bool = False,
) -> typing.Union[np.ndarray, typing.Tuple[np.ndarray, np.ndarray]]:
    """Evaluate the effect of the s-th feature at positions `xs`.

    Args:
        feature: index of feature of interest
        xs: the points along the s-th axis to evaluate the FE plot

          - `np.ndarray` of shape `(T, )`

        heterogeneity: whether to return the heterogeneity measures.

              - if `heterogeneity=False`, the function returns the mean effect at the given `xs`
              - If `heterogeneity=True`, the function returns `(y, std)` where `y` is the mean effect and `std` is the standard deviation of the mean effect

        centering: whether to center the PDP

            - If `centering` is `False`, the PDP is not centered
            - If `centering` is `True` or `zero_integral`, the PDP is centered around the `y` axis.
            - If `centering` is `zero_start`, the PDP starts from `y=0`.

        return_all: whether to return PDP and ICE plots evaluated at `xs`

            - If `return_all=False`, the function returns the mean effect at the given `xs`
            - If `return_all=True`, the function returns a `ndarray` of shape `(T, N)` with the `N` ICE plots evaluated at `xs`

    Returns:
        the mean effect `y`, if `heterogeneity=False` (default) or a tuple `(y, std)` otherwise

    """
    centering = helpers.prep_centering(centering)

    if self.refit(feature, centering):
        self.fit(features=feature, centering=centering)

    # Check if the lower bound is less than the upper bound
    assert self.axis_limits[0, feature] < self.axis_limits[1, feature]

    # evaluate the ICE curves at positions xs: shape (T, N)
    yy = self._predict(self.data, xs, feature)

    if centering:
        norm_consts = np.expand_dims(
            self.feature_effect["feature_" + str(feature)]["norm_const"], axis=0
        )
        yy = yy - norm_consts

    y_pdp = np.mean(yy, axis=1)

    if return_all:
        return yy

    if heterogeneity:
        std = np.std(yy, axis=1)
        return y_pdp, std
    else:
        return y_pdp
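
Continuing the hypothetical pdp object from the sketch above, eval can return the mean effect, a (mean, std) pair, or all ICE curves:

xs = np.linspace(-1, 1, 50)                                 # (T,) positions along feature 0
y = pdp.eval(feature=0, xs=xs, centering=True)              # mean effect, shape (T,)
y, std = pdp.eval(feature=0, xs=xs, heterogeneity=True, centering=True)
ice = pdp.eval(feature=0, xs=xs, centering=True, return_all=True)  # ICE curves, shape (T, N)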

plot(feature, heterogeneity=False, centering=False, nof_points=30, scale_x=None, scale_y=None, nof_ice='all', show_avg_output=False, y_limits=None)

Plot the PDP or d-PDP.

Parameters:

  feature (int, required)
      index of the plotted feature

  heterogeneity (Union[bool, str], default: False)
      whether to plot the heterogeneity of the local (ICE) effects
        • If heterogeneity is False, no heterogeneity is plotted
        • If heterogeneity is True or "std", the standard deviation of the ICE plots is plotted
        • If heterogeneity is "ice", the ICE plots are plotted

  centering (Union[bool, str], default: False)
      whether to center the PDP
        • If centering is False, the PDP is not centered
        • If centering is True or zero_integral, the PDP is centered around the y axis.
        • If centering is zero_start, the PDP starts from y=0.

  nof_points (int, default: 30)
      number of points to evaluate the PDP plot

  scale_x (Optional[dict], default: None)
      dictionary with keys "mean" and "std" for scaling the x-axis

  scale_y (Optional[dict], default: None)
      dictionary with keys "mean" and "std" for scaling the y-axis

  nof_ice (Union[int, str], default: 'all')
      number of ICE plots to show on top of the PDP curve

  show_avg_output (bool, default: False)
      whether to show the average output of the model

  y_limits (Optional[List], default: None)
      limits of the y-axis
Source code in /home/runner/work/effector/effector/effector/global_effect_pdp.py
def plot(
    self,
    feature: int,
    heterogeneity: Union[bool, str] = False,
    centering: Union[bool, str] = False,
    nof_points: int = 30,
    scale_x: Optional[dict] = None,
    scale_y: Optional[dict] = None,
    nof_ice: Union[int, str] = "all",
    show_avg_output: bool = False,
    y_limits: Optional[List] = None,
):
    """
    Plot the PDP or d-PDP.

    Args:
        feature: index of the plotted feature
        heterogeneity: whether to plot the heterogeneity of the local (ICE) effects

            - If `heterogeneity` is `False`, no heterogeneity is plotted
            - If `heterogeneity` is `True` or `"std"`, the standard deviation of the ICE plots is plotted
            - If `heterogeneity` is `"ice"`, the ICE plots are plotted

        centering: whether to center the PDP

            - If `centering` is `False`, the PDP is not centered
            - If `centering` is `True` or `zero_integral`, the PDP is centered around the `y` axis.
            - If `centering` is `zero_start`, the PDP starts from `y=0`.

        nof_points: number of points to evaluate the PDP plot
        scale_x: dictionary with keys "mean" and "std" for scaling the x-axis
        scale_y: dictionary with keys "mean" and "std" for scaling the y-axis
        nof_ice: number of ICE plots to show on top of the PDP curve
        show_avg_output: whether to show the average output of the model
        y_limits: limits of the y-axis
    """
    heterogeneity = helpers.prep_confidence_interval(heterogeneity)
    centering = helpers.prep_centering(centering)

    x = np.linspace(
        self.axis_limits[0, feature], self.axis_limits[1, feature], nof_points
    )

    yy = self.eval(
        feature, x, heterogeneity=False, centering=centering, return_all=True
    )

    if show_avg_output:
        avg_output = helpers.prep_avg_output(self.data, self.model, self.avg_output, scale_y)
    else:
        avg_output = None

    title = "PDP" if self.method_name == "pdp" else "d-PDP"
    vis.plot_pdp_ice(
        x,
        feature,
        yy=yy,
        title=title,
        confidence_interval=heterogeneity,
        y_pdp_label="PDP" if self.method_name == "pdp" else "d-PDP",
        y_ice_label="ICE" if self.method_name == "pdp" else "d-ICE",
        scale_x=scale_x,
        scale_y=scale_y,
        avg_output=avg_output,
        feature_names=self.feature_names,
        target_name=self.target_name,
        nof_ice=nof_ice,
        y_limits=y_limits,
    )
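
A hedged sketch of a typical call, continuing the hypothetical pdp object from above (the argument values are illustrative):

pdp.plot(
    feature=0,
    heterogeneity="ice",   # overlay the individual ICE curves
    centering=True,
    nof_points=30,
    nof_ice=100,           # subsample the ICE curves drawn on top
)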

effector.global_effect_pdp.PDP

Bases: PDPBase

Source code in /home/runner/work/effector/effector/effector/global_effect_pdp.py
class PDP(PDPBase):
    def __init__(
        self,
        data: np.ndarray,
        model: Callable,
        axis_limits: Optional[np.ndarray] = None,
        nof_instances: Union[int, str] = 300,
        avg_output: Optional[float] = None,
        feature_names: Optional[List] = None,
        target_name: Optional[str] = None,
    ):
        """
        Constructor of the PDP class.

        Definition:
            PDP is defined as:
            $$
            \hat{f}^{PDP}(x_s) = {1 \over N} \sum_{i=1}^N f(x_s, x_C^{(i)})
            $$

            The ICE plots are:
            $$
            \hat{f}^{(i)}(x_s) = f(x_s, x_C^{(i)}), \quad i=1, \dots, N
            $$

            The heterogeneity is:
            $$
            \mathcal{H}^{PDP}(x_s) = \sqrt {{1 \over N} \sum_{i=1}^N ( \hat{f}^{(i)}(x_s) - \hat{f}^{PDP}(x_s) )^2}
            $$

        Notes:
            The required parameters are `data` and `model`. The rest are optional.

        Args:
            data: the design matrix

                - shape: `(N,D)`
            model: the black-box model. Must be a `Callable` with:

                - input: `ndarray` of shape `(N, D)`
                - output: `ndarray` of shape `(N, )`

            axis_limits: The limits of the feature effect plot along each axis

                - use a `ndarray` of shape `(2, D)`, to specify them manually
                - use `None`, to be inferred from the data

            nof_instances: maximum number of instances to be used for PDP.

                - use "all", for using all instances.
                - use an `int`, for using `nof_instances` instances.

            avg_output: The average output of the model.

                - use a `float`, to specify it manually
                - use `None`, to be inferred as `np.mean(model(data))`

            feature_names: The names of the features

                - use a `list` of `str`, to specify the names manually. For example: `["age", "weight", ...]`
                - use `None`, to keep the default names: `["x_0", "x_1", ...]`

            target_name: The name of the target variable

                - use a `str`, to specify its name manually. For example: `"price"`
                - use `None`, to keep the default name: `"y"`
        """

        super(PDP, self).__init__(
            data, model, None, axis_limits, avg_output, nof_instances, feature_names, target_name, method_name="PDP"
        )

__init__(data, model, axis_limits=None, nof_instances=300, avg_output=None, feature_names=None, target_name=None)

Constructor of the PDP class.

Definition

PDP is defined as: $$ \hat{f}^{PDP}(x_s) = {1 \over N} \sum_{i=1}^N f(x_s, x_C^{(i)}) $$

The ICE plots are: $$ \hat{f}^{(i)}(x_s) = f(x_s, x_C^{(i)}), \quad i=1, \dots, N $$

The heterogeneity is: $$ \mathcal{H}^{PDP}(x_s) = \sqrt {{1 \over N} \sum_{i=1}^N ( \hat{f}^{(i)}(x_s) - \hat{f}^{PDP}(x_s) )^2} $$

Notes

The required parameters are data and model. The rest are optional.

Parameters:

  data (np.ndarray, required)
      the design matrix
        • shape: (N,D)

  model (Callable, required)
      the black-box model. Must be a Callable with:
        • input: ndarray of shape (N, D)
        • output: ndarray of shape (N, )

  axis_limits (Optional[np.ndarray], default: None)
      The limits of the feature effect plot along each axis
        • use a ndarray of shape (2, D), to specify them manually
        • use None, to be inferred from the data

  nof_instances (Union[int, str], default: 300)
      maximum number of instances to be used for PDP.
        • use "all", for using all instances.
        • use an int, for using nof_instances instances.

  avg_output (Optional[float], default: None)
      The average output of the model.
        • use a float, to specify it manually
        • use None, to be inferred as np.mean(model(data))

  feature_names (Optional[List], default: None)
      The names of the features
        • use a list of str, to specify the names manually. For example: ["age", "weight", ...]
        • use None, to keep the default names: ["x_0", "x_1", ...]

  target_name (Optional[str], default: None)
      The name of the target variable
        • use a str, to specify its name manually. For example: "price"
        • use None, to keep the default name: "y"
Source code in /home/runner/work/effector/effector/effector/global_effect_pdp.py
def __init__(
    self,
    data: np.ndarray,
    model: Callable,
    axis_limits: Optional[np.ndarray] = None,
    nof_instances: Union[int, str] = 300,
    avg_output: Optional[float] = None,
    feature_names: Optional[List] = None,
    target_name: Optional[str] = None,
):
    """
    Constructor of the PDP class.

    Definition:
        PDP is defined as:
        $$
        \hat{f}^{PDP}(x_s) = {1 \over N} \sum_{i=1}^N f(x_s, x_C^{(i)})
        $$

        The ICE plots are:
        $$
        \hat{f}^{(i)}(x_s) = f(x_s, x_C^{(i)}), \quad i=1, \dots, N
        $$

        The heterogeneity is:
        $$
        \mathcal{H}^{PDP}(x_s) = \sqrt {{1 \over N} \sum_{i=1}^N ( \hat{f}^{(i)}(x_s) - \hat{f}^{PDP}(x_s) )^2}
        $$

    Notes:
        The required parameters are `data` and `model`. The rest are optional.

    Args:
        data: the design matrix

            - shape: `(N,D)`
        model: the black-box model. Must be a `Callable` with:

            - input: `ndarray` of shape `(N, D)`
            - output: `ndarray` of shape `(N, )`

        axis_limits: The limits of the feature effect plot along each axis

            - use a `ndarray` of shape `(2, D)`, to specify them manually
            - use `None`, to be inferred from the data

        nof_instances: maximum number of instances to be used for PDP.

            - use "all", for using all instances.
            - use an `int`, for using `nof_instances` instances.

        avg_output: The average output of the model.

            - use a `float`, to specify it manually
            - use `None`, to be inferred as `np.mean(model(data))`

        feature_names: The names of the features

            - use a `list` of `str`, to specify the names manually. For example: `["age", "weight", ...]`
            - use `None`, to keep the default names: `["x_0", "x_1", ...]`

        target_name: The name of the target variable

            - use a `str`, to specify its name manually. For example: `"price"`
            - use `None`, to keep the default name: `"y"`
    """

    super(PDP, self).__init__(
        data, model, None, axis_limits, avg_output, nof_instances, feature_names, target_name, method_name="PDP"
    )
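
A construction sketch with the optional arguments spelled out; the data, model, limits, and names below are hypothetical, not part of the library:

import numpy as np
from effector.global_effect_pdp import PDP

data = np.random.uniform(0, 1, size=(1000, 3))           # hypothetical design matrix
model = lambda x: 3 * x[:, 0] - 2 * x[:, 1] * x[:, 2]    # hypothetical black box

pdp = PDP(
    data=data,
    model=model,
    axis_limits=np.array([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]]),  # shape (2, D)
    nof_instances=300,
    feature_names=["age", "weight", "height"],
    target_name="price",
)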

effector.global_effect_pdp.DerPDP

Bases: PDPBase

Source code in /home/runner/work/effector/effector/effector/global_effect_pdp.py
class DerPDP(PDPBase):
    def __init__(
            self,
            data: np.ndarray,
            model: Callable,
            model_jac: Optional[Callable] = None,
            axis_limits: Optional[np.ndarray] = None,
            nof_instances: Union[int, str] = 300,
            avg_output: Optional[float] = None,
            feature_names: Optional[List] = None,
            target_name: Optional[str] = None,
    ):
        """
        Constructor of the DerivativePDP class.

        Definition:
            d-PDP is defined as:
            $$
            \hat{f}^{d-PDP}(x_s) = {1 \over N} \sum_{i=1}^N {df \over d x_s} (x_s, x_C^i)
            $$

            The d-ICE plots are:
            $$
            \hat{f}^i(x_s) = {df \over d x_s}(x_s, x_C^i), \quad i=1, \dots, N
            $$

            The heterogeneity is:
            $$
            \mathcal{H}^{d-PDP}(x_s) = \sqrt {{1 \over N} \sum_{i=1}^N ( \hat{f}^i(x_s) - \hat{f}^{d-PDP}(x_s) )^2}
            $$

        Notes:
            - The required parameters are `data` and `model`. The rest are optional.
            - The `model_jac` is the Jacobian of the model. If `None`, the Jacobian will be computed numerically.

        Args:
            data: the design matrix

                - shape: `(N,D)`
            model: the black-box model. Must be a `Callable` with:

                - input: `ndarray` of shape `(N, D)`
                - output: `ndarray` of shape `(N, )`

            model_jac: the black-box model Jacobian. Must be a `Callable` with:

                - input: `ndarray` of shape `(N, D)`
                - output: `ndarray` of shape `(N, D)`

            axis_limits: The limits of the feature effect plot along each axis

                - use a `ndarray` of shape `(2, D)`, to specify them manually
                - use `None`, to be inferred from the data

            nof_instances: maximum number of instances to be used for PDP.

                - use "all", for using all instances.
                - use an `int`, for using `nof_instances` instances.

            avg_output: The average output of the model.

                - use a `float`, to specify it manually
                - use `None`, to be inferred as `np.mean(model(data))`

            feature_names: The names of the features

                - use a `list` of `str`, to specify the names manually. For example: `["age", "weight", ...]`
                - use `None`, to keep the default names: `["x_0", "x_1", ...]`

            target_name: The name of the target variable

                - use a `str`, to specify its name manually. For example: `"price"`
                - use `None`, to keep the default name: `"y"`
        """

        super(DerPDP, self).__init__(
            data, model, model_jac, axis_limits, avg_output, nof_instances, feature_names, target_name, method_name="d-PDP"
        )

__init__(data, model, model_jac=None, axis_limits=None, nof_instances=300, avg_output=None, feature_names=None, target_name=None)

Constructor of the DerivativePDP class.

Definition

d-PDP is defined as: $$ \hat{f}^{d-PDP}(x_s) = {1 \over N} \sum_{i=1}^N {df \over d x_s} (x_s, x_C^i) $$

The d-ICE plots are: $$ \hat{f}^i(x_s) = {df \over d x_s}(x_s, x_C^i), \quad i=1, \dots, N $$

The heterogeneity is: $$ \mathcal{H}^{d-PDP}(x_s) = \sqrt {{1 \over N} \sum_{i=1}^N ( \hat{f}^i(x_s) - \hat{f}^{d-PDP}(x_s) )^2} $$

Notes
  • The required parameters are data and model. The rest are optional.
  • The model_jac is the Jacobian of the model. If None, the Jacobian will be computed numerically.

Parameters:

  data (np.ndarray, required)
      the design matrix
        • shape: (N,D)

  model (Callable, required)
      the black-box model. Must be a Callable with:
        • input: ndarray of shape (N, D)
        • output: ndarray of shape (N, )

  model_jac (Optional[Callable], default: None)
      the black-box model Jacobian. Must be a Callable with:
        • input: ndarray of shape (N, D)
        • output: ndarray of shape (N, D)

  axis_limits (Optional[np.ndarray], default: None)
      The limits of the feature effect plot along each axis
        • use a ndarray of shape (2, D), to specify them manually
        • use None, to be inferred from the data

  nof_instances (Union[int, str], default: 300)
      maximum number of instances to be used for PDP.
        • use "all", for using all instances.
        • use an int, for using nof_instances instances.

  avg_output (Optional[float], default: None)
      The average output of the model.
        • use a float, to specify it manually
        • use None, to be inferred as np.mean(model(data))

  feature_names (Optional[List], default: None)
      The names of the features
        • use a list of str, to specify the names manually. For example: ["age", "weight", ...]
        • use None, to keep the default names: ["x_0", "x_1", ...]

  target_name (Optional[str], default: None)
      The name of the target variable
        • use a str, to specify its name manually. For example: "price"
        • use None, to keep the default name: "y"
Source code in /home/runner/work/effector/effector/effector/global_effect_pdp.py
def __init__(
        self,
        data: np.ndarray,
        model: Callable,
        model_jac: Optional[Callable] = None,
        axis_limits: Optional[np.ndarray] = None,
        nof_instances: Union[int, str] = 300,
        avg_output: Optional[float] = None,
        feature_names: Optional[List] = None,
        target_name: Optional[str] = None,
):
    """
    Constructor of the DerivativePDP class.

    Definition:
        d-PDP is defined as:
        $$
        \hat{f}^{d-PDP}(x_s) = {1 \over N} \sum_{i=1}^N {df \over d x_s} (x_s, x_C^i)
        $$

        The d-ICE plots are:
        $$
        \hat{f}^i(x_s) = {df \over d x_s}(x_s, x_C^i), \quad i=1, \dots, N
        $$

        The heterogeneity is:
        $$
        \mathcal{H}^{d-PDP}(x_s) = \sqrt {{1 \over N} \sum_{i=1}^N ( \hat{f}^i(x_s) - \hat{f}^{d-PDP}(x_s) )^2}
        $$

    Notes:
        - The required parameters are `data` and `model`. The rest are optional.
        - The `model_jac` is the Jacobian of the model. If `None`, the Jacobian will be computed numerically.

    Args:
        data: the design matrix

            - shape: `(N,D)`
        model: the black-box model. Must be a `Callable` with:

            - input: `ndarray` of shape `(N, D)`
            - output: `ndarray` of shape `(N, )`

        model_jac: the black-box model Jacobian. Must be a `Callable` with:

            - input: `ndarray` of shape `(N, D)`
            - output: `ndarray` of shape `(N, D)`

        axis_limits: The limits of the feature effect plot along each axis

            - use a `ndarray` of shape `(2, D)`, to specify them manually
            - use `None`, to be inferred from the data

        nof_instances: maximum number of instances to be used for PDP.

            - use "all", for using all instances.
            - use an `int`, for using `nof_instances` instances.

        avg_output: The average output of the model.

            - use a `float`, to specify it manually
            - use `None`, to be inferred as `np.mean(model(data))`

        feature_names: The names of the features

            - use a `list` of `str`, to specify the names manually. For example: `["age", "weight", ...]`
            - use `None`, to keep the default names: `["x_0", "x_1", ...]`

        target_name: The name of the target variable

            - use a `str`, to specify its name manually. For example: `"price"`
            - use `None`, to keep the default name: `"y"`
    """

    super(DerPDP, self).__init__(
        data, model, model_jac, axis_limits, avg_output, nof_instances, feature_names, target_name, method_name="d-PDP"
    )
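
A hedged sketch with an analytic Jacobian; the toy model and its Jacobian below are illustrative. If model_jac were omitted, the Jacobian would be computed numerically, as noted above:

import numpy as np
from effector.global_effect_pdp import DerPDP

data = np.random.uniform(-1, 1, size=(500, 2))   # hypothetical design matrix
model = lambda x: x[:, 0] ** 2 + x[:, 1]         # hypothetical black box

def model_jac(x):
    # analytic Jacobian of the toy model, shape (N, D)
    jac = np.zeros_like(x)
    jac[:, 0] = 2 * x[:, 0]   # df/dx_0
    jac[:, 1] = 1.0           # df/dx_1
    return jac

d_pdp = DerPDP(data=data, model=model, model_jac=model_jac)
d_pdp.plot(feature=0, heterogeneity="ice")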

effector.global_effect_shap.ShapDP

Bases: GlobalEffectBase

Source code in /home/runner/work/effector/effector/effector/global_effect_shap.py
class ShapDP(GlobalEffectBase):
    def __init__(
            self,
            data: np.ndarray,
            model: Callable,
            axis_limits: Optional[np.ndarray] = None,
            nof_instances: Union[int, str] = 100,
            avg_output: Optional[float] = None,
            feature_names: Optional[List[str]] = None,
            target_name: Optional[str] = None,
    ):
        """
        Constructor of the SHAPDependence class.

        Definition:
            The value of a coalition of $S$ features is estimated as:
            $$
            \hat{v}(S) = {1 \over N} \sum_{i=1}^N  f(x_S \cup x_C^i) - f(x^i)
            $$
            The value of a coalition $S$ quantifies what the values $\mathbf{x}_S$ of the features in $S$ contribute to the output of the model. It
            is the average (over all instances) difference on the output between setting features in $S$ to be $x_S$, i.e., $\mathbf{x} = (\mathbf{x}_S, \mathbf{x}_C^i)$ and leaving the instance as it is, i.e., $\mathbf{x}^i = (\mathbf{x}_S^i, \mathbf{x}_C^i)$.

            The contribution of a feature $j$ added to a coalition $S$ is estimated as:
            $$
            \hat{\Delta}_{S, j} = \hat{v}(S \cup \{j\}) - \hat{v}(S)
            $$

            The SHAP value of a feature $j$ with value $x_j$ is the average contribution of feature $j$ across all possible coalitions with a weight $w_{S, j}$:

            $$
            \hat{\phi}_j(x_j) = {1 \over N} \sum_{S \subseteq \{1, \dots, D\} \setminus \{j\}} w_{S, j} \hat{\Delta}_{S, j}
            $$

            where $w_{S, j}$ assures that the contribution of feature $j$ is the same for all coalitions of the same size. For example, there are $D-1$ ways for $x_j$ to enter a coalition of $|S| = 1$ feature, so $w_{S, j} = {1 \over D (D-1)}$ for each of them. In contrast, there is only one way for $x_j$ to enter a coalition of $|S|=0$ (to be the first specified feature), so $w_{S, j} = {1 \over D}$.

            The SHAP Dependence Plot (SHAP-DP) is a spline $\hat{f}^{SDP}_j(x_j)$ fit to the dataset $\{(x_j^i, \hat{\phi}_j(x_j^i))\}_{i=1}^N$ using the `UnivariateSpline` function from `scipy.interpolate`.

        Notes:
            * The required parameters are `data` and `model`. The rest are optional.
            * SHAP values are computed using the `shap` package, using the class `Explainer`.
            * SHAP values are centered by default, i.e., the average SHAP value is subtracted from the SHAP values.
            * More details on the SHAP values can be found in the [original paper](https://arxiv.org/abs/1705.07874) and in the book [Interpreting Machine Learning Models with SHAP](https://christophmolnar.com/books/shap/)

        Args:
            data: the design matrix

                - shape: `(N,D)`
            model: the black-box model. Must be a `Callable` with:

                - input: `ndarray` of shape `(N, D)`
                - output: `ndarray` of shape `(N,)`

            axis_limits: The limits of the feature effect plot along each axis

                - use a `ndarray` of shape `(2, D)`, to specify them manually
                - use `None`, to be inferred from the data

            nof_instances: maximum number of instances to be used for SHAP estimation.

                - use "all", for using all instances.
                - use an `int`, for using `nof_instances` instances.

            avg_output: The average output of the model.

                - use a `float`, to specify it manually
                - use `None`, to be inferred as `np.mean(model(data))`

            feature_names: The names of the features

                - use a `list` of `str`, to specify the names manually. For example: `["age", "weight", ...]`
                - use `None`, to keep the default names: `["x_0", "x_1", ...]`

            target_name: The name of the target variable

                - use a `str`, to specify its name manually. For example: `"price"`
                - use `None`, to keep the default name: `"y"`
        """
        self.nof_instances, self.indices = helpers.prep_nof_instances(
            nof_instances, data.shape[0]
        )
        data = data[self.indices, :]

        super(ShapDP, self).__init__(
            "SHAP DP", data, model, nof_instances, axis_limits, avg_output, feature_names, target_name
        )

    def _fit_feature(
        self,
        feature: int,
        centering: typing.Union[bool, str] = False,
        points_for_centering: int = 100,
    ) -> typing.Dict:

        # drop points outside of limits
        self.data = self.data[self.data[:, feature] >= self.axis_limits[0, feature]]
        self.data = self.data[self.data[:, feature] <= self.axis_limits[1, feature]]

        # compute shap values
        data = self.data
        shap_explainer = shap.Explainer(self.model, data)
        explanation = shap_explainer(data)

        # extract x and y pairs
        yy = explanation.values[:, feature]
        xx = data[:, feature]

        # make xx monotonic
        idx = np.argsort(xx)
        xx = xx[idx]
        yy = yy[idx]

        # fit spline_mean to xx, yy pairs
        spline_mean = UnivariateSpline(xx, yy)

        # fit spline_mean to the sqrt of the residuals
        yy_std = np.abs(yy - spline_mean(xx))
        spline_std = UnivariateSpline(xx, yy_std)

        # compute norm constant
        if centering == "zero_integral":
            x_norm = np.linspace(xx[0], xx[-1], points_for_centering)
            y_norm = spline_mean(x_norm)
            norm_const = np.trapz(y_norm, x_norm) / (xx[-1] - xx[0])
        elif centering == "zero_start":
            norm_const = spline_mean(xx[0])
        else:
            norm_const = helpers.EMPTY_SYMBOL

        ret_dict = {
            "spline_mean": spline_mean,
            "spline_std": spline_std,
            "xx": xx,
            "yy": yy,
            "norm_const": norm_const,
        }
        return ret_dict

    def fit(
            self,
            features: Union[int, str, List] = "all",
            centering: Union[bool, str] = False,
            points_for_centering: Union[int, str] = 100,
    ) -> None:
        """Fit the SHAP Dependence Plot to the data.

        Notes:
            The SHAP Dependence Plot (SDP) $\hat{f}^{SDP}_j(x_j)$ is a spline fit to
            the dataset $\{(x_j^i, \hat{\phi}_j(x_j^i))\}_{i=1}^N$
            using the `UnivariateSpline` function from `scipy.interpolate`.

            The SHAP standard deviation, $\hat{\sigma}^{SDP}_j(x_j)$, is a spline fit to the absolute value of the residuals, i.e., to the dataset $\{(x_j^i, |\hat{\phi}_j(x_j^i) - \hat{f}^{SDP}_j(x_j^i)|)\}_{i=1}^N$, using the `UnivariateSpline` function from `scipy.interpolate`.

        Args:
            features: the features to fit.
                - If set to "all", all the features will be fitted.
            centering:
                - If set to False, no centering will be applied.
                - If set to "zero_integral" or True, the integral of the feature effect will be set to zero.
                - If set to "zero_mean", the mean of the feature effect will be set to zero.

            points_for_centering: number of linspaced points along the feature axis used for centering.

                - If set to `all`, all the dataset points will be used.

        Notes:
            SHAP values are by default centered, i.e., $\sum_{i=1}^N \hat{\phi}_j(x_j^i) = 0$. This does not mean that the SHAP _curve_ is centered around zero; this happens only if the $s$-th feature of the dataset instances, i.e., the set $\{x_s^i\}_{i=1}^N$ is uniformly distributed along the $s$-th axis. So, use:

            * `centering=False`, to leave the SHAP values as they are.
            * `centering=True` or `centering=zero_integral`, to center the SHAP curve around the `y` axis.
            * `centering=zero_start`, to start the SHAP curve from `y=0`.

            SHAP values are expensive to compute.
            To speed up the computation consider using a subset of the dataset
            points for computing the SHAP values and for centering the spline.
            The default values (`nof_instances=100`
            and `points_for_centering=100`) are a moderate choice.
        """
        centering = helpers.prep_centering(centering)
        features = helpers.prep_features(features, self.dim)

        # fit each requested feature and cache its effect
        for s in features:
            self.feature_effect["feature_" + str(s)] = self._fit_feature(
                s, centering, points_for_centering
            )
            self.is_fitted[s] = True
            self.method_args["feature_" + str(s)] = {
                "centering": centering,
                "points_for_centering": points_for_centering,
            }

    def eval(
        self,
        feature: int,
        xs: np.ndarray,
        heterogeneity: bool = False,
        centering: typing.Union[bool, str] = False,
    ) -> typing.Union[np.ndarray, typing.Tuple[np.ndarray, np.ndarray]]:
        """Evaluate the effect of the s-th feature at positions `xs`.

        Args:
            feature: index of feature of interest
            xs: the points along the s-th axis to evaluate the FE plot

              - `np.ndarray` of shape `(T,)`
            heterogeneity: whether to return the heterogeneity measures.

                  - if `heterogeneity=False`, the function returns the mean effect at the given `xs`
                  - If `heterogeneity=True`, the function returns `(y, std)` where `y` is the mean effect and `std` is the standard deviation of the mean effect

            centering: whether to center the plot

                - If `centering` is `False`, the SHAP curve is not centered
                - If `centering` is `True` or `zero_integral`, the SHAP curve is centered around the `y` axis.
                - If `centering` is `zero_start`, the SHAP curve starts from `y=0`.

        Returns:
            the mean effect `y`, if `heterogeneity=False` (default) or a tuple `(y, std)` otherwise
        """
        centering = helpers.prep_centering(centering)

        if self.refit(feature, centering):
            self.fit(features=feature, centering=centering)

        # Check if the lower bound is less than the upper bound
        assert self.axis_limits[0, feature] < self.axis_limits[1, feature]

        yy = self.feature_effect["feature_" + str(feature)]["spline_mean"](xs)

        if centering is not False:
            norm_const = self.feature_effect["feature_" + str(feature)]["norm_const"]
            yy = yy - norm_const

        if heterogeneity:
            yy_std = self.feature_effect["feature_" + str(feature)]["spline_std"](xs)
            return yy, yy_std
        else:
            return yy

    def plot(
        self,
        feature: int,
        heterogeneity: Union[bool, str] = False,
        centering: Union[bool, str] = False,
        nof_points: int = 30,
        scale_x: Optional[dict] = None,
        scale_y: Optional[dict] = None,
        nof_shap_values: Union[int, str] = "all",
        show_avg_output: bool = False,
        y_limits: Optional[List] = None,
    ) -> None:
        """
        Plot the SHAP Dependence Plot (SDP) of the s-th feature.

        Args:
            feature: index of the plotted feature
            heterogeneity: whether to output the heterogeneity of the SHAP values

                - If `heterogeneity` is `False`, no heterogeneity is plotted
                - If `heterogeneity` is `True` or `"std"`, the standard deviation of the shap values is plotted
                - If `heterogeneity` is `"shap_values"`, the shap values are scattered on top of the SHAP curve

            centering: whether to center the SDP

                - If `centering` is `False`, the SHAP curve is not centered
                - If `centering` is `True` or `zero_integral`, the SHAP curve is centered around the `y` axis.
                - If `centering` is `zero_start`, the SHAP curve starts from `y=0`.

            nof_points: number of points to evaluate the SDP plot
            scale_x: dictionary with keys "mean" and "std" for scaling the x-axis
            scale_y: dictionary with keys "mean" and "std" for scaling the y-axis
            nof_shap_values: number of shap values to show on top of the SHAP curve
            show_avg_output: whether to show the average output of the model
            y_limits: limits of the y-axis
        """
        heterogeneity = helpers.prep_confidence_interval(heterogeneity)

        x = np.linspace(
            self.axis_limits[0, feature], self.axis_limits[1, feature], nof_points
        )

        # get the SHAP curve
        y = self.eval(feature, x, heterogeneity=False, centering=centering)
        y_std = (
            self.feature_effect["feature_" + str(feature)]["spline_std"](x)
            if heterogeneity == "std"
            else None
        )

        # get some SHAP values
        _, ind = helpers.prep_nof_instances(nof_shap_values, self.data.shape[0])
        yy = (
            self.feature_effect["feature_" + str(feature)]["yy"][ind]
            if heterogeneity == "shap_values"
            else None
        )
        if yy is not None and centering is not False:
            yy = yy - self.feature_effect["feature_" + str(feature)]["norm_const"]
        xx = (
            self.feature_effect["feature_" + str(feature)]["xx"][ind]
            if heterogeneity == "shap_values"
            else None
        )

        if show_avg_output:
            avg_output = helpers.prep_avg_output(self.data, self.model, self.avg_output, scale_y)
        else:
            avg_output = None

        vis.plot_shap(
            x,
            y,
            xx,
            yy,
            y_std,
            feature,
            heterogeneity=heterogeneity,
            scale_x=scale_x,
            scale_y=scale_y,
            avg_output=avg_output,
            feature_names=self.feature_names,
            target_name=self.target_name,
            y_limits=y_limits
        )

__init__(data, model, axis_limits=None, nof_instances=100, avg_output=None, feature_names=None, target_name=None)

Constructor of the SHAPDependence class.

Definition

The value of a coalition of \(S\) features is estimated as: $$ \hat{v}(S) = {1 \over N} \sum_{i=1}^N f(x_S \cup x_C^i) - f(x^i) $$ The value of a coalition \(S\) quantifies what the values \(\mathbf{x}_S\) of the features in \(S\) contribute to the output of the model. It is the average (over all instances) difference on the output between setting features in \(S\) to be \(x_S\), i.e., \(\mathbf{x} = (\mathbf{x}_S, \mathbf{x}_C^i)\) and leaving the instance as it is, i.e., \(\mathbf{x}^i = (\mathbf{x}_S^i, \mathbf{x}_C^i)\).

The contribution of a feature \(j\) added to a coalition \(S\) is estimated as: $$ \hat{\Delta}_{S, j} = \hat{v}(S \cup \{j\}) - \hat{v}(S) $$

The SHAP value of a feature \(j\) with value \(x_j\) is the average contribution of feature \(j\) across all possible coalitions with a weight \(w_{S, j}\):

\[ \hat{\phi}_j(x_j) = {1 \over N} \sum_{S \subseteq \{1, \dots, D\} \setminus \{j\}} w_{S, j} \hat{\Delta}_{S, j} \]

where \(w_{S, j}\) assures that the contribution of feature \(j\) is the same for all coalitions of the same size. For example, there are \(D-1\) ways for \(x_j\) to enter a coalition of \(|S| = 1\) feature, so \(w_{S, j} = {1 \over D (D-1)}\) for each of them. In contrast, there is only one way for \(x_j\) to enter a coalition of \(|S|=0\) (to be the first specified feature), so \(w_{S, j} = {1 \over D}\).

The SHAP Dependence Plot (SHAP-DP) is a spline \(\hat{f}^{SDP}_j(x_j)\) fit to the dataset \(\{(x_j^i, \hat{\phi}_j(x_j^i))\}_{i=1}^N\) using the UnivariateSpline function from scipy.interpolate.

Notes
  • The required parameters are data and model. The rest are optional.
  • SHAP values are computed using the shap package, using the class Explainer.
  • SHAP values are centered by default, i.e., the average SHAP value is subtracted from the SHAP values.
  • More details on the SHAP values can be found in the original paper and in the book Interpreting Machine Learning Models with SHAP

Parameters:

  data (np.ndarray, required)
      the design matrix
        • shape: (N,D)

  model (Callable, required)
      the black-box model. Must be a Callable with:
        • input: ndarray of shape (N, D)
        • output: ndarray of shape (N,)

  axis_limits (Optional[np.ndarray], default: None)
      The limits of the feature effect plot along each axis
        • use a ndarray of shape (2, D), to specify them manually
        • use None, to be inferred from the data

  nof_instances (Union[int, str], default: 100)
      maximum number of instances to be used for SHAP estimation.
        • use "all", for using all instances.
        • use an int, for using nof_instances instances.

  avg_output (Optional[float], default: None)
      The average output of the model.
        • use a float, to specify it manually
        • use None, to be inferred as np.mean(model(data))

  feature_names (Optional[List[str]], default: None)
      The names of the features
        • use a list of str, to specify the names manually. For example: ["age", "weight", ...]
        • use None, to keep the default names: ["x_0", "x_1", ...]

  target_name (Optional[str], default: None)
      The name of the target variable
        • use a str, to specify its name manually. For example: "price"
        • use None, to keep the default name: "y"
Source code in /home/runner/work/effector/effector/effector/global_effect_shap.py
def __init__(
        self,
        data: np.ndarray,
        model: Callable,
        axis_limits: Optional[np.ndarray] = None,
        nof_instances: Union[int, str] = 100,
        avg_output: Optional[float] = None,
        feature_names: Optional[List[str]] = None,
        target_name: Optional[str] = None,
):
    """
    Constructor of the SHAPDependence class.

    Definition:
        The value of a coalition of $S$ features is estimated as:
        $$
        \hat{v}(S) = {1 \over N} \sum_{i=1}^N  f(x_S \cup x_C^i) - f(x^i)
        $$
        The value of a coalition $S$ quantifies what the values $\mathbf{x}_S$ of the features in $S$ contribute to the output of the model. It
        is the average (over all instances) difference on the output between setting features in $S$ to be $x_S$, i.e., $\mathbf{x} = (\mathbf{x}_S, \mathbf{x}_C^i)$ and leaving the instance as it is, i.e., $\mathbf{x}^i = (\mathbf{x}_S^i, \mathbf{x}_C^i)$.

        The contribution of a feature $j$ added to a coalition $S$ is estimated as:
        $$
        \hat{\Delta}_{S, j} = \hat{v}(S \cup \{j\}) - \hat{v}(S)
        $$

        The SHAP value of a feature $j$ with value $x_j$ is the average contribution of feature $j$ across all possible coalitions with a weight $w_{S, j}$:

        $$
        \hat{\phi}_j(x_j) = {1 \over N} \sum_{S \subseteq \{1, \dots, D\} \setminus \{j\}} w_{S, j} \hat{\Delta}_{S, j}
        $$

        where $w_{S, j}$ assures that the contribution of feature $j$ is the same for all coalitions of the same size. For example, there are $D-1$ ways for $x_j$ to enter a coalition of $|S| = 1$ feature, so $w_{S, j} = {1 \over D (D-1)}$ for each of them. In contrast, there is only one way for $x_j$ to enter a coalition of $|S|=0$ (to be the first specified feature), so $w_{S, j} = {1 \over D}$.

        The SHAP Dependence Plot (SHAP-DP) is a spline $\hat{f}^{SDP}_j(x_j)$ fit to the dataset $\{(x_j^i, \hat{\phi}_j(x_j^i))\}_{i=1}^N$ using the `UnivariateSpline` function from `scipy.interpolate`.

    Notes:
        * The required parameters are `data` and `model`. The rest are optional.
        * SHAP values are computed using the `shap` package, using the class `Explainer`.
        * SHAP values are centered by default, i.e., the average SHAP value is subtracted from the SHAP values.
        * More details on the SHAP values can be found in the [original paper](https://arxiv.org/abs/1705.07874) and in the book [Interpreting Machine Learning Models with SHAP](https://christophmolnar.com/books/shap/)

    Args:
        data: the design matrix

            - shape: `(N,D)`
        model: the black-box model. Must be a `Callable` with:

            - input: `ndarray` of shape `(N, D)`
            - output: `ndarray` of shape `(N,)`

        axis_limits: The limits of the feature effect plot along each axis

            - use a `ndarray` of shape `(2, D)`, to specify them manually
            - use `None`, to be inferred from the data

        nof_instances: maximum number of instances to be used for SHAP estimation.

            - use "all", for using all instances.
            - use an `int`, for using `nof_instances` instances.

        avg_output: The average output of the model.

            - use a `float`, to specify it manually
            - use `None`, to be inferred as `np.mean(model(data))`

        feature_names: The names of the features

            - use a `list` of `str`, to specify the names manually. For example: `["age", "weight", ...]`
            - use `None`, to keep the default names: `["x_0", "x_1", ...]`

        target_name: The name of the target variable

            - use a `str`, to specify its name manually. For example: `"price"`
            - use `None`, to keep the default name: `"y"`
    """
    self.nof_instances, self.indices = helpers.prep_nof_instances(
        nof_instances, data.shape[0]
    )
    data = data[self.indices, :]

    super(ShapDP, self).__init__(
        "SHAP DP", data, model, nof_instances, axis_limits, avg_output, feature_names, target_name
    )
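
A construction sketch under hypothetical toy data (requires the shap package; keeping nof_instances small matters because SHAP values are expensive to compute):

import numpy as np
from effector.global_effect_shap import ShapDP

data = np.random.uniform(-1, 1, size=(200, 2))   # hypothetical design matrix
model = lambda x: x[:, 0] ** 2 + x[:, 1]         # hypothetical black box

shap_dp = ShapDP(data=data, model=model, nof_instances=100)
shap_dp.plot(feature=0, heterogeneity="shap_values", centering=True)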

fit(features='all', centering=False, points_for_centering=100)

Fit the SHAP Dependence Plot to the data.

Notes

The SHAP Dependence Plot (SDP) \(\hat{f}^{SDP}_j(x_j)\) is a spline fit to the dataset \(\{(x_j^i, \hat{\phi}_j(x_j^i))\}_{i=1}^N\) using the UnivariateSpline function from scipy.interpolate.

The SHAP standard deviation, \(\hat{\sigma}^{SDP}_j(x_j)\), is a spline fit to the absolute value of the residuals, i.e., to the dataset \(\{(x_j^i, |\hat{\phi}_j(x_j^i) - \hat{f}^{SDP}_j(x_j^i)|)\}_{i=1}^N\), using the UnivariateSpline function from scipy.interpolate.

Parameters:

  features (Union[int, str, List], default: 'all')
      the features to fit.
        • If set to "all", all the features will be fitted.

  centering (Union[bool, str], default: False)
        • If set to False, no centering will be applied.
        • If set to "zero_integral" or True, the integral of the feature effect will be set to zero.
        • If set to "zero_start", the feature effect will start from zero.

  points_for_centering (Union[int, str], default: 100)
      number of linspaced points along the feature axis used for centering.
        • If set to "all", all the dataset points will be used.
Notes

SHAP values are by default centered, i.e., \(\sum_{i=1}^N \hat{\phi}_j(x_j^i) = 0\). This does not mean that the SHAP curve is centered around zero; this happens only if the \(s\)-th feature of the dataset instances, i.e., the set \(\{x_s^i\}_{i=1}^N\) is uniformly distributed along the \(s\)-th axis. So, use:

  • centering=False, to leave the SHAP values as they are.
  • centering=True or centering=zero_integral, to center the SHAP curve around the y axis.
  • centering=zero_start, to start the SHAP curve from y=0.

SHAP values are expensive to compute. To speed up the computation consider using a subset of the dataset points for computing the SHAP values and for centering the spline. The default values (nof_instances=100 and points_for_centering=100) are a moderate choice.

Source code in /home/runner/work/effector/effector/effector/global_effect_shap.py
def fit(
        self,
        features: Union[int, str, List] = "all",
        centering: Union[bool, str] = False,
        points_for_centering: Union[int, str] = 100,
) -> None:
    """Fit the SHAP Dependence Plot to the data.

    Notes:
        The SHAP Dependence Plot (SDP) $\hat{f}^{SDP}_j(x_j)$ is a spline fit to
        the dataset $\{(x_j^i, \hat{\phi}_j(x_j^i))\}_{i=1}^N$
        using the `UnivariateSpline` function from `scipy.interpolate`.

        The SHAP standard deviation, $\hat{\sigma}^{SDP}_j(x_j)$, is a spline fit to the absolute value of the residuals, i.e., to the dataset $\{(x_j^i, |\hat{\phi}_j(x_j^i) - \hat{f}^{SDP}_j(x_j^i)|)\}_{i=1}^N$, using the `UnivariateSpline` function from `scipy.interpolate`.

    Args:
        features: the features to fit.
            - If set to "all", all the features will be fitted.
        centering:
            - If set to False, no centering will be applied.
            - If set to "zero_integral" or True, the integral of the feature effect will be set to zero.
            - If set to "zero_mean", the mean of the feature effect will be set to zero.

        points_for_centering: number of linspaced points along the feature axis used for centering.

            - If set to `all`, all the dataset points will be used.

    Notes:
        SHAP values are by default centered, i.e., $\sum_{i=1}^N \hat{\phi}_j(x_j^i) = 0$. This does not mean that the SHAP _curve_ is centered around zero; this happens only if the $s$-th feature of the dataset instances, i.e., the set $\{x_s^i\}_{i=1}^N$ is uniformly distributed along the $s$-th axis. So, use:

        * `centering=False`, to leave the SHAP values as they are.
        * `centering=True` or `centering=zero_integral`, to center the SHAP curve around the `y` axis.
        * `centering=zero_start`, to start the SHAP curve from `y=0`.

        SHAP values are expensive to compute.
        To speed up the computation consider using a subset of the dataset
        points for computing the SHAP values and for centering the spline.
        The default values (`nof_instances=100`
        and `points_for_centering=100`) are a moderate choice.
    """
    centering = helpers.prep_centering(centering)
    features = helpers.prep_features(features, self.dim)

    # fit each requested feature and cache its effect
    for s in features:
        self.feature_effect["feature_" + str(s)] = self._fit_feature(
            s, centering, points_for_centering
        )
        self.is_fitted[s] = True
        self.method_args["feature_" + str(s)] = {
            "centering": centering,
            "points_for_centering": points_for_centering,
        }
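
Continuing the hypothetical shap_dp object from the constructor sketch above, a fit call with explicit centering might look like:

shap_dp.fit(features=[0, 1], centering="zero_start", points_for_centering=100)  # curves start from y=0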

eval(feature, xs, heterogeneity=False, centering=False)

Evaluate the effect of the s-th feature at positions xs.

Parameters:

  feature (int, required)
      index of feature of interest

  xs (np.ndarray, required)
      the points along the s-th axis to evaluate the FE plot
        • np.ndarray of shape (T,)

  heterogeneity (bool, default: False)
      whether to return the heterogeneity measures.
        • if heterogeneity=False, the function returns the mean effect at the given xs
        • If heterogeneity=True, the function returns (y, std) where y is the mean effect and std is the standard deviation of the mean effect

  centering (typing.Union[bool, str], default: False)
      whether to center the plot
        • If centering is False, the SHAP curve is not centered
        • If centering is True or zero_integral, the SHAP curve is centered around the y axis.
        • If centering is zero_start, the SHAP curve starts from y=0.

Returns:

  typing.Union[np.ndarray, typing.Tuple[np.ndarray, np.ndarray]]
      the mean effect y, if heterogeneity=False (default) or a tuple (y, std) otherwise

Source code in /home/runner/work/effector/effector/effector/global_effect_shap.py
def eval(
    self,
    feature: int,
    xs: np.ndarray,
    heterogeneity: bool = False,
    centering: typing.Union[bool, str] = False,
) -> typing.Union[np.ndarray, typing.Tuple[np.ndarray, np.ndarray]]:
    """Evaluate the effect of the s-th feature at positions `xs`.

    Args:
        feature: index of feature of interest
        xs: the points along the s-th axis to evaluate the FE plot

          - `np.ndarray` of shape `(T,)`
        heterogeneity: whether to return the heterogeneity measures.

              - if `heterogeneity=False`, the function returns the mean effect at the given `xs`
              - If `heterogeneity=True`, the function returns `(y, std)` where `y` is the mean effect and `std` is the standard deviation of the mean effect

        centering: whether to center the plot

            - If `centering` is `False`, the SHAP curve is not centered
            - If `centering` is `True` or `zero_integral`, the SHAP curve is centered around the `y` axis.
            - If `centering` is `zero_start`, the SHAP curve starts from `y=0`.

    Returns:
        the mean effect `y`, if `heterogeneity=False` (default) or a tuple `(y, std)` otherwise
    """
    centering = helpers.prep_centering(centering)

    if self.refit(feature, centering):
        self.fit(features=feature, centering=centering)

    # Check if the lower bound is less than the upper bound
    assert self.axis_limits[0, feature] < self.axis_limits[1, feature]

    yy = self.feature_effect["feature_" + str(feature)]["spline_mean"](xs)

    if centering is not False:
        norm_const = self.feature_effect["feature_" + str(feature)]["norm_const"]
        yy = yy - norm_const

    if heterogeneity:
        yy_std = self.feature_effect["feature_" + str(feature)]["spline_std"](xs)
        return yy, yy_std
    else:
        return yy
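
Continuing the hypothetical shap_dp object, eval returns the spline value, optionally with the residual-spline standard deviation:

xs = np.linspace(-1, 1, 50)          # (T,) evaluation positions
y = shap_dp.eval(feature=0, xs=xs)   # mean SHAP curve, shape (T,)
y, std = shap_dp.eval(feature=0, xs=xs, heterogeneity=True, centering="zero_integral")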

plot(feature, heterogeneity=False, centering=False, nof_points=30, scale_x=None, scale_y=None, nof_shap_values='all', show_avg_output=False, y_limits=None)

Plot the SHAP Dependence Plot (SDP) of the s-th feature.

Parameters:

  feature (int, required)
      index of the plotted feature

  heterogeneity (Union[bool, str], default: False)
      whether to output the heterogeneity of the SHAP values
        • If heterogeneity is False, no heterogeneity is plotted
        • If heterogeneity is True or "std", the standard deviation of the shap values is plotted
        • If heterogeneity is "shap_values", the shap values are scattered on top of the SHAP curve

  centering (Union[bool, str], default: False)
      whether to center the SDP
        • If centering is False, the SHAP curve is not centered
        • If centering is True or zero_integral, the SHAP curve is centered around the y axis.
        • If centering is zero_start, the SHAP curve starts from y=0.

  nof_points (int, default: 30)
      number of points to evaluate the SDP plot

  scale_x (Optional[dict], default: None)
      dictionary with keys "mean" and "std" for scaling the x-axis

  scale_y (Optional[dict], default: None)
      dictionary with keys "mean" and "std" for scaling the y-axis

  nof_shap_values (Union[int, str], default: 'all')
      number of shap values to show on top of the SHAP curve

  show_avg_output (bool, default: False)
      whether to show the average output of the model

  y_limits (Optional[List], default: None)
      limits of the y-axis
Source code in /home/runner/work/effector/effector/effector/global_effect_shap.py
def plot(
    self,
    feature: int,
    heterogeneity: Union[bool, str] = False,
    centering: Union[bool, str] = False,
    nof_points: int = 30,
    scale_x: Optional[dict] = None,
    scale_y: Optional[dict] = None,
    nof_shap_values: Union[int, str] = "all",
    show_avg_output: bool = False,
    y_limits: Optional[List] = None,
) -> None:
    """
    Plot the SHAP Dependence Plot (SDP) of the s-th feature.

    Args:
        feature: index of the plotted feature
        heterogeneity: whether to output the heterogeneity of the SHAP values

            - If `heterogeneity` is `False`, no heterogeneity is plotted
            - If `heterogeneity` is `True` or `"std"`, the standard deviation of the shap values is plotted
            - If `heterogeneity` is `"shap_values"`, the shap values are scattered on top of the SHAP curve

        centering: whether to center the SDP

            - If `centering` is `False`, the SHAP curve is not centered
            - If `centering` is `True` or `zero_integral`, the SHAP curve is centered around the `y` axis.
            - If `centering` is `zero_start`, the SHAP curve starts from `y=0`.

        nof_points: number of points to evaluate the SDP plot
        scale_x: dictionary with keys "mean" and "std" for scaling the x-axis
        scale_y: dictionary with keys "mean" and "std" for scaling the y-axis
        nof_shap_values: number of shap values to show on top of the SHAP curve
        show_avg_output: whether to show the average output of the model
        y_limits: limits of the y-axis
    """
    heterogeneity = helpers.prep_confidence_interval(heterogeneity)

    x = np.linspace(
        self.axis_limits[0, feature], self.axis_limits[1, feature], nof_points
    )

    # get the SHAP curve
    y = self.eval(feature, x, heterogeneity=False, centering=centering)
    # the `or True` in the original made this branch unconditional; the std curve
    # is only needed when heterogeneity == "std"
    y_std = (
        self.feature_effect["feature_" + str(feature)]["spline_std"](x)
        if heterogeneity == "std"
        else None
    )

    # get some SHAP values
    _, ind = helpers.prep_nof_instances(nof_shap_values, self.data.shape[0])
    yy = (
        self.feature_effect["feature_" + str(feature)]["yy"][ind]
        if heterogeneity == "shap_values"
        else None
    )
    if yy is not None and centering is not False:
        yy = yy - self.feature_effect["feature_" + str(feature)]["norm_const"]
    xx = (
        self.feature_effect["feature_" + str(feature)]["xx"][ind]
        if heterogeneity == "shap_values"
        else None
    )

    if show_avg_output:
        avg_output = helpers.prep_avg_output(self.data, self.model, self.avg_output, scale_y)
    else:
        avg_output = None

    vis.plot_shap(
        x,
        y,
        xx,
        yy,
        y_std,
        feature,
        heterogeneity=heterogeneity,
        scale_x=scale_x,
        scale_y=scale_y,
        avg_output=avg_output,
        feature_names=self.feature_names,
        target_name=self.target_name,
        y_limits=y_limits
    )
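
A minimal plotting sketch, reusing the illustrative `shap_dp` object from the `eval` example above:

```python
# SHAP Dependence Plot for feature 0, centered so the curve starts at y=0,
# with 100 individual SHAP values scattered on top of the curve
shap_dp.plot(
    feature=0,
    heterogeneity="shap_values",
    centering="zero_start",
    nof_shap_values=100,
)
```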

Regional Effect Methods

effector.regional_effect.RegionalEffectBase

Source code in /home/runner/work/effector/effector/effector/regional_effect.py
class RegionalEffectBase:
    empty_symbol = helpers.EMPTY_SYMBOL

    def __init__(
        self,
        method_name: str,
        data: np.ndarray,
        model: Callable,
        model_jac: Optional[Callable] = None,
        data_effect: Optional[np.ndarray] = None,
        nof_instances: Union[int, str] = 100,
        axis_limits: Optional[np.ndarray] = None,
        feature_types: Optional[List] = None,
        cat_limit: Optional[int] = 10,
        feature_names: Optional[List] = None,
        target_name: Optional[str] = None,
    ) -> None:
        """
        Constructor for the RegionalEffect class.
        """
        self.method_name = method_name.lower()
        self.model = model
        self.model_jac = model_jac

        # select nof_instances from the data
        self.nof_instances, self.indices = helpers.prep_nof_instances(
            nof_instances, data.shape[0]
        )
        self.data = data[self.indices, :]
        self.instance_effects = data_effect[self.indices, :] if data_effect is not None else None
        self.dim = self.data.shape[1]

        # set axis_limits
        axis_limits = (
            helpers.axis_limits_from_data(data) if axis_limits is None else axis_limits
        )
        self.axis_limits: np.ndarray = axis_limits

        # set feature types
        self.cat_limit = cat_limit
        feature_types = (
            utils.get_feature_types(data, cat_limit)
            if feature_types is None
            else feature_types
        )
        self.feature_types: list = feature_types

        # set feature names
        feature_names: list[str] = (
            helpers.get_feature_names(axis_limits.shape[1])
            if feature_names is None
            else feature_names
        )
        self.feature_names: list = feature_names

        # set target name
        self.target_name = "y" if target_name is None else target_name

        # state variables
        self.is_fitted: np.ndarray = np.ones([self.dim]) < 0

        # parameters used when fitting the regional effect
        self.method_args: typing.Dict = {}

        # dictionary with all the information required for plotting or evaluating the regional effects
        self.partitioners: typing.Dict[str, Regions] = {}
        self.tree_full: typing.Dict[str, Tree] = {}
        self.tree_pruned: typing.Dict[str, Tree] = {}
        self.tree_full_scaled: typing.Dict[str, Tree] = {}
        self.tree_pruned_scaled: typing.Dict[str, Tree] = {}

    def _fit_feature(
        self,
        feature: int,
        heter_func: Callable,
        heter_pcg_drop_thres: float = 0.1,
        heter_small_enough: float = 0.1,
        max_split_levels: int = 2,
        candidate_positions_for_numerical: int = 20,
        min_points_per_subregion: int = 10,
        candidate_foc: Union[str, List] = "all",
        split_categorical_features: bool = False,
    ):
        """
        Find the subregions for a single feature.
        """
        # init Region Extractor
        regions = Regions(
            feature,
            heter_func,
            self.data,
            self.instance_effects,
            self.feature_types,
            self.feature_names,
            self.target_name,
            self.cat_limit,
            candidate_foc,
            min_points_per_subregion,
            candidate_positions_for_numerical,
            max_split_levels,
            heter_pcg_drop_thres,
            heter_small_enough,
            split_categorical_features,
        )

        # apply partitioning
        regions.search_all_splits()
        regions.choose_important_splits()
        self.tree_full["feature_{}".format(feature)] = regions.splits_to_tree()
        self.tree_pruned["feature_{}".format(feature)] = regions.splits_to_tree(True)

        # store the partitioning object
        self.partitioners["feature_{}".format(feature)] = regions

        # update state
        self.is_fitted[feature] = True

    def refit(self, feature):
        if not self.is_fitted[feature]:
            self.fit(feature)

    def get_node_info(self, feature, node_idx):
        assert self.is_fitted[feature], "Feature {} has not been fitted yet".format(feature)
        assert self.tree_pruned["feature_{}".format(feature)] is not None, "Feature {} has no splits".format(feature)

        if self.tree_pruned_scaled is not None and "feature_{}".format(feature) in self.tree_pruned_scaled.keys():
            tree = self.tree_pruned_scaled["feature_{}".format(feature)]
        else:
            tree = self.tree_pruned["feature_{}".format(feature)]

        # assert node id exists
        assert node_idx in [node.idx for node in tree.nodes], "Node {} does not exist".format(node_idx)

        # find the node
        node = [node for node in tree.nodes if node.idx == node_idx][0]

        # get data
        data = node.data["data"]
        data_effect = node.data["data_effect"]
        name = node.name
        return data, data_effect, name

    def _create_fe_object(self, data, data_effect, feature_names):
        if self.method_name == "rhale":
            return RHALE(data, self.model, self.model_jac, data_effect=data_effect, feature_names=feature_names, target_name=self.target_name)
        elif self.method_name == "ale":
            return ALE(data, self.model, feature_names=feature_names, target_name=self.target_name)
        elif self.method_name == "shap":
            return ShapDP(data, self.model, feature_names=feature_names, target_name=self.target_name)
        elif self.method_name == "pdp":
            return PDP(data, self.model, feature_names=feature_names, target_name=self.target_name)
        elif self.method_name == "d-pdp":
            return DerPDP(data, self.model, self.model_jac, feature_names=feature_names, target_name=self.target_name)
        else:
            raise NotImplementedError

    def eval(self, feature, node_idx, xs, heterogeneity=False, centering=False):
        """
        Evaluate the regional effect for a given feature and node.

        Args:
            feature: the feature to evaluate
            node_idx: the node corresponding to the subregion to evaluate
            xs: the points at which to evaluate the regional effect
            heterogeneity: whether to return the heterogeneity.

                  - if `heterogeneity=False`, the function returns the mean effect at the given `xs`
                  - If `heterogeneity=True`, the function returns `(y, std)` where `y` is the mean effect and `std` is the standard deviation of the mean effect

            centering: whether to center the regional effect. The following options are available:

                - If `centering` is `False`, the regional effect is not centered
                - If `centering` is `True` or `zero_integral`, the regional effect is centered around the `y` axis.
                - If `centering` is `zero_start`, the regional effect starts from `y=0`.

        Returns:
            the mean effect `y`, if `heterogeneity=False` (default) or a tuple `(y, std)` otherwise

        """
        self.refit(feature)
        centering = helpers.prep_centering(centering)
        data, data_effect, _ = self.get_node_info(feature, node_idx)
        fe_method = self._create_fe_object(data, data_effect, None)
        return fe_method.eval(feature, xs, heterogeneity, centering)

    def fit(self, *args, **kwargs):
        raise NotImplementedError

    def plot(self,
             feature,
             node_idx,
             heterogeneity=False,
             centering=False,
             scale_x_list=None,
             scale_y=None,
             y_limits=None):

        self.refit(feature)

        if scale_x_list is not None:
            self.tree_full_scaled["feature_{}".format(feature)] = self.partitioners["feature_{}".format(feature)].splits_to_tree(False, scale_x_list)
            self.tree_pruned_scaled["feature_{}".format(feature)] = self.partitioners["feature_{}".format(feature)].splits_to_tree(True, scale_x_list)

        data, data_effect, name = self.get_node_info(feature, node_idx)
        feature_names = copy.deepcopy(self.feature_names)
        feature_names[feature] = name
        fe_method = self._create_fe_object(data, data_effect, feature_names)

        return fe_method.plot(
            feature=feature,
            heterogeneity=heterogeneity,
            centering=centering,
            scale_x=scale_x_list[feature] if scale_x_list is not None else None,
            scale_y=scale_y,
            y_limits=y_limits
            )

    def show_partitioning(self, features, only_important=True, scale_x_list=None):
        features = helpers.prep_features(features, self.dim)

        for feat in features:
            self.refit(feat)

            if scale_x_list is not None:
                tree_full_scaled = self.partitioners["feature_{}".format(feat)].splits_to_tree(False, scale_x_list)
                tree_pruned_scaled = self.partitioners["feature_{}".format(feat)].splits_to_tree(True, scale_x_list)
                tree_dict = tree_pruned_scaled if only_important else tree_full_scaled
            else:
                tree_dict = self.tree_pruned["feature_{}".format(feat)] if only_important else self.tree_full["feature_{}".format(feat)]

            print("Feature {} - Full partition tree:".format(feat))

            if tree_dict is None:
                print("No splits found for feature {}".format(feat))
            else:
                tree_dict.show_full_tree()

            print("-" * 50)
            print("Feature {} - Statistics per tree level:".format(feat))

            if tree_dict is None:
                print("No splits found for feature {}".format(feat))
            else:
                tree_dict.show_level_stats()

    def describe_subregions(
        self,
        features,
        only_important=True,
        scale_x_list: typing.Union[None, typing.List[dict]] = None,
    ):
        features = helpers.prep_features(features, self.dim)
        for feature in features:
            self.refit(feature)

            # if the tree is None, it means the feature is categorical (no splits)
            if self.tree_full["feature_{}".format(feature)] is None:
                continue

            feature_name = self.feature_names[feature]
            if only_important:
                tree = self.tree_pruned["feature_{}".format(feature)]
                if len(tree.nodes) == 1:
                    print("No important splits found for feature {}".format(feature))
                    continue
                else:
                    print("Important splits for feature {}".format(feature_name))
            else:
                print("All splits for feature {}".format(feature_name))
                tree = self.tree_full["feature_{}".format(feature)]

            max_level = max([node.level for node in tree.nodes])
            for level in range(1, max_level+1):
                previous_level_nodes = tree.get_level_nodes(level-1)
                level_nodes = tree.get_level_nodes(level)
                type_of_split_feature = level_nodes[0].data["feature_type"]
                foc_name = self.feature_names[level_nodes[0].data["feature"]]
                print("- On feature {} ({})".format(foc_name, type_of_split_feature))

                position_split_formatted = (
                    "{:.2f}".format(level_nodes[0].data["position"])
                    if scale_x_list is None
                    else "{:.2f}".format(
                        level_nodes[0].data["position"] * scale_x_list[level_nodes[0].data["feature"]]["std"]
                        + scale_x_list[level_nodes[0].data["feature"]]["mean"]
                    )
                )
                print("  - Position of split: {}".format(position_split_formatted))

                weight_heter_before = np.sum([node.data["weight"] * node.data["heterogeneity"] for node in previous_level_nodes])
                print("  - Heterogeneity before split: {:.2f}".format(weight_heter_before))

                weight_heter = np.sum([node.data["weight"] * node.data["heterogeneity"] for node in level_nodes])
                print("  - Heterogeneity after split: {:.2f}".format(weight_heter))
                weight_heter_drop = weight_heter_before - weight_heter
                print("  - Heterogeneity drop: {:.2f} ({:.2f} %)".format(
                    weight_heter_drop, weight_heter_drop / weight_heter_before * 100)
                )

                nof_instances_before = [nod.data["nof_instances"] for nod in previous_level_nodes]
                print("  - Number of instances before split: {}".format(nof_instances_before))
                nof_instances = [nod.data["nof_instances"] for nod in level_nodes]
                print("  - Number of instances after split: {}".format(nof_instances))

eval(feature, node_idx, xs, heterogeneity=False, centering=False)

Evaluate the regional effect for a given feature and node.

Parameters:

- `feature` (required): the feature to evaluate
- `node_idx` (required): the node corresponding to the subregion to evaluate
- `xs` (required): the points at which to evaluate the regional effect
- `heterogeneity` (default `False`): whether to return the heterogeneity
    - If `heterogeneity=False`, the function returns the mean effect at the given `xs`
    - If `heterogeneity=True`, the function returns `(y, std)` where `y` is the mean effect and `std` is the standard deviation of the mean effect
- `centering` (default `False`): whether to center the regional effect. The following options are available:
    - If `centering` is `False`, the regional effect is not centered
    - If `centering` is `True` or `zero_integral`, the regional effect is centered around the `y` axis
    - If `centering` is `zero_start`, the regional effect starts from `y=0`

Returns:

- the mean effect `y`, if `heterogeneity=False` (default) or a tuple `(y, std)` otherwise

Source code in /home/runner/work/effector/effector/effector/regional_effect.py
def eval(self, feature, node_idx, xs, heterogeneity=False, centering=False):
    """
    Evaluate the regional effect for a given feature and node.

    Args:
        feature: the feature to evaluate
        node_idx: the node corresponding to the subregion to evaluate
        xs: the points at which to evaluate the regional effect
        heterogeneity: whether to return the heterogeneity.

              - if `heterogeneity=False`, the function returns the mean effect at the given `xs`
              - If `heterogeneity=True`, the function returns `(y, std)` where `y` is the mean effect and `std` is the standard deviation of the mean effect

        centering: whether to center the regional effect. The following options are available:

            - If `centering` is `False`, the regional effect is not centered
            - If `centering` is `True` or `zero_integral`, the regional effect is centered around the `y` axis.
            - If `centering` is `zero_start`, the regional effect starts from `y=0`.

    Returns:
        the mean effect `y`, if `heterogeneity=False` (default) or a tuple `(y, std)` otherwise

    """
    self.refit(feature)
    centering = helpers.prep_centering(centering)
    data, data_effect, _ = self.get_node_info(feature, node_idx)
    fe_method = self._create_fe_object(data, data_effect, None)
    return fe_method.eval(feature, xs, heterogeneity, centering)
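
A sketch of evaluating a fitted regional effect, continuing the illustrative `r_pdp` example above:

```python
xs = np.linspace(-1, 1, 30)

# mean regional effect of feature 0 inside the subregion of node 1
# (assumes fit() found at least one split), plus its heterogeneity
y, y_std = r_pdp.eval(feature=0, node_idx=1, xs=xs, heterogeneity=True, centering=True)
```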

plot(feature, node_idx, heterogeneity=False, centering=False, scale_x_list=None, scale_y=None, y_limits=None)

Source code in /home/runner/work/effector/effector/effector/regional_effect.py
def plot(self,
         feature,
         node_idx,
         heterogeneity=False,
         centering=False,
         scale_x_list=None,
         scale_y=None,
         y_limits=None):

    self.refit(feature)

    if scale_x_list is not None:
        self.tree_full_scaled["feature_{}".format(feature)] = self.partitioners["feature_{}".format(feature)].splits_to_tree(False, scale_x_list)
        self.tree_pruned_scaled["feature_{}".format(feature)] = self.partitioners["feature_{}".format(feature)].splits_to_tree(True, scale_x_list)

    data, data_effect, name = self.get_node_info(feature, node_idx)
    feature_names = copy.deepcopy(self.feature_names)
    feature_names[feature] = name
    fe_method = self._create_fe_object(data, data_effect, feature_names)

    return fe_method.plot(
        feature=feature,
        heterogeneity=heterogeneity,
        centering=centering,
        scale_x=scale_x_list[feature] if scale_x_list is not None else None,
        scale_y=scale_y,
        y_limits=y_limits
        )

get_node_info(feature, node_idx)

Source code in /home/runner/work/effector/effector/effector/regional_effect.py
def get_node_info(self, feature, node_idx):
    assert self.is_fitted[feature], "Feature {} has not been fitted yet".format(feature)
    assert self.tree_pruned["feature_{}".format(feature)] is not None, "Feature {} has no splits".format(feature)

    if self.tree_pruned_scaled is not None and "feature_{}".format(feature) in self.tree_pruned_scaled.keys():
        tree = self.tree_pruned_scaled["feature_{}".format(feature)]
    else:
        tree = self.tree_pruned["feature_{}".format(feature)]

    # assert node id exists
    assert node_idx in [node.idx for node in tree.nodes], "Node {} does not exist".format(node_idx)

    # find the node
    node = [node for node in tree.nodes if node.idx == node_idx][0]

    # get data
    data = node.data["data"]
    data_effect = node.data["data_effect"]
    name = node.name
    return data, data_effect, name
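
`get_node_info` exposes the raw contents of a subregion; a short sketch, again continuing the illustrative example:

```python
# data and instance effects of the subregion of node 1 (data_effect is None
# for PDP-based methods), plus a human-readable name such as "x_0 | x_2 <= 0.0"
data, data_effect, name = r_pdp.get_node_info(feature=0, node_idx=1)
print(name, data.shape)
```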

effector.regional_effect_ale.RegionalALE

Bases: RegionalEffectBase

Source code in /home/runner/work/effector/effector/effector/regional_effect_ale.py
class RegionalALE(RegionalEffectBase):
    def __init__(
        self,
        data: np.ndarray,
        model: callable,
        nof_instances: typing.Union[int, str] = "all",
        axis_limits: typing.Union[None, np.ndarray] = None,
        feature_types: typing.Union[list, None] = None,
        cat_limit: typing.Union[int, None] = 10,
        feature_names: typing.Union[list, None] = None,
        target_name: typing.Union[str, None] = None,
    ):
        """
        Regional ALE constructor.

        Args:
            data: X matrix (N,D).
            model: the black-box model (N,D) -> (N, )
            axis_limits: axis limits for the FE plot [2, D] or None. If None, axis limits are computed from the data.
            feature_types: list of feature types (categorical or numerical)
            cat_limit: the maximum number of unique values for a feature to be considered categorical
            feature_names: list of feature names
        """
        super(RegionalALE, self).__init__(
            "ale",
            data,
            model,
            None,
            None,
            nof_instances,
            axis_limits,
            feature_types,
            cat_limit,
            feature_names,
            target_name
        )

    def _create_heterogeneity_function(self, foi, binning_method, min_points, centering):
        binning_method = prep_binning_method(binning_method)
        assert isinstance(binning_method, binning_methods.Fixed)

        def heter(data, instance_effects=None) -> float:
            if data.shape[0] < min_points:
                return BIG_M

            ale = ALE(data, self.model, "all", None, instance_effects)
            try:
                ale.fit(features=foi, binning_method=binning_method, centering=centering)
            except Exception:
                return BIG_M

            # heterogeneity is the accumulated std at the end of the curve
            axis_limits = helpers.axis_limits_from_data(data)
            stop = np.array([axis_limits[:, foi][1]])
            _, z = ale.eval(feature=foi, xs=stop, heterogeneity=True)
            return z.item()

        return heter

    def fit(
        self,
        features: typing.Union[int, str, list],
        heter_pcg_drop_thres: float = 0.1,
        heter_small_enough: float = 0.1,
        max_depth: int = 1,
        nof_candidate_splits_for_numerical: int = 20,
        min_points_per_subregion: int = 10,
        candidate_conditioning_features: typing.Union["str", list] = "all",
        split_categorical_features: bool = False,
        binning_method: typing.Union[str, binning_methods.Fixed] = binning_methods.Fixed(nof_bins=20, min_points_per_bin=0),
        centering: typing.Union[bool, str] = False,
    ):
        """
        Find the Regional ALE for a list of features.

        Args:
            features: list of features to fit
            heter_pcg_drop_thres: heterogeneity drop threshold for a split to be considered important
            heter_small_enough: heterogeneity threshold for a region to be considered homogeneous (splitting stops)
            binning_method: binning method to use
            max_depth: maximum number of splits to perform (depth of the tree)
            nof_candidate_splits_for_numerical: number of candidate splits to consider for numerical features
            min_points_per_subregion: minimum allowed number of points in a subregion (otherwise the split is not considered as valid)
            candidate_conditioning_features: list of features to consider as conditioning features for the candidate splits
        """

        assert min_points_per_subregion >= 2, "min_points_per_subregion must be >= 2"
        features = helpers.prep_features(features, self.dim)
        for feat in tqdm(features):
            heter = self._create_heterogeneity_function(
                feat, binning_method, min_points_per_subregion, centering
            )

            self._fit_feature(
                feat,
                heter,
                heter_pcg_drop_thres,
                heter_small_enough,
                max_depth,
                nof_candidate_splits_for_numerical,
                min_points_per_subregion,
                candidate_conditioning_features,
                split_categorical_features,
            )

            self.method_args["feature_" + str(feat)] = {
                "heter_pcg_drop_thres": heter_pcg_drop_thres,
                "heter_small_enough": heter_small_enough,
                "max_depth": max_depth,
                "nof_candidate_splits_for_numerical": nof_candidate_splits_for_numerical,
                "min_points_per_subregion": min_points_per_subregion,
                "candidate_conditioning_features": candidate_conditioning_features,
                "split_categorical_features": split_categorical_features,
                "binning_method": binning_method,
                "centering": centering,
            }

    def plot(self,
             feature,
             node_idx,
             heterogeneity=False,
             centering=False,
             scale_x_list=None,
             scale_y=None,
             y_limits=None,
             dy_limits=None):

        # get data from the node
        self.refit(feature)

        if scale_x_list is not None:
            self.tree_full_scaled["feature_{}".format(feature)] = self.partitioners["feature_{}".format(feature)].splits_to_tree(False, scale_x_list)
            self.tree_pruned_scaled["feature_{}".format(feature)] = self.partitioners["feature_{}".format(feature)].splits_to_tree(True, scale_x_list)

        data, data_effect, name = self.get_node_info(feature, node_idx)
        feature_names = copy.deepcopy(self.feature_names)
        feature_names[feature] = name

        # define the method and fit
        self.method_args["feature_" + str(feature)]["heterogeneity"] = heterogeneity
        rhale = RHALE(data, self.model, self.model_jac, self.nof_instances, None, data_effect, feature_names=feature_names)
        binning_method = prep_binning_method(self.method_args["feature_" + str(feature)]["binning_method"])
        rhale.fit(features=feature, binning_method=binning_method, centering=centering)
        scale_x = scale_x_list[feature] if scale_x_list is not None else None
        rhale.plot(feature=feature, heterogeneity=heterogeneity, centering=centering, scale_x=scale_x, scale_y=scale_y, y_limits=y_limits, dy_limits=dy_limits)

__init__(data, model, nof_instances='all', axis_limits=None, feature_types=None, cat_limit=10, feature_names=None, target_name=None)

Regional ALE constructor.

Parameters:

- `data` (`np.ndarray`, required): X matrix (N,D).
- `model` (`callable`, required): the black-box model (N,D) -> (N, )
- `axis_limits` (`typing.Union[None, np.ndarray]`, default `None`): axis limits for the FE plot [2, D] or None. If None, axis limits are computed from the data.
- `feature_types` (`typing.Union[list, None]`, default `None`): list of feature types (categorical or numerical)
- `cat_limit` (`typing.Union[int, None]`, default `10`): the maximum number of unique values for a feature to be considered categorical
- `feature_names` (`typing.Union[list, None]`, default `None`): list of feature names
Source code in /home/runner/work/effector/effector/effector/regional_effect_ale.py
def __init__(
    self,
    data: np.ndarray,
    model: callable,
    nof_instances: typing.Union[int, str] = "all",
    axis_limits: typing.Union[None, np.ndarray] = None,
    feature_types: typing.Union[list, None] = None,
    cat_limit: typing.Union[int, None] = 10,
    feature_names: typing.Union[list, None] = None,
    target_name: typing.Union[str, None] = None,
):
    """
    Regional ALE constructor.

    Args:
        data: X matrix (N,D).
        model: the black-box model (N,D) -> (N, )
        axis_limits: axis limits for the FE plot [2, D] or None. If None, axis limits are computed from the data.
        feature_types: list of feature types (categorical or numerical)
        cat_limit: the maximum number of unique values for a feature to be considered categorical
        feature_names: list of feature names
    """
    super(RegionalALE, self).__init__(
        "ale",
        data,
        model,
        None,
        None,
        nof_instances,
        axis_limits,
        feature_types,
        cat_limit,
        feature_names,
        target_name
    )

fit(features, heter_pcg_drop_thres=0.1, heter_small_enough=0.1, max_depth=1, nof_candidate_splits_for_numerical=20, min_points_per_subregion=10, candidate_conditioning_features='all', split_categorical_features=False, binning_method=binning_methods.Fixed(nof_bins=20, min_points_per_bin=0), centering=False)

Find the Regional ALE for a list of features.

Parameters:

- `features` (`typing.Union[int, str, list]`, required): list of features to fit
- `heter_pcg_drop_thres` (`float`, default `0.1`): heterogeneity drop threshold for a split to be considered important
- `heter_small_enough` (`float`, default `0.1`): heterogeneity threshold for a region to be considered homogeneous (splitting stops)
- `binning_method` (`typing.Union[str, binning_methods.Fixed]`, default `binning_methods.Fixed(nof_bins=20, min_points_per_bin=0)`): binning method to use
- `max_depth` (`int`, default `1`): maximum number of splits to perform (depth of the tree)
- `nof_candidate_splits_for_numerical` (`int`, default `20`): number of candidate splits to consider for numerical features
- `min_points_per_subregion` (`int`, default `10`): minimum allowed number of points in a subregion (otherwise the split is not considered as valid)
- `candidate_conditioning_features` (`typing.Union[str, list]`, default `'all'`): list of features to consider as conditioning features for the candidate splits
Source code in /home/runner/work/effector/effector/effector/regional_effect_ale.py
def fit(
    self,
    features: typing.Union[int, str, list],
    heter_pcg_drop_thres: float = 0.1,
    heter_small_enough: float = 0.1,
    max_depth: int = 1,
    nof_candidate_splits_for_numerical: int = 20,
    min_points_per_subregion: int = 10,
    candidate_conditioning_features: typing.Union["str", list] = "all",
    split_categorical_features: bool = False,
    binning_method: typing.Union[str, binning_methods.Fixed] = binning_methods.Fixed(nof_bins=20, min_points_per_bin=0),
    centering: typing.Union[bool, str] = False,
):
    """
    Find the Regional ALE for a list of features.

    Args:
        features: list of features to fit
        heter_pcg_drop_thres: heterogeneity drop threshold for a split to be considered important
        heter_small_enough: heterogeneity threshold for a region to be considered homogeneous (splitting stops)
        binning_method: binning method to use
        max_depth: maximum number of splits to perform (depth of the tree)
        nof_candidate_splits_for_numerical: number of candidate splits to consider for numerical features
        min_points_per_subregion: minimum allowed number of points in a subregion (otherwise the split is not considered as valid)
        candidate_conditioning_features: list of features to consider as conditioning features for the candidate splits
    """

    assert min_points_per_subregion >= 2, "min_points_per_subregion must be >= 2"
    features = helpers.prep_features(features, self.dim)
    for feat in tqdm(features):
        heter = self._create_heterogeneity_function(
            feat, binning_method, min_points_per_subregion, centering
        )

        self._fit_feature(
            feat,
            heter,
            heter_pcg_drop_thres,
            heter_small_enough,
            max_depth,
            nof_candidate_splits_for_numerical,
            min_points_per_subregion,
            candidate_conditioning_features,
            split_categorical_features,
        )

        self.method_args["feature_" + str(feat)] = {
            "heter_pcg_drop_thres": heter_pcg_drop_thres,
            "heter_small_enough": heter_small_enough,
            "max_depth": max_depth,
            "nof_candidate_splits_for_numerical": nof_candidate_splits_for_numerical,
            "min_points_per_subregion": min_points_per_subregion,
            "candidate_conditioning_features": candidate_conditioning_features,
            "split_categorical_features": split_categorical_features,
            "binning_method": binning_method,
            "centering": centering,
        }
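
A minimal usage sketch for `RegionalALE` (toy data and model are assumptions for the example; `binning_methods` is assumed to be importable from the package, as in the signature above):

```python
import numpy as np
import effector
from effector import binning_methods

np.random.seed(0)
X = np.random.uniform(-1, 1, size=(1000, 3))
model = lambda x: np.where(x[:, 2] > 0, x[:, 0], -x[:, 0]) + x[:, 1]

r_ale = effector.RegionalALE(X, model)
r_ale.fit(
    features=0,
    binning_method=binning_methods.Fixed(nof_bins=20, min_points_per_bin=0),
)
r_ale.describe_subregions(features=0, only_important=True)
r_ale.plot(feature=0, node_idx=1)
```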

effector.regional_effect_ale.RegionalRHALE

Bases: RegionalEffectBase

Source code in /home/runner/work/effector/effector/effector/regional_effect_ale.py
class RegionalRHALE(RegionalEffectBase):
    def __init__(
        self,
        data: np.ndarray,
        model: Callable,
        model_jac: Optional[Callable] = None,
        instance_effects: Optional[np.ndarray] = None,
        nof_instances: Union[int, str] = "all",
        axis_limits: Optional[np.ndarray] = None,
        feature_types: Optional[List] = None,
        cat_limit: Optional[int] = 10,
        feature_names: Optional[List] = None,
        target_name: Optional[str] = None,
    ):
        """
        Regional RHALE constructor.

        Args:
            data: X matrix (N,D).
            model: the black-box model (N,D) -> (N, )
            model_jac: the black-box model Jacobian (N,D) -> (N,D)
            axis_limits: axis limits for the FE plot [2, D] or None. If None, axis limits are computed from the data.
            feature_types: list of feature types (categorical or numerical)
            cat_limit: the maximum number of unique values for a feature to be considered categorical
            feature_names: list of feature names
        """

        if instance_effects is None:
            if model_jac is not None:
                instance_effects = model_jac(data)
            else:
                instance_effects = utils.compute_jacobian_numerically(model, data)


        super(RegionalRHALE, self).__init__(
            "rhale",
            data,
            model,
            model_jac,
            instance_effects,
            nof_instances,
            axis_limits,
            feature_types,
            cat_limit,
            feature_names,
            target_name
        )

    def _create_heterogeneity_function(self, foi, binning_method, min_points, centering):
        binning_method = prep_binning_method(binning_method)

        def heter(data, instance_effects=None) -> float:
            if data.shape[0] < min_points:
                return BIG_M

            rhale = RHALE(data, self.model, self.model_jac, "all", None, instance_effects)
            try:
                rhale.fit(features=foi, binning_method=binning_method, centering=centering)
            except Exception:
                return BIG_M

            # heterogeneity is the accumulated std at the end of the curve
            axis_limits = helpers.axis_limits_from_data(data)
            stop = np.array([axis_limits[:, foi][1]])
            _, z = rhale.eval(feature=foi, xs=stop, heterogeneity=True)
            return z.item()

        return heter

    def fit(
        self,
        features: typing.Union[int, str, list] = "all",
        heter_pcg_drop_thres: float = 0.1,
        heter_small_enough: float = 0.1,
        max_depth: int = 1,
        nof_candidate_splits_for_numerical: int = 20,
        min_points_per_subregion: int = 10,
        candidate_conditioning_features: typing.Union["str", list] = "all",
        split_categorical_features: bool = False,
        binning_method: typing.Union[
                str,
                binning_methods.Fixed,
                binning_methods.DynamicProgramming,
                binning_methods.Greedy,
        ] = "greedy",
        centering: typing.Union[bool, str] = False,
    ):
        """
        Find the Regional RHALE for a list of features.

        Args:
            features: list of features to fit
            heter_pcg_drop_thres: heterogeneity drop threshold for a split to be considered important
            heter_small_enough: heterogeneity threshold for a region to be considered homogeneous (splitting stops)
            binning_method: binning method to use
            max_depth: maximum number of splits to perform (depth of the tree)
            nof_candidate_splits_for_numerical: number of candidate splits to consider for numerical features
            min_points_per_subregion: minimum allowed number of points in a subregion (otherwise the split is not considered as valid)
            candidate_conditioning_features: list of features to consider as conditioning features for the candidate splits
        """

        assert min_points_per_subregion >= 2, "min_points_per_subregion must be >= 2"
        features = helpers.prep_features(features, self.dim)
        for feat in tqdm(features):
            heter = self._create_heterogeneity_function(
                feat, binning_method, min_points_per_subregion, centering
            )

            self._fit_feature(
                feat,
                heter,
                heter_pcg_drop_thres,
                heter_small_enough,
                max_depth,
                nof_candidate_splits_for_numerical,
                min_points_per_subregion,
                candidate_conditioning_features,
                split_categorical_features,
            )

            self.method_args["feature_" + str(feat)] = {
                "heter_pcg_drop_thres": heter_pcg_drop_thres,
                "heter_small_enough": heter_small_enough,
                "max_depth": max_depth,
                "nof_candidate_splits_for_numerical": nof_candidate_splits_for_numerical,
                "min_points_per_subregion": min_points_per_subregion,
                "candidate_conditioning_features": candidate_conditioning_features,
                "split_categorical_features": split_categorical_features,
                "binning_method": binning_method,
            }

    def plot(self,
             feature,
             node_idx,
             heterogeneity=False,
             centering=False,
             scale_x_list=None,
             scale_y=None,
             y_limits=None,
             dy_limits=None):

        # get data from the node
        self.refit(feature)

        if scale_x_list is not None:
            self.tree_full_scaled["feature_{}".format(feature)] = self.partitioners["feature_{}".format(feature)].splits_to_tree(False, scale_x_list)
            self.tree_pruned_scaled["feature_{}".format(feature)] = self.partitioners["feature_{}".format(feature)].splits_to_tree(True, scale_x_list)

        data, data_effect, name = self.get_node_info(feature, node_idx)
        feature_names = copy.deepcopy(self.feature_names)
        feature_names[feature] = name

        # define the method and fit
        self.method_args["feature_" + str(feature)]["heterogeneity"] = heterogeneity
        rhale = RHALE(data, self.model, self.model_jac, self.nof_instances, None, data_effect, feature_names=feature_names)
        binning_method = prep_binning_method(self.method_args["feature_" + str(feature)]["binning_method"])
        rhale.fit(features=feature, binning_method=binning_method, centering=centering)
        scale_x = scale_x_list[feature] if scale_x_list is not None else None
        rhale.plot(feature=feature, heterogeneity=heterogeneity, centering=centering, scale_x=scale_x, scale_y=scale_y, y_limits=y_limits, dy_limits=dy_limits)

__init__(data, model, model_jac=None, instance_effects=None, nof_instances='all', axis_limits=None, feature_types=None, cat_limit=10, feature_names=None, target_name=None)

Regional RHALE constructor.

Parameters:

- `data` (`np.ndarray`, required): X matrix (N,D).
- `model` (`Callable`, required): the black-box model (N,D) -> (N, )
- `model_jac` (`Optional[Callable]`, default `None`): the black-box model Jacobian (N,D) -> (N,D)
- `axis_limits` (`Optional[np.ndarray]`, default `None`): axis limits for the FE plot [2, D] or None. If None, axis limits are computed from the data.
- `feature_types` (`Optional[List]`, default `None`): list of feature types (categorical or numerical)
- `cat_limit` (`Optional[int]`, default `10`): the maximum number of unique values for a feature to be considered categorical
- `feature_names` (`Optional[List]`, default `None`): list of feature names
Source code in /home/runner/work/effector/effector/effector/regional_effect_ale.py
def __init__(
    self,
    data: np.ndarray,
    model: Callable,
    model_jac: Optional[Callable] = None,
    instance_effects: Optional[np.ndarray] = None,
    nof_instances: Union[int, str] = "all",
    axis_limits: Optional[np.ndarray] = None,
    feature_types: Optional[List] = None,
    cat_limit: Optional[int] = 10,
    feature_names: Optional[List] = None,
    target_name: Optional[str] = None,
):
    """
    Regional RHALE constructor.

    Args:
        data: X matrix (N,D).
        model: the black-box model (N,D) -> (N, )
        model_jac: the black-box model Jacobian (N,D) -> (N,D)
        axis_limits: axis limits for the FE plot [2, D] or None. If None, axis limits are computed from the data.
        feature_types: list of feature types (categorical or numerical)
        cat_limit: the maximum number of unique values for a feature to be considered categorical
        feature_names: list of feature names
    """

    if instance_effects is None:
        if model_jac is not None:
            instance_effects = model_jac(data)
        else:
            instance_effects = utils.compute_jacobian_numerically(model, data)


    super(RegionalRHALE, self).__init__(
        "rhale",
        data,
        model,
        model_jac,
        instance_effects,
        nof_instances,
        axis_limits,
        feature_types,
        cat_limit,
        feature_names,
        target_name
    )

fit(features='all', heter_pcg_drop_thres=0.1, heter_small_enough=0.1, max_depth=1, nof_candidate_splits_for_numerical=20, min_points_per_subregion=10, candidate_conditioning_features='all', split_categorical_features=False, binning_method='greedy', centering=False)

Find the Regional RHALE for a list of features.

Parameters:

- `features` (`typing.Union[int, str, list]`, default `'all'`): list of features to fit
- `heter_pcg_drop_thres` (`float`, default `0.1`): heterogeneity drop threshold for a split to be considered important
- `heter_small_enough` (`float`, default `0.1`): heterogeneity threshold for a region to be considered homogeneous (splitting stops)
- `binning_method` (`typing.Union[str, binning_methods.Fixed, binning_methods.DynamicProgramming, binning_methods.Greedy]`, default `'greedy'`): binning method to use
- `max_depth` (`int`, default `1`): maximum number of splits to perform (depth of the tree)
- `nof_candidate_splits_for_numerical` (`int`, default `20`): number of candidate splits to consider for numerical features
- `min_points_per_subregion` (`int`, default `10`): minimum allowed number of points in a subregion (otherwise the split is not considered as valid)
- `candidate_conditioning_features` (`typing.Union[str, list]`, default `'all'`): list of features to consider as conditioning features for the candidate splits
Source code in /home/runner/work/effector/effector/effector/regional_effect_ale.py
def fit(
    self,
    features: typing.Union[int, str, list] = "all",
    heter_pcg_drop_thres: float = 0.1,
    heter_small_enough: float = 0.1,
    max_depth: int = 1,
    nof_candidate_splits_for_numerical: int = 20,
    min_points_per_subregion: int = 10,
    candidate_conditioning_features: typing.Union["str", list] = "all",
    split_categorical_features: bool = False,
    binning_method: typing.Union[
            str,
            binning_methods.Fixed,
            binning_methods.DynamicProgramming,
            binning_methods.Greedy,
    ] = "greedy",
    centering: typing.Union[bool, str] = False,
):
    """
    Find the Regional RHALE for a list of features.

    Args:
        features: list of features to fit
        heter_pcg_drop_thres: heterogeneity drop threshold for a split to be considered important
        heter_small_enough: heterogeneity threshold for a region to be considered homogeneous (splitting stops)
        binning_method: binning method to use
        max_depth: maximum number of splits to perform (depth of the tree)
        nof_candidate_splits_for_numerical: number of candidate splits to consider for numerical features
        min_points_per_subregion: minimum allowed number of points in a subregion (otherwise the split is not considered as valid)
        candidate_conditioning_features: list of features to consider as conditioning features for the candidate splits
    """

    assert min_points_per_subregion >= 2, "min_points_per_subregion must be >= 2"
    features = helpers.prep_features(features, self.dim)
    for feat in tqdm(features):
        heter = self._create_heterogeneity_function(
            feat, binning_method, min_points_per_subregion, centering
        )

        self._fit_feature(
            feat,
            heter,
            heter_pcg_drop_thres,
            heter_small_enough,
            max_depth,
            nof_candidate_splits_for_numerical,
            min_points_per_subregion,
            candidate_conditioning_features,
            split_categorical_features,
        )

        self.method_args["feature_" + str(feat)] = {
            "heter_pcg_drop_thres": heter_pcg_drop_thres,
            "heter_small_enough": heter_small_enough,
            "max_depth": max_depth,
            "nof_candidate_splits_for_numerical": nof_candidate_splits_for_numerical,
            "min_points_per_subregion": min_points_per_subregion,
            "candidate_conditioning_features": candidate_conditioning_features,
            "split_categorical_features": split_categorical_features,
            "binning_method": binning_method,
        }
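
A minimal usage sketch for `RegionalRHALE`; the toy model and its analytic Jacobian below are illustrative assumptions (if `model_jac` is omitted, the Jacobian is computed numerically):

```python
import numpy as np
import effector

np.random.seed(0)
X = np.random.uniform(-1, 1, size=(1000, 3))
model = lambda x: np.where(x[:, 2] > 0, x[:, 0], -x[:, 0]) + x[:, 1]

def model_jac(x):
    # analytic Jacobian of the toy model above, shape (N, D)
    jac = np.zeros_like(x)
    jac[:, 0] = np.where(x[:, 2] > 0, 1.0, -1.0)
    jac[:, 1] = 1.0
    return jac

r_rhale = effector.RegionalRHALE(X, model, model_jac)
r_rhale.fit(features=0, binning_method="greedy")
r_rhale.plot(feature=0, node_idx=1, heterogeneity=True)
```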

effector.regional_effect_pdp.RegionalPDPBase

Bases: RegionalEffectBase

Source code in /home/runner/work/effector/effector/effector/regional_effect_pdp.py
class RegionalPDPBase(RegionalEffectBase):
    def __init__(
        self,
        method_name: str,
        data: np.ndarray,
        model: callable,
        model_jac: typing.Union[None, callable] = None,
        nof_instances: typing.Union[int, str] = 100,
        axis_limits: typing.Union[None, np.ndarray] = None,
        feature_types: typing.Union[list, None] = None,
        cat_limit: typing.Union[int, None] = 10,
        feature_names: typing.Union[list, None] = None,
        target_name: typing.Union[str, None] = None,
    ):
        super(RegionalPDPBase, self).__init__(
            method_name,
            data,
            model,
            model_jac,
            None,
            nof_instances,
            axis_limits,
            feature_types,
            cat_limit,
            feature_names,
            target_name)

    def _create_heterogeneity_function(self, foi, min_points, centering, nof_instances, points_for_centering):
        def heter(data) -> float:
            if data.shape[0] < min_points:
                return BIG_M

            if self.method_name == "pdp":
                pdp = PDP(data, self.model, self.axis_limits, nof_instances=nof_instances)
            else:
                pdp = DerPDP(data, self.model, self.model_jac, self.axis_limits, nof_instances=nof_instances)

            try:
                pdp.fit(features=foi, centering=centering, points_for_centering=points_for_centering)
            except Exception:
                return BIG_M

            # heterogeneity is the mean heterogeneity over the curve
            axis_limits = helpers.axis_limits_from_data(data)

            xx = np.linspace(axis_limits[:, foi][0], axis_limits[:, foi][1], 10)
            try:
                _, z = pdp.eval(feature=foi, xs=xx, heterogeneity=True)
            except Exception:
                return BIG_M
            return np.mean(z)

        return heter

    def fit(
        self,
        features: typing.Union[int, str, list] = "all",
        heter_pcg_drop_thres: float = 0.1,
        heter_small_enough: float = 0.1,
        max_depth: int = 1,
        nof_candidate_splits_for_numerical: int = 20,
        min_points_per_subregion: int = 10,
        candidate_conditioning_features: typing.Union["str", list] = "all",
        split_categorical_features: bool = False,
        centering: typing.Union[bool, str] = False,
        nof_instances: typing.Union[int, str] = "all",
        points_for_centering: int = 100,
    ):
        """
        Find the Regional PDP for a list of features.

        Args:
            features: list of features to fit
            heter_pcg_drop_thres: heterogeneity drop threshold for a split to be considered important
            heter_small_enough: heterogeneity threshold for a region to be considered homogeneous (splitting stops)
            max_depth: maximum number of splits to perform (depth of the tree)
            nof_candidate_splits_for_numerical: number of candidate splits to consider for numerical features
            min_points_per_subregion: minimum allowed number of points in a subregion (otherwise the split is not considered as valid)
            candidate_conditioning_features: list of features to consider as conditioning features for the candidate splits
            split_categorical_features: whether to allow splits on categorical features
        """

        assert min_points_per_subregion >= 2, "min_points_per_subregion must be >= 2"
        features = helpers.prep_features(features, self.dim)
        for feat in tqdm(features):
            heter = self._create_heterogeneity_function(feat, min_points_per_subregion, centering, nof_instances, points_for_centering)

            self._fit_feature(
                feat,
                heter,
                heter_pcg_drop_thres,
                heter_small_enough,
                max_depth,
                nof_candidate_splits_for_numerical,
                min_points_per_subregion,
                candidate_conditioning_features,
                split_categorical_features,
            )

fit(features='all', heter_pcg_drop_thres=0.1, heter_small_enough=0.1, max_depth=1, nof_candidate_splits_for_numerical=20, min_points_per_subregion=10, candidate_conditioning_features='all', split_categorical_features=False, centering=False, nof_instances='all', points_for_centering=100)

Find the Regional PDP for a list of features.

Parameters:

- `features` (`typing.Union[int, str, list]`, default `'all'`): list of features to fit
- `heter_pcg_drop_thres` (`float`, default `0.1`): heterogeneity drop threshold for a split to be considered important
- `heter_small_enough` (`float`, default `0.1`): heterogeneity threshold for a region to be considered homogeneous (splitting stops)
- `max_depth` (`int`, default `1`): maximum number of splits to perform (depth of the tree)
- `nof_candidate_splits_for_numerical` (`int`, default `20`): number of candidate splits to consider for numerical features
- `min_points_per_subregion` (`int`, default `10`): minimum allowed number of points in a subregion (otherwise the split is not considered as valid)
- `candidate_conditioning_features` (`typing.Union[str, list]`, default `'all'`): list of features to consider as conditioning features for the candidate splits
Source code in /home/runner/work/effector/effector/effector/regional_effect_pdp.py
def fit(
    self,
    features: typing.Union[int, str, list] = "all",
    heter_pcg_drop_thres: float = 0.1,
    heter_small_enough: float = 0.1,
    max_depth: int = 1,
    nof_candidate_splits_for_numerical: int = 20,
    min_points_per_subregion: int = 10,
    candidate_conditioning_features: typing.Union[str, list] = "all",
    split_categorical_features: bool = False,
    centering: typing.Union[bool, str] = False,
    nof_instances: typing.Union[int, str] = "all",
    points_for_centering: int = 100,
):
    """
    Find the Regional PDP for a list of features.

    Args:
        features: list of features to fit
        heter_pcg_drop_thres: heterogeneity drop threshold for a split to be considered important
        heter_small_enough: heterogeneity threshold for a region to be considered homogeneous (splitting stops)
        max_depth: maximum number of splits to perform (depth of the tree)
        nof_candidate_splits_for_numerical: number of candidate splits to consider for numerical features
        min_points_per_subregion: minimum allowed number of points in a subregion (otherwise the split is not considered as valid)
        candidate_conditioning_features: list of features to consider as conditioning features for the candidate splits
        split_categorical_features: whether to search for subregions in categorical features
        centering: whether to center the PDP before estimating the heterogeneity
        nof_instances: number of instances to use when computing the heterogeneity
        points_for_centering: number of points to use for centering
    """

    assert min_points_per_subregion >= 2, "min_points_per_subregion must be >= 2"
    features = helpers.prep_features(features, self.dim)
    for feat in tqdm(features):
        heter = self._create_heterogeneity_function(feat, min_points_per_subregion, centering, nof_instances, points_for_centering)

        self._fit_feature(
            feat,
            heter,
            heter_pcg_drop_thres,
            heter_small_enough,
            max_depth,
            nof_candidate_splits_for_numerical,
            min_points_per_subregion,
            candidate_conditioning_features,
            split_categorical_features,
        )

effector.regional_effect_pdp.RegionalPDP

Bases: RegionalPDPBase

Source code in /home/runner/work/effector/effector/effector/regional_effect_pdp.py
class RegionalPDP(RegionalPDPBase):
    def __init__(
        self,
        data: np.ndarray,
        model: callable,
        nof_instances: typing.Union[int, str] = 1000,
        axis_limits: typing.Union[None, np.ndarray] = None,
        feature_types: typing.Union[list, None] = None,
        cat_limit: typing.Union[int, None] = 10,
        feature_names: typing.Union[list, None] = None,
        target_name: typing.Union[str, None] = None,
    ):
        super(RegionalPDP, self).__init__(
            "pdp",
            data,
            model,
            None,
            nof_instances,
            axis_limits,
            feature_types,
            cat_limit,
            feature_names,
            target_name)

__init__(data, model, nof_instances=1000, axis_limits=None, feature_types=None, cat_limit=10, feature_names=None, target_name=None)

Source code in /home/runner/work/effector/effector/effector/regional_effect_pdp.py
def __init__(
    self,
    data: np.ndarray,
    model: callable,
    nof_instances: typing.Union[int, str] = 1000,
    axis_limits: typing.Union[None, np.ndarray] = None,
    feature_types: typing.Union[list, None] = None,
    cat_limit: typing.Union[int, None] = 10,
    feature_names: typing.Union[list, None] = None,
    target_name: typing.Union[str, None] = None,
):
    super(RegionalPDP, self).__init__(
        "pdp",
        data,
        model,
        None,
        nof_instances,
        axis_limits,
        feature_types,
        cat_limit,
        feature_names,
        target_name)
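
For orientation, a minimal usage sketch (the dataset and model below are illustrative assumptions, not part of the library):

import numpy as np
from effector.regional_effect_pdp import RegionalPDP

# toy data and black-box model: the effect of x_0 flips sign with x_2
np.random.seed(0)
X = np.random.uniform(-1, 1, size=(1000, 3))
model = lambda x: x[:, 0] * (x[:, 2] >= 0) - x[:, 0] * (x[:, 2] < 0)

# build the regional PDP and search for subregions of feature 0
reg_pdp = RegionalPDP(data=X, model=model, nof_instances="all")
reg_pdp.fit(features=0, max_depth=1, heter_pcg_drop_thres=0.1)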

effector.regional_effect_pdp.RegionalDerPDP

Bases: RegionalPDPBase

Source code in /home/runner/work/effector/effector/effector/regional_effect_pdp.py
class RegionalDerPDP(RegionalPDPBase):
    def __init__(
        self,
        data: np.ndarray,
        model: callable,
        model_jac: typing.Union[None, callable] = None,
        nof_instances: typing.Union[int, str] = 1000,
        axis_limits: typing.Union[None, np.ndarray] = None,
        feature_types: typing.Union[list, None] = None,
        cat_limit: typing.Union[int, None] = 10,
        feature_names: typing.Union[list, None] = None,
        target_name: typing.Union[str, None] = None,
    ):
        super(RegionalDerPDP, self).__init__(
            "d-pdp",
            data,
            model,
            model_jac,
            nof_instances,
            axis_limits,
            feature_types,
            cat_limit,
            feature_names,
            target_name)

__init__(data, model, model_jac=None, nof_instances=1000, axis_limits=None, feature_types=None, cat_limit=10, feature_names=None, target_name=None)

Source code in /home/runner/work/effector/effector/effector/regional_effect_pdp.py
def __init__(
    self,
    data: np.ndarray,
    model: callable,
    model_jac: typing.Union[None, callable] = None,
    nof_instances: typing.Union[int, str] = 1000,
    axis_limits: typing.Union[None, np.ndarray] = None,
    feature_types: typing.Union[list, None] = None,
    cat_limit: typing.Union[int, None] = 10,
    feature_names: typing.Union[list, None] = None,
    target_name: typing.Union[str, None] = None,
):
    super(RegionalDerPDP, self).__init__(
        "d-pdp",
        data,
        model,
        model_jac,
        nof_instances,
        axis_limits,
        feature_types,
        cat_limit,
        feature_names,
        target_name)
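
The same sketch for the derivative-PDP variant; the model and its analytic Jacobian below are illustrative assumptions (with model_jac=None the derivatives would have to be obtained numerically, cf. compute_jacobian_numerically in the Utils section):

import numpy as np
from effector.regional_effect_pdp import RegionalDerPDP

np.random.seed(0)
X = np.random.uniform(-1, 1, size=(1000, 2))
model = lambda x: x[:, 0] ** 2 + x[:, 1]
# analytic Jacobian, shape (N, D): d model / d x_0 = 2 x_0, d model / d x_1 = 1
model_jac = lambda x: np.stack([2 * x[:, 0], np.ones(x.shape[0])], axis=-1)

reg_dpdp = RegionalDerPDP(data=X, model=model, model_jac=model_jac)
reg_dpdp.fit(features="all", max_depth=1)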

effector.regional_effect_shap.RegionalShapDP

Bases: RegionalEffectBase

Source code in /home/runner/work/effector/effector/effector/regional_effect_shap.py
class RegionalShapDP(RegionalEffectBase):
    big_m = helpers.BIG_M

    def __init__(
        self,
        data: np.ndarray,
        model: Callable,
        axis_limits: Optional[np.ndarray] = None,
        nof_instances: Union[int, str] = 100,
        feature_types: Optional[List[str]] = None,
        cat_limit: Optional[int] = 10,
        feature_names: Optional[List[str]] = None,
        target_name: Optional[str] = None,
    ):
        """
        Regional SHAP constructor.

        Args:
            data: the design matrix

                - shape: `(N,D)`
            model: the black-box model. Must be a `Callable` with:

                - input: `ndarray` of shape `(N, D)`
                - output: `ndarray` of shape `(N, )`

            axis_limits: The limits of the feature effect plot along each axis

                - use a `ndarray` of shape `(2, D)`, to specify them manually
                - use `None`, to be inferred from the data

            nof_instances: maximum number of instances to be used for the SHAP estimation.

                - use "all", for using all instances.
                - use an `int`, for using `nof_instances` instances.

            feature_types: The feature types.

                - use `None`, to infer them from the data; a feature is treated as categorical if it has
                fewer than `cat_limit` unique values, and as numerical otherwise.
                - use a list with elements `"cat"` or `"numerical"`, to specify them manually.

            cat_limit: features with fewer than `cat_limit` unique values are considered categorical

            feature_names: The names of the features

                - use a `list` of `str`, to specify the names manually. For example: `["age", "weight", ...]`
                - use `None`, to keep the default names: `["x_0", "x_1", ...]`

            target_name: The name of the target variable

                - use a `str`, to specify its name manually. For example: `"price"`
                - use `None`, to keep the default name: `"y"`
        """
        super(RegionalShapDP, self).__init__(
            "shap",
            data,
            model,
            None,
            None,
            nof_instances,
            axis_limits,
            feature_types,
            cat_limit,
            feature_names,
            target_name
        )

    def _create_heterogeneity_function(self, foi, min_points, centering, points_for_centering):

        def heterogeneity_function(data) -> float:
            if data.shape[0] < min_points:
                return self.big_m

            axis_limits = helpers.axis_limits_from_data(data)
            xx = np.linspace(axis_limits[:, foi][0], axis_limits[:, foi][1], 10)

            shap = ShapDP(data, self.model, None, self.nof_instances)
            shap.fit(foi, centering, points_for_centering)
            _, z = shap.eval(foi, xx, heterogeneity=True)
            return np.mean(z)

        return heterogeneity_function

    def fit(
            self,
            features: typing.Union[int, str, list],
            heter_pcg_drop_thres: float = 0.1,
            heter_small_enough: float = 0.1,
            max_depth: int = 1,
            nof_candidate_splits_for_numerical: int = 20,
            min_points_per_subregion: int = 10,
            candidate_conditioning_features: typing.Union[str, list] = "all",
            split_categorical_features: bool = False,
            centering: typing.Union[bool, str] = False,
            points_for_centering: int = 100,
    ):
        """
        Fit the regional SHAP.

        Args:
            features: the features to fit.
                - If set to "all", all the features will be fitted.

            heter_pcg_drop_thres: threshold for the percentage drop in heterogeneity to consider a split valid
            heter_small_enough: heterogeneity threshold for a region to be considered homogeneous (splitting stops)
            max_depth: maximum number of splits to perform (depth of the tree)
            nof_candidate_splits_for_numerical: number of candidate splits to consider for numerical features
            min_points_per_subregion: minimum allowed number of points in a subregion (otherwise the split is not considered as valid)
            candidate_conditioning_features: list of features to consider as conditioning features for the candidate splits
            split_categorical_features: whether to search for subregions in categorical features
            centering: whether to center the SHAP dependence plots before estimating the heterogeneity
            points_for_centering: number of points to use for centering
        """
        assert min_points_per_subregion >= 2, "min_points_per_subregion must be >= 2"
        features = helpers.prep_features(features, self.dim)
        for feat in tqdm(features):
            heter = self._create_heterogeneity_function(
                feat, min_points_per_subregion, centering, points_for_centering
            )

            self._fit_feature(
                feat,
                heter,
                heter_pcg_drop_thres,
                heter_small_enough,
                max_depth,
                nof_candidate_splits_for_numerical,
                min_points_per_subregion,
                candidate_conditioning_features,
                split_categorical_features,
            )

            self.method_args["feature_" + str(feat)] = {
                "heter_pcg_drop_thres": heter_pcg_drop_thres,
                "heter_small_enough": heter_small_enough,
                "max_depth": max_depth,
                "nof_candidate_splits_for_numerical": nof_candidate_splits_for_numerical,
                "min_points_per_subregion": min_points_per_subregion,
                "candidate_conditioning_features": candidate_conditioning_features,
                "split_categorical_features": split_categorical_features,
                "centering": centering,
                "points_for_centering": points_for_centering,
            }

__init__(data, model, axis_limits=None, nof_instances=100, feature_types=None, cat_limit=10, feature_names=None, target_name=None)

Regional SHAP constructor.

Parameters:

    data (np.ndarray, required):
        the design matrix
        - shape: (N,D)
    model (Callable, required):
        the black-box model. Must be a Callable with:
        - input: ndarray of shape (N, D)
        - output: ndarray of shape (N, )
    axis_limits (Optional[np.ndarray], default=None):
        The limits of the feature effect plot along each axis
        - use a ndarray of shape (2, D), to specify them manually
        - use None, to be inferred from the data
    nof_instances (Union[int, str], default=100):
        maximum number of instances to be used for the SHAP estimation
        - use "all", for using all instances
        - use an int, for using nof_instances instances
    feature_types (Optional[List[str]], default=None):
        The feature types.
        - use None, to infer them from the data; a feature is treated as categorical if it has fewer than cat_limit unique values, and as numerical otherwise
        - use a list with elements "cat" or "numerical", to specify them manually
    cat_limit (Optional[int], default=10):
        features with fewer than cat_limit unique values are considered categorical
    feature_names (Optional[List[str]], default=None):
        The names of the features
        - use a list of str, to specify the names manually. For example: ["age", "weight", ...]
        - use None, to keep the default names: ["x_0", "x_1", ...]
    target_name (Optional[str], default=None):
        The name of the target variable
        - use a str, to specify its name manually. For example: "price"
        - use None, to keep the default name: "y"
Source code in /home/runner/work/effector/effector/effector/regional_effect_shap.py
def __init__(
    self,
    data: np.ndarray,
    model: Callable,
    axis_limits: Optional[np.ndarray] = None,
    nof_instances: Union[int, str] = 100,
    feature_types: Optional[List[str]] = None,
    cat_limit: Optional[int] = 10,
    feature_names: Optional[List[str]] = None,
    target_name: Optional[str] = None,
):
    """
    Regional SHAP constructor.

    Args:
        data: the design matrix

            - shape: `(N,D)`
        model: the black-box model. Must be a `Callable` with:

            - input: `ndarray` of shape `(N, D)`
            - output: `ndarray` of shape `(N, )`

        axis_limits: The limits of the feature effect plot along each axis

            - use a `ndarray` of shape `(2, D)`, to specify them manually
            - use `None`, to be inferred from the data

        nof_instances: maximum number of instances to be used for the SHAP estimation.

            - use "all", for using all instances.
            - use an `int`, for using `nof_instances` instances.

        feature_types: The feature types.

            - use `None`, to infer them from the data; a feature is treated as categorical if it has
            fewer than `cat_limit` unique values, and as numerical otherwise.
            - use a list with elements `"cat"` or `"numerical"`, to specify them manually.

        cat_limit: features with fewer than `cat_limit` unique values are considered categorical

        feature_names: The names of the features

            - use a `list` of `str`, to specify the names manually. For example: `["age", "weight", ...]`
            - use `None`, to keep the default names: `["x_0", "x_1", ...]`

        target_name: The name of the target variable

            - use a `str`, to specify its name manually. For example: `"price"`
            - use `None`, to keep the default name: `"y"`
    """
    super(RegionalShapDP, self).__init__(
        "shap",
        data,
        model,
        None,
        None,
        nof_instances,
        axis_limits,
        feature_types,
        cat_limit,
        feature_names,
        target_name
    )

fit(features, heter_pcg_drop_thres=0.1, heter_small_enough=0.1, max_depth=1, nof_candidate_splits_for_numerical=20, min_points_per_subregion=10, candidate_conditioning_features='all', split_categorical_features=False, centering=False, points_for_centering=100)

Fit the regional SHAP.

Parameters:

    features (typing.Union[int, str, list], required):
        the features to fit
        - If set to "all", all the features will be fitted.
    heter_pcg_drop_thres (float, default=0.1):
        threshold for the percentage drop in heterogeneity to consider a split valid
    heter_small_enough (float, default=0.1):
        heterogeneity threshold for a region to be considered homogeneous (splitting stops)
    max_depth (int, default=1):
        maximum number of splits to perform (depth of the tree)
    nof_candidate_splits_for_numerical (int, default=20):
        number of candidate splits to consider for numerical features
    min_points_per_subregion (int, default=10):
        minimum allowed number of points in a subregion (otherwise the split is not considered valid)
    candidate_conditioning_features (typing.Union[str, list], default='all'):
        list of features to consider as conditioning features for the candidate splits
    split_categorical_features (bool, default=False):
        whether to search for subregions in categorical features
    centering (typing.Union[bool, str], default=False):
        whether to center the SHAP dependence plots before estimating the heterogeneity
    points_for_centering (int, default=100):
        number of points to use for centering
Source code in /home/runner/work/effector/effector/effector/regional_effect_shap.py
def fit(
        self,
        features: typing.Union[int, str, list],
        heter_pcg_drop_thres: float = 0.1,
        heter_small_enough: float = 0.1,
        max_depth: int = 1,
        nof_candidate_splits_for_numerical: int = 20,
        min_points_per_subregion: int = 10,
        candidate_conditioning_features: typing.Union[str, list] = "all",
        split_categorical_features: bool = False,
        centering: typing.Union[bool, str] = False,
        points_for_centering: int = 100,
):
    """
    Fit the regional SHAP.

    Args:
        features: the features to fit.
            - If set to "all", all the features will be fitted.

        heter_pcg_drop_thres: threshold for the percentage drop in heterogeneity to consider a split valid
        heter_small_enough: heterogeneity threshold for a region to be considered homogeneous (splitting stops)
        max_depth: maximum number of splits to perform (depth of the tree)
        nof_candidate_splits_for_numerical: number of candidate splits to consider for numerical features
        min_points_per_subregion: minimum allowed number of points in a subregion (otherwise the split is not considered as valid)
        candidate_conditioning_features: list of features to consider as conditioning features for the candidate splits
        split_categorical_features: whether to search for subregions in categorical features
        centering: whether to center the SHAP dependence plots before estimating the heterogeneity
        points_for_centering: number of points to use for centering
    """
    assert min_points_per_subregion >= 2, "min_points_per_subregion must be >= 2"
    features = helpers.prep_features(features, self.dim)
    for feat in tqdm(features):
        heter = self._create_heterogeneity_function(
            feat, min_points_per_subregion, centering, points_for_centering
        )

        self._fit_feature(
            feat,
            heter,
            heter_pcg_drop_thres,
            heter_small_enough,
            max_depth,
            nof_candidate_splits_for_numerical,
            min_points_per_subregion,
            candidate_conditioning_features,
            split_categorical_features,
        )

        self.method_args["feature_" + str(feat)] = {
            "heter_pcg_drop_thres": heter_pcg_drop_thres,
            "heter_small_enough": heter_small_enough,
            "max_depth": max_depth,
            "nof_candidate_splits_for_numerical": nof_candidate_splits_for_numerical,
            "min_points_per_subregion": min_points_per_subregion,
            "candidate_conditioning_features": candidate_conditioning_features,
            "split_categorical_features": split_categorical_features,
            "centering": centering,
            "points_for_centering": points_for_centering,
        }
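
A minimal usage sketch (data and model are illustrative assumptions; unlike the PDP variant, features has no default here and must be supplied):

import numpy as np
from effector.regional_effect_shap import RegionalShapDP

np.random.seed(0)
X = np.random.uniform(-1, 1, size=(500, 3))
model = lambda x: x[:, 0] * (x[:, 1] > 0)

# keep nof_instances small: each heterogeneity evaluation fits a ShapDP
reg_shap = RegionalShapDP(data=X, model=model, nof_instances=100)
reg_shap.fit(features=0, max_depth=1, heter_pcg_drop_thres=0.1)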

Binning Methods

effector.binning_methods.Fixed

Source code in /home/runner/work/effector/effector/effector/binning_methods.py
class Fixed:
    def __init__(self,
                 nof_bins: int = 100,
                 min_points_per_bin=10,
                 cat_limit: int = 15
                 ):
        self.nof_bins = nof_bins
        self.min_points_per_bin = min_points_per_bin
        self.cat_limit = cat_limit

__init__(nof_bins=100, min_points_per_bin=10, cat_limit=15)

Source code in /home/runner/work/effector/effector/effector/binning_methods.py
def __init__(self,
             nof_bins: int = 100,
             min_points_per_bin=10,
             cat_limit: int = 15
             ):
    self.nof_bins = nof_bins
    self.min_points_per_bin = min_points_per_bin
    self.cat_limit = cat_limit

effector.binning_methods.Greedy

Source code in /home/runner/work/effector/effector/effector/binning_methods.py
class Greedy:
    def __init__(self,
                 init_nof_bins: int = 100,
                 min_points_per_bin: int = 10,
                 discount: float = 0.3,
                 cat_limit: int = 15
                 ):
        self.max_nof_bins = init_nof_bins
        self.min_points_per_bin = min_points_per_bin
        self.discount = discount
        self.cat_limit = cat_limit

__init__(init_nof_bins=100, min_points_per_bin=10, discount=0.3, cat_limit=15)

Source code in /home/runner/work/effector/effector/effector/binning_methods.py
def __init__(self,
             init_nof_bins: int = 100,
             min_points_per_bin: int = 10,
             discount: float = 0.3,
             cat_limit: int = 15
             ):
    self.max_nof_bins = init_nof_bins
    self.min_points_per_bin = min_points_per_bin
    self.discount = discount
    self.cat_limit = cat_limit

effector.binning_methods.DynamicProgramming

Source code in /home/runner/work/effector/effector/effector/binning_methods.py
class DynamicProgramming:
    def __init__(self,
                 max_nof_bins: int = 20,
                 min_points_per_bin: int = 10,
                 discount: float = 0.3,
                 cat_limit: int = 15):
        self.max_nof_bins = max_nof_bins
        self.min_points_per_bin = min_points_per_bin
        self.discount = discount
        self.cat_limit = cat_limit

__init__(max_nof_bins=20, min_points_per_bin=10, discount=0.3, cat_limit=15)

Source code in /home/runner/work/effector/effector/effector/binning_methods.py
def __init__(self,
             max_nof_bins: int = 20,
             min_points_per_bin: int = 10,
             discount: float = 0.3,
             cat_limit: int = 15):
    self.max_nof_bins = max_nof_bins
    self.min_points_per_bin = min_points_per_bin
    self.discount = discount
    self.cat_limit = cat_limit
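
All three classes are plain parameter containers, meant to be passed as the binning_method of the ALE-family fit methods. A minimal sketch, assuming the top-level effector.ALE class (data and model are illustrative):

import numpy as np
import effector
from effector import binning_methods

np.random.seed(0)
X = np.random.uniform(-1, 1, size=(1000, 2))
model = lambda x: x[:, 0] + x[:, 1] ** 2

# fixed-width binning with 20 bins instead of the default scheme
ale = effector.ALE(data=X, model=model)
ale.fit(features=0, binning_method=binning_methods.Fixed(nof_bins=20, min_points_per_bin=5))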

Utils

compute_accumulated_effect(x, limits, bin_effect, dx, square=False)

Compute the accumulated effect at each point x.

Notes

The function implements the following formula:

\[ \mathtt{dx}[i] = \mathtt{limits}[i+1] - \mathtt{limits}[i] \]
\[ \mathtt{full\_bin\_acc} = \sum_{i=0}^{k_x - 1} \mathtt{dx}[i] * \mathtt{bin\_effect}[i] \]
\[ \mathtt{remainder} = (x - \mathtt{limits}[k_x-1])* \mathtt{bin\_effect}[k_x] \]
\[ f(x) = \mathtt{full\_bin\_acc} + \mathtt{remainder} \]
Notes

If square=True, then the formula becomes:

\[ \mathtt{full\_bin\_acc} = \sum_{i=0}^{k_x - 1} \mathtt{dx}^2[i] * \mathtt{bin\_effect}[i] \]
\[ \mathtt{remainder} = (x - \mathtt{limits}[k_x-1])^2 * \mathtt{bin\_effect}[k_x] \]

Examples:

>>> x = np.array([-1.0, -0.5, 0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0])
>>> limits = np.array([0, 1.5, 2.0])
>>> bin_effect = np.array([1.0, -1.0])
>>> dx = np.array([1.5, 0.5])
>>> compute_accumulated_effect(x, limits, bin_effect, dx)
array([0. , 0. , 0. , 0.5, 1. , 1.5, 1. , 1. , 1. ])
>>> x = np.array([-1.0, -0.5, 0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0])
>>> limits = np.array([0, 1.5, 2.0])
>>> bin_effect = np.array([1.0, 1.0])
>>> dx = np.array([1.5, 0.5])
>>> compute_accumulated_effect(x, limits, bin_effect, dx)
array([0. , 0. , 0. , 0.5, 1. , 1.5, 2. , 2. , 2. ])

Parameters:

    x (np.ndarray, required): The points we want to evaluate at, (T)
    limits (np.ndarray, required): The bin limits, (K+1)
    bin_effect (np.ndarray, required): The effect in each bin, (K)
    dx (np.ndarray, required): The bin-widths, (K)
    square (bool, default=False): Whether to square the width. If True, the effect is bin_effect * dx^2, otherwise bin_effect * dx

Returns:

    y (np.ndarray): The accumulated effect at each point, (T)

Source code in /home/runner/work/effector/effector/effector/utils.py
def compute_accumulated_effect(
    x: np.ndarray,
    limits: np.ndarray,
    bin_effect: np.ndarray,
    dx: np.ndarray,
    square: bool = False,
) -> np.ndarray:
    """Compute the accumulated effect at each point `x`.

    Notes:
        The function implements the following formula:

        $$
        \mathtt{dx}[i] = \mathtt{limits}[i+1] - \mathtt{limits}[i]
        $$

        $$
        \mathtt{full\_bin\_acc} = \sum_{i=0}^{k_x - 1} \mathtt{dx}[i] * \mathtt{bin\_effect}[i]
        $$

        $$
        \mathtt{remainder} = (x - \mathtt{limits}[k_x-1])* \mathtt{bin\_effect}[k_x]
        $$

        $$
        f(x) =  \mathtt{full\_bin\_acc} + \mathtt{remainder}
        $$

    Notes:
        if `square=True`, then the formula is:
        $$
        \mathtt{full\_bin\_acc} = \sum_{i=0}^{k_x - 1} \mathtt{dx}^2[i] * \mathtt{bin\_effect}[i]
        $$

        $$
        \mathtt{remainder} = (x - \mathtt{limits}[k_x-1])^2* \mathtt{bin\_effect}[k_x]
        $$

    Examples:
        >>> x = np.array([-1.0, -0.5, 0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0])
        >>> limits = np.array([0, 1.5, 2.0])
        >>> bin_effect = np.array([1.0, -1.0])
        >>> dx = np.array([1.5, 0.5])
        >>> compute_accumulated_effect(x, limits, bin_effect, dx)
        array([0. , 0. , 0. , 0.5, 1. , 1.5, 1. , 1. , 1. ])

        >>> x = np.array([-1.0, -0.5, 0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0])
        >>> limits = np.array([0, 1.5, 2.0])
        >>> bin_effect = np.array([1.0, 1.0])
        >>> dx = np.array([1.5, 0.5])
        >>> compute_accumulated_effect(x, limits, bin_effect, dx)
        array([0. , 0. , 0. , 0.5, 1. , 1.5, 2. , 2. , 2. ])



    Parameters:
        x: The points we want to evaluate at, (T)
        limits: The bin limits, (K+1)
        bin_effect: The effect in each bin, (K)
        dx: The bin-widths, (K)
        square: Whether to square the width. If true, the effect is bin_effect * dx^2, otherwise bin_effect * dx

    Returns:
        y: The accumulated effect at each point, (T)


    """
    # find where each point belongs to
    ind = np.digitize(x, limits)

    # for each point, find the accumulated full-bin effect
    x_cumsum = (bin_effect * dx**2).cumsum() if square else (bin_effect * dx).cumsum()
    tmp = np.concatenate([[0, 0], x_cumsum])
    full_bin_effect = tmp[ind]

    # for each point, find the remaining effect
    tmp = np.concatenate([[limits[0]], limits[:-1], [BIG_M]])
    deltas = x - tmp[ind]
    deltas[deltas < 0] = 0  # if xs < left_limit => delta = 0
    deltas = deltas**2 if square else deltas
    tmp = np.concatenate([[0.0], bin_effect, [bin_effect[-1]]])
    remaining_effect = deltas * tmp[ind]

    # final effect
    y = full_bin_effect + remaining_effect
    return y
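
The docstring examples only exercise square=False; as a sanity check of the square=True branch, the same inputs give the values below (hand-derived from the formulas above, so treat them as illustrative rather than authoritative output):

import numpy as np
from effector.utils import compute_accumulated_effect

x = np.array([-1.0, -0.5, 0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0])
limits = np.array([0, 1.5, 2.0])
bin_effect = np.array([1.0, -1.0])
dx = np.array([1.5, 0.5])

# each full bin now contributes bin_effect * dx**2 and the remainder is squared:
# expected y = [0., 0., 0., 0.25, 1., 2.25, 2., 2., 2.]
y = compute_accumulated_effect(x, limits, bin_effect, dx, square=True)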

compute_ale_params(xs, df_dxs, limits)

Compute all important parameters for the ALE plot.

Examples:

>>> # Example without interpolation
>>> xs = np.array([0.5, 1.2, 2, 2.3])
>>> df_dxs = np.array([30, 34, 15, 17])
>>> limits = np.array([0, 1.5, 3.])
>>> compute_ale_params(xs, df_dxs, limits)
{'limits': array([0. , 1.5, 3. ]), 'dx': array([1.5, 1.5]), 'points_per_bin': array([2, 2]), 'bin_effect': array([32., 16.]), 'bin_variance': array([4., 1.]), 'bin_estimator_variance': array([2. , 0.5])}
>>> # Example with interpolation
>>> xs = np.array([1, 2, 2.8, 4])
>>> df_dxs = np.array([31, 34, 37, 40])
>>> limits = np.array([1, 3, 4])
>>> compute_ale_params(xs, df_dxs, limits)
{'limits': array([1, 3, 4]), 'dx': array([2, 1]), 'points_per_bin': array([3, 1]), 'bin_effect': array([34., 40.]), 'bin_variance': array([6., 6.]), 'bin_estimator_variance': array([2., 2.])}

Parameters:

    xs (np.ndarray, required): The values of the s-th feature, (N)
    df_dxs (np.ndarray, required): The effect wrt the s-th feature, (N)
    limits (np.ndarray, required): The bin limits, (K+1)

Returns:

    parameters (dict): the ALE parameters per bin, with keys "limits", "dx", "points_per_bin", "bin_effect", "bin_variance" and "bin_estimator_variance"

Source code in /home/runner/work/effector/effector/effector/utils.py
def compute_ale_params(xs: np.ndarray, df_dxs: np.ndarray, limits: np.ndarray) -> dict:
    """
    Compute all important parameters for the ALE plot.

    Examples:
        >>> # Example without interpolation
        >>> xs = np.array([0.5, 1.2, 2, 2.3])
        >>> df_dxs = np.array([30, 34, 15, 17])
        >>> limits = np.array([0, 1.5, 3.])
        >>> compute_ale_params(xs, df_dxs, limits)
        {'limits': array([0. , 1.5, 3. ]), 'dx': array([1.5, 1.5]), 'points_per_bin': array([2, 2]), 'bin_effect': array([32., 16.]), 'bin_variance': array([4., 1.]), 'bin_estimator_variance': array([2. , 0.5])}

        >>> # Example with interpolation
        >>> xs = np.array([1, 2, 2.8, 4])
        >>> df_dxs = np.array([31, 34, 37, 40])
        >>> limits = np.array([1, 3, 4])
        >>> compute_ale_params(xs, df_dxs, limits)
        {'limits': array([1, 3, 4]), 'dx': array([2, 1]), 'points_per_bin': array([3, 1]), 'bin_effect': array([34., 40.]), 'bin_variance': array([6., 6.]), 'bin_estimator_variance': array([2., 2.])}

    Args:
        xs: The values of s-th feature, (N)
        df_dxs: The effect wrt the s-th feature, (N)
        limits: The bin limits, (K+1)

    Returns:
        parameters: dict

    """
    # compute bin-widths
    dx = np.array([limits[i + 1] - limits[i] for i in range(len(limits) - 1)])

    # compute mean effect on each bin
    bin_effect_nans, points_per_bin = compute_bin_effect(xs, df_dxs, limits)

    # compute effect variance in each bin
    bin_variance_nans, bin_estimator_variance_nans = compute_bin_variance(
        xs, df_dxs, limits, bin_effect_nans
    )

    # interpolate NaNs
    bin_effect = fill_nans(bin_effect_nans)
    bin_variance = fill_nans(bin_variance_nans)
    bin_estimator_variance = fill_nans(bin_estimator_variance_nans)

    parameters = {
        "limits": limits,
        "dx": dx,
        "points_per_bin": points_per_bin,
        "bin_effect": bin_effect,
        "bin_variance": bin_variance,
        "bin_estimator_variance": bin_estimator_variance,
    }
    return parameters

compute_bin_effect(xs, df_dxs, limits)

Compute the mean effect in each bin.

Notes

The function (a) allocates the instances in the bins and (b) aggregates the instance-level effects to compute the average bin-effect. If no instances lie in a bin, then the bin effect is NaN.

\[ \mathtt{bin\_effect}_k = {1 \over |i \in bin_k|} \sum_{i \in bin_k} \mathtt{effect}_i \]

Examples:

>>> n = 100
>>> xs = np.ones([n]) - 0.5
>>> df_dxs = np.ones_like(xs) * 10
>>> limits = np.array([0., 1., 2.0])
>>> bin_effects, ppb = compute_bin_effect(xs, df_dxs, limits)
>>> bin_effects
array([10., nan])
>>> ppb
array([100,   0])

Parameters:

    xs (np.ndarray, required): The s-th feature of the instances, (N)
    df_dxs (np.ndarray, required): The effect wrt the s-th feature for each instance, (N)
    limits (np.ndarray, required): The bin limits, (K+1)

Returns:

    bin_effects (np.ndarray): The average effect per bin, (K)
    points_per_bin (np.ndarray): The number of points per bin, (K)

Source code in /home/runner/work/effector/effector/effector/utils.py
def compute_bin_effect(
    xs: np.ndarray, df_dxs: np.ndarray, limits: np.ndarray
) -> typing.Tuple[np.ndarray, np.ndarray]:
    """Compute the mean effect in each bin.

    Notes:
        The function (a) allocates the instances in the bins and (b) aggregates the instance-level effects to compute
        the average bin-effect. If no instances lie in a bin, then the bin effect is NaN.

        $$
        \mathtt{bin\_effect}_k = {1 \over |i \in bin_k|} \sum_{i \in bin_k} \mathtt{effect}_i
        $$

    Examples:
        >>> n = 100
        >>> xs = np.ones([n]) - 0.5
        >>> df_dxs = np.ones_like(xs) * 10
        >>> limits = np.array([0., 1., 2.0])
        >>> bin_effects, ppb = compute_bin_effect(xs, df_dxs, limits)
        >>> bin_effects
        array([10., nan])
        >>> ppb
        array([100,   0])

    Parameters:
        xs: The s-th feature of the instances, (N)
        df_dxs: The effect wrt the s-th feature for each instance, (N)
        limits: The bin limits, (K+1)

    Returns:
        bin_effects: The average effect per bin, (K)
        points_per_bin: The number of points per bin, (K)
    """
    empty_symbol = np.NaN

    # find bin-index of points
    limits_enh = copy.deepcopy(limits).astype(float)
    limits_enh[-1] += EPS
    ind = np.digitize(xs, limits_enh)
    # assert np.alltrue(ind > 0)

    # bin effect is the mean of all points that lie in the bin
    nof_bins = limits.shape[0] - 1
    aggregated_effect = np.bincount(ind - 1, df_dxs, minlength=nof_bins)
    points_per_bin = np.bincount(ind - 1, minlength=nof_bins)

    # if no point lies in a bin, store Nan
    bin_effect_mean = np.divide(
        aggregated_effect,
        points_per_bin,
        out=np.ones(aggregated_effect.shape, dtype=float) * empty_symbol,
        where=points_per_bin != 0,
    )
    return bin_effect_mean, points_per_bin

compute_bin_variance(xs, df_dxs, limits, bin_effect_mean)

Compute the variance of the effect in each bin.

Notes

The function (a) allocates the points in the bins and (b) computes, per bin, the variance and the variance divided by the number of points. If a bin contains fewer than two points, NaN is stored for it.

\[ \mathtt{bin\_variance}_k = {1 \over |i \in bin_k|} \sum_{i \in bin_k} (\mathtt{effect}_i - \mathtt{bin\_effect}_k)^2 \]
\[ \mathtt{bin\_estimator\_variance_k} = {\mathtt{bin\_variance}_k \over |i \in bin_k|} \]

Examples:

>>> n = 100
>>> xs = np.ones([n]) - 0.5
>>> df_dxs = np.ones_like(xs) * 10
>>> limits = np.array([0., 1., 2.0])
>>> bin_effect_mean, ppb = compute_bin_effect(xs, df_dxs, limits)
>>> bin_variance, bin_estimator_variance = compute_bin_variance(xs, df_dxs, limits, bin_effect_mean)
>>> bin_variance
array([ 0., nan])
>>> bin_estimator_variance
array([ 0., nan])
>>> xs = np.ones(4) * 0.5
>>> df_dxs = np.array([1.0, 3.0, 3.0, 5.0])
>>> limits = np.array([0, 1, 2.0])
>>> bin_effect_mean = np.array([np.mean(df_dxs), np.NaN])
>>> compute_bin_variance(xs, df_dxs, limits, bin_effect_mean)
(array([ 2., nan]), array([0.5, nan]))

Parameters:

    xs (np.ndarray, required): The points we evaluate, (N)
    df_dxs (np.ndarray, required): The effect of each point, (N, )
    limits (np.ndarray, required): The bin limits, (K+1)
    bin_effect_mean (np.ndarray, required): Mean effect in each bin, (K)

Returns:

    bin_variance (np.ndarray): The variance in each bin, (K, )
    bin_estimator_variance (np.ndarray): The variance of the estimator in each bin, (K, )

Source code in /home/runner/work/effector/effector/effector/utils.py
def compute_bin_variance(
    xs: np.ndarray, df_dxs: np.ndarray, limits: np.ndarray, bin_effect_mean: np.ndarray
) -> typing.Tuple[np.ndarray, np.ndarray]:
    """
    Compute the variance of the effect in each bin.

    Notes:
        The function (a) allocates the points in the bins and (b) computes the variance and the variance/nof points.
        If less than two points in a bin, NaN is passed.

        $$
        \mathtt{bin\_variance}_k = {1 \over |i \in bin_k|} \sum_{i \in bin_k}
        (\mathtt{effect}_i - \mathtt{bin\_effect}_k)^2
        $$

        $$
        \mathtt{bin\_estimator\_variance_k} = {\mathtt{bin\_variance}_k \over |i \in bin_k|}
        $$

    Examples:
        >>> n = 100
        >>> xs = np.ones([n]) - 0.5
        >>> df_dxs = np.ones_like(xs) * 10
        >>> limits = np.array([0., 1., 2.0])
        >>> bin_effect_mean, ppb = compute_bin_effect(xs, df_dxs, limits)
        >>> bin_variance, bin_estimator_variance = compute_bin_variance(xs, df_dxs, limits, bin_effect_mean)
        >>> bin_variance
        array([ 0., nan])
        >>> bin_estimator_variance
        array([ 0., nan])

        >>> xs = np.ones(4) * 0.5
        >>> df_dxs = np.array([1.0, 3.0, 3.0, 5.0])
        >>> limits = np.array([0, 1, 2.0])
        >>> bin_effect_mean = np.array([np.mean(df_dxs), np.NaN])
        >>> compute_bin_variance(xs, df_dxs, limits, bin_effect_mean)
        (array([ 2., nan]), array([0.5, nan]))

    Parameters:
        xs: The points we evaluate, (N)
        df_dxs: The effect of each point, (N, )
        limits: The bin limits (K+1)
        bin_effect_mean: Mean effect in each bin, (K)

    Returns:
        bin_variance: The variance in each bin, (K, )
        bin_estimator_variance: The variance of the estimator in each bin, (K, )

    """
    empty_symbol = np.NaN

    # find bin-index of points
    eps = 1e-8
    limits_enh = copy.deepcopy(limits).astype(float)
    limits_enh[-1] += eps
    ind = np.digitize(xs, limits_enh)
    # assert np.alltrue(ind > 0)

    # variance of the effect in each bin
    variance_per_point = (df_dxs - bin_effect_mean[ind - 1]) ** 2
    nof_bins = limits.shape[0] - 1
    aggregated_variance_per_bin = np.bincount(
        ind - 1, variance_per_point, minlength=nof_bins
    )
    points_per_bin = np.bincount(ind - 1, minlength=nof_bins)

    # if less than two points in a bin, store Nan
    bin_variance = np.divide(
        aggregated_variance_per_bin,
        points_per_bin,
        out=np.ones(aggregated_variance_per_bin.shape, dtype=float) * empty_symbol,
        where=points_per_bin > 1,
    )

    # the variance of the estimator
    bin_estimator_variance = np.divide(
        bin_variance,
        points_per_bin,
        out=np.ones(aggregated_variance_per_bin.shape, dtype=float) * empty_symbol,
        where=points_per_bin > 1,
    )
    return bin_variance, bin_estimator_variance

compute_jacobian_numerically(model, data, eps=1e-08)

Compute the Jacobian of the model using finite differences.

Notes

The function computes the Jacobian of the model using finite differences. The formula is:

\[ \mathtt{J} = {\mathtt{model}(x + \mathtt{eps}) - \mathtt{model}(x) \over \mathtt{eps}} \]

Examples:

>>> data = np.array([[1, 2], [2, 3.0]])
>>> model = lambda x: np.sum(x, axis=1)
>>> compute_jacobian_numerically(model, data)
array([[1., 1.],
       [1., 1.]])

Parameters:

    model (typing.Callable, required): The black-box model ((N, D) -> (N))
    data (np.ndarray, required): The dataset, (N, D)
    eps (float, default=1e-08): The finite difference step

Returns:

    jacobian (np.ndarray): The Jacobian of the model, (N, D)

Source code in /home/runner/work/effector/effector/effector/utils.py
def compute_jacobian_numerically(
    model: typing.Callable, data: np.ndarray, eps: float = 1e-8
) -> np.ndarray:
    """Compute the Jacobian of the model using finite differences.

    Notes:
        The function computes the Jacobian of the model using finite differences. The formula is:

        $$
        \mathtt{J} = {\mathtt{model}(x + \mathtt{eps}) - \mathtt{model}(x) \over \mathtt{eps}}
        $$

    Examples:
        >>> data = np.array([[1, 2], [2, 3.0]])
        >>> model = lambda x: np.sum(x, axis=1)
        >>> compute_jacobian_numerically(model, data)
        array([[1., 1.],
               [1., 1.]])

    Args:
        data: The dataset, (N, D)
        model: The black-box model ((N, D) -> (N))
        eps: The finite difference step

    Returns:
        jacobian: The Jacobian of the model, (N, D)

    """
    assert data.ndim == 2
    jacobian = np.zeros_like(data)
    for f in range(data.shape[1]):
        data_plus = copy.deepcopy(data)
        data_plus[:, f] += eps
        jacobian[:, f] = (model(data_plus) - model(data)) / eps
    return jacobian

compute_local_effects(data, model, limits, feature)

Compute the local effects, permuting the feature of interest using the bin limits.

Notes

The function (a) allocates the points in the bins based on the feature of interest (foi) and (b) computes the effect as the difference when evaluating the output setting the foi at the right and the left limit of the bin.

Given that the bins are defined as a list [l_0, l_1, ..., l_k], and x_s of the i-th point belongs to the k-th bin:

\[ {df \over dx_s}(x^i) = {f(x_0^i, ... ,x_s=l_k, ..., x_D^i) - f(x_0^i, ... ,x_s=l_{k-1}, ..., x_D^i) \over l_k - l_{k-1}} \]

Examples:

>>> data = np.array([[1, 2], [2, 3.0]])
>>> model = lambda x: np.sum(x, axis=1)
>>> limits = np.array([1.0, 2.0])
>>> data_effect = compute_local_effects(data, model, limits, feature=0)
>>> data_effect
array([1., 1.])

Parameters:

    data (np.ndarray, required): The training set, (N, D)
    model (typing.Callable, required): The black-box model ((N, D) -> (N))
    limits (np.ndarray, required): The bin limits, (K+1)
    feature (int, required): Index of the feature-of-interest

Returns:

    data_effect (np.ndarray): The local effect of each data point, (N)

Source code in /home/runner/work/effector/effector/effector/utils.py
def compute_local_effects(
    data: np.ndarray, model: typing.Callable, limits: np.ndarray, feature: int
) -> np.ndarray:
    """Compute the local effects, permuting the feature of interest using the bin limits.

    Notes:
        The function (a) allocates the points in the bins based on the feature of interest (foi)
        and (b) computes the effect as the difference when evaluating the output setting the foi at the right and the
        left limit of the bin.

        Given that the bins are defined as a list [l_0, l_1, ..., l_k], and x_s of the i-th point belongs to the k-th bin:

        $$
        {df \over dx_s}(x^i) = {f(x_0^i, ... ,x_s=l_k, ..., x_D^i) - f(x_0^i, ... ,x_s=l_{k-1}, ..., x_D^i)
         \over l_k - l_{k-1}}
        $$


    Examples:
        >>> data = np.array([[1, 2], [2, 3.0]])
        >>> model = lambda x: np.sum(x, axis=1)
        >>> limits = np.array([1.0, 2.0])
        >>> data_effect = compute_local_effects(data, model, limits, feature=0)
        >>> data_effect
        array([1., 1.])

    Args:
        data: The training set, (N, D)
        model: The black-box model ((N, D) -> (N))
        limits: The bin limits, (K+1)
        feature: Index of the feature-of-interest

    Returns:
        data_effect: The local effect of each data point, (N)

    """
    assert data.ndim == 2

    # check that limits cover all data points
    assert limits[0] <= np.min(data[:, feature])
    assert limits[-1] >= np.max(data[:, feature])

    # for each point, find the bin-index it belongs to
    limits[-1] += EPS
    ind = np.digitize(data[:, feature], limits)
    assert np.alltrue(ind > 0)

    # compute effect
    right_lim = copy.deepcopy(data)
    left_lim = copy.deepcopy(data)
    right_lim[:, feature] = limits[ind]
    left_lim[:, feature] = limits[ind - 1]
    dx = limits[1] - limits[0]
    data_effect = model(right_lim) - model(left_lim)
    return np.squeeze(data_effect) / dx

fill_nans(x)

Replace NaNs with interpolated values.

Examples:

>>> x = np.array([1.0, np.NaN, 2.0])
>>> fill_nans(x)
array([1. , 1.5, 2. ])
>>> x = np.array([1.0, np.NaN, np.NaN, np.NaN, 2.0])
>>> fill_nans(x)
array([1.  , 1.25, 1.5 , 1.75, 2.  ])
>>> x = np.array([0.5, 1.0, np.NaN, np.NaN, np.NaN])
>>> fill_nans(x)
array([0.5, 1. , 1. , 1. , 1. ])

Parameters:

    x (np.ndarray, required): Time-series with NaNs, (T)

Returns:

    x (np.ndarray): Time-series values without NaNs, (T)

Source code in /home/runner/work/effector/effector/effector/utils.py
def fill_nans(x: np.ndarray) -> np.ndarray:
    """Replace NaNs with interpolated values.

    Examples:
        >>> x = np.array([1.0, np.NaN, 2.0])
        >>> fill_nans(x)
        array([1. , 1.5, 2. ])

        >>> x = np.array([1.0, np.NaN, np.NaN, np.NaN, 2.0])
        >>> fill_nans(x)
        array([1.  , 1.25, 1.5 , 1.75, 2.  ])

        >>> x = np.array([0.5, 1.0, np.NaN, np.NaN, np.NaN])
        >>> fill_nans(x)
        array([0.5, 1. , 1. , 1. , 1. ])

    Parameters:
        x: Time-series with NaNs, (T)

    Returns:
        x: Time-series values without NaNs, (T)
    """
    bin_effect_1 = copy.deepcopy(x)

    def nan_helper(y):
        return np.isnan(y), lambda z: z.nonzero()[0]

    nans, x = nan_helper(bin_effect_1)
    bin_effect_1[nans] = np.interp(x(nans), x(~nans), bin_effect_1[~nans])
    return bin_effect_1

filter_points_in_bin(xs, df_dxs, limits)

Filter the points inside the bin defined by the limits.

Notes

Filtering depends on whether xs lies in the interval [limits[0], limits[1]], not df_dxs.

Examples:

>>> xs = np.array([1, 2, 3])
>>> df_dxs = np.array([32, 34, 36])
>>> limits = np.array([1, 2])
>>> xs, df_dxs = filter_points_in_bin(xs, df_dxs, limits)
>>> xs
array([1, 2])
>>> df_dxs
array([32, 34])

Parameters:

    xs (np.ndarray, required): The instances, (N)
    df_dxs (typing.Union[np.ndarray, None], required): The instance-effects, (N), or None
    limits (np.ndarray, required): [Start, Stop] of the bin

Returns:

    data (np.ndarray): The instances in the bin, (nof_points_in_bin, D)
    data_effect (typing.Union[np.ndarray, None]): The instance-effects in the bin, (nof_points_in_bin, D), or None

Source code in /home/runner/work/effector/effector/effector/utils.py
def filter_points_in_bin(
    xs: np.ndarray, df_dxs: typing.Union[np.ndarray, None], limits: np.ndarray
) -> typing.Tuple[np.ndarray, typing.Union[np.ndarray, None]]:
    """
    Filter the points inside the bin defined by the `limits`.

    Notes:
        Filtering depends on whether `xs` lies in the interval [limits[0], limits[1]], not `df_dxs`.

    Examples:
        >>> xs = np.array([1, 2, 3])
        >>> df_dxs = np.array([32, 34, 36])
        >>> limits = np.array([1, 2])
        >>> xs, df_dxs = filter_points_in_bin(xs, df_dxs, limits)
        >>> xs
        array([1, 2])
        >>> df_dxs
        array([32, 34])

    Args:
        xs: The instances, (N)
        df_dxs: The instance-effects (N) or None
        limits: [Start, Stop] of the bin

    Returns:
        data: The instances in the bin, (nof_points_in_bin, D)
        data_effect: The instance-effects in the bin, (nof_points_in_bin, D) or None

    """
    filt = np.logical_and(limits[0] <= xs, xs <= limits[1])

    # return data
    xs = xs[filt]

    # return data effect if not None
    if df_dxs is not None:
        df_dxs = df_dxs[filt]
    return xs, df_dxs

get_feature_types(data, categorical_limit=10)

Determine the type of each feature.

Notes

A feature is considered categorical if it has fewer than categorical_limit unique values.

Parameters:

    data (np.ndarray, required): The dataset, (N, D)
    categorical_limit (int, default=10): Maximum unique values for a feature to be considered categorical

Returns:

    types (typing.List[str]): A list of strings, where each string is either "cat" or "cont"

Source code in /home/runner/work/effector/effector/effector/utils.py
def get_feature_types(data: np.ndarray, categorical_limit: int = 10) -> typing.List[str]:
    """Determine the type of each feature.

    Notes:
        A feature is considered categorical if it has fewer than `categorical_limit` unique values.

    Args:
        data: The dataset, (N, D)
        categorical_limit: Maximum unique values for a feature to be considered as categorical


    Returns:
        types: A list of strings, where each string is either `"cat"` or `"cont"`

    """

    types = [
        "cat" if len(np.unique(data[:, f])) < categorical_limit else "cont"
        for f in range(data.shape[1])
    ]
    return types
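
This function ships without a docstring example; a minimal sketch with illustrative data:

import numpy as np
from effector.utils import get_feature_types

np.random.seed(0)
# column 0 is binary (2 unique values < 10 => "cat"), column 1 is continuous => "cont"
data = np.stack(
    [np.random.randint(0, 2, 100), np.random.uniform(0, 1, 100)], axis=-1
)
get_feature_types(data, categorical_limit=10)
# expected: ['cat', 'cont']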