mightypy.ml package#

Module contents#

mightypy.ml#

class ALS(dim_factors, n_iter, lambda_=1.0)[source]#

Bases: object

Alternating Least Squares

data_preparation(dataframe: DataFrame, user_col: str, item_col: str, score_col: str)[source]#
fit(dataframe: DataFrame, user_col: str = 'user_id', item_col: str = 'item_id', score_col: str = 'ratings')[source]#
class DecisionTreeClassifier(max_depth: int = 100, min_samples_split: int = 2, criteria: str = 'gini')[source]#

Bases: object

Decision Tree Based Classification Model

Parameters:
  • max_depth (int, optional) – max depth of the tree. Defaults to 100.

  • min_samples_split (int, optional) – min size of the sample at the time of split. Defaults to 2.

  • criteria (str, optional) – criterion used to measure information gain. Defaults to ‘gini’. Available: ‘gini’, ‘entropy’.

predict(X: ndarray | list) ndarray[source]#

predict classification results

Parameters:

X (Union[np.ndarray,list]) – testing matrix.

Raises:

ValueError – input X can only be a list or numpy array.

Returns:

results of classification.

Return type:

np.ndarray

predict_probability(X: ndarray | list) ndarray | dict[source]#

predict classification probabilities

Parameters:

X (Union[np.ndarray,list]) – testing matrix.

Raises:

ValueError – input X can only be a list or numpy array.

Returns:

probability results of classification.

Return type:

Union[np.ndarray, dict]

print_tree(node: Node | None = None, spacing: str = '|-') None[source]#

print the tree

Parameters:
  • node (Union[Node,None], optional) – starting node. Defaults to None, in which case printing starts from the root node of the tree.

  • spacing (str, optional) – printing separator. Defaults to “|-”.

train(X: ndarray | list, y: ndarray | list, feature_name: list = None, target_name: list = None) None[source]#

Train the model

Parameters:
  • X (Union[np.ndarray,list]) – feature matrix.

  • y (Union[np.ndarray,list]) – target matrix.

  • feature_name (list, optional) – feature names list. Defaults to None.

  • target_name (list, optional) – target name list. Defaults to None.

class DecisionTreeRegressor(max_depth: int = 10, min_samples_split: int = 3, criteria: str = 'variance')[source]#

Bases: object

Decision Tree Based Regression Model

Parameters:
  • max_depth (int, optional) – maximum depth of the tree. Defaults to 10.

  • min_samples_split (int, optional) – minimum number of samples while splitting. Defaults to 3.

  • criteria (str, optional) – criteria for best info gain. Defaults to ‘variance’.

predict(X: ndarray, mean_preds: bool = True) ndarray[source]#

predict regression

Parameters:
  • X (np.ndarray) – testing matrix.

  • mean_preds (bool) – do the mean of prediction values. Defaults to True.

Raises:

ValueError – X should be list or numpy array

Returns:

regression prediction.

Return type:

np.ndarray

print_tree(node: Node | None = None, spacing: str = '|-', mean_preds: bool = True) None[source]#

print the tree

Parameters:
  • node (Union[Node,None], optional) – starting node. Defaults to None, in which case printing starts from the root node of the tree.

  • spacing (str, optional) – printing separator. Defaults to “|-”.

  • mean_preds (bool) – do the mean of prediction values. Defaults to True.

train(X: ndarray | list, y: ndarray | list, feature_name: list = None, target_name: list = None) None[source]#

Train the model

Parameters:
  • X (Union[np.ndarray,list]) – feature matrix.

  • y (Union[np.ndarray,list]) – target matrix.

  • feature_name (list, optional) – feature names list. Defaults to None.

  • target_name (list, optional) – target name list. Defaults to None.

class LassoRegression(alpha: float = 0.01, iterations: int = 10000)[source]#

Bases: object

Lasso Regression Model Class (L1 Regularization)

Parameters:
  • alpha (float, optional) – learning rate. Defaults to 0.01.

  • iterations (int, optional) – number of iterations. Defaults to 10000.

References

https://machinelearningexploration.readthedocs.io/en/latest/LinearRegression/Explore.html#Lasso(L1-Regularization)-Regression

property X: ndarray | None#

property X

Returns:

X matrix

Return type:

Union[np.ndarray, None]

property cost_history: list | None#

cost learning history

Returns:

cost history list

Return type:

Union[list, None]

predict(X: ndarray) ndarray[source]#

generate prediction

Parameters:

X (np.ndarray) – input feature matrix

Raises:
  • ValueError – if shape is not proper for the input feature

  • Warning – if model is not trained yet

Returns:

predicted values

Return type:

np.ndarray

property theta: ndarray | None#

property theta

Returns:

theta matrix

Return type:

Union[np.ndarray, None]

property theta_history: list | None#

theta training history

Returns:

theta history list

Return type:

Union[list, None]

train(X: ndarray, y: ndarray, verbose: bool = True, method: str = 'SGD', theta_precision: float = 0.001, penalty: int | float = 1.0, batch_size: int = 30) None[source]#

train model /theta estimator

Parameters:
  • X (np.ndarray) – X matrix/feature matrix.

  • y (np.ndarray) – y matrix/target matrix.

  • verbose (bool, optional) – print things. Defaults to True.

  • method (str, optional) –

    training method. Defaults to “SGD”.

    Available-

    “BGD”(Batch Gradient Descent), “SGD”(Stochastic Gradient Descent)

  • theta_precision (float, optional) – theta initialization value precision. Defaults to 0.001.

  • penalty (Union[float, int], optional) – regularization penalty. Defaults to 1.0.

  • batch_size (int, optional) – batch size only for BGD. Defaults to 30.

property y: ndarray | None#

property y

Returns:

y matrix

Return type:

Union[np.ndarray, None]

class LinearRegression(alpha: float = 0.01, iterations: int = 10000)[source]#

Bases: object

Linear Regression Model Class

Parameters:
  • alpha (float, optional) – learning rate. Defaults to 0.01.

  • iterations (int, optional) – number of iterations. Defaults to 10000.

References

https://machinelearningexploration.readthedocs.io/en/latest/LinearRegression/Explore.html

property X: ndarray | None#

property X

Returns:

X matrix

Return type:

Union[np.ndarray, None]

property cost_history: list | None#

cost learning history

Returns:

cost history list

Return type:

Union[list, None]

predict(X: ndarray) ndarray[source]#

generate prediction

Parameters:

X (np.ndarray) – input feature matrix

Raises:
  • ValueError – if shape is not proper for the input feature

  • Warning – if model is not trained yet

Returns:

predicted values

Return type:

np.ndarray

property theta: ndarray | None#

property theta

Returns:

theta matrix

Return type:

Union[np.ndarray, None]

property theta_history: list | None#

theta training history

Returns:

theta history list

Return type:

Union[list, None]

train(X: ndarray, y: ndarray, verbose: bool = True, method: str = 'SGD', theta_precision: float = 0.001, batch_size: int = 30) None[source]#

train model /theta estimator

Parameters:
  • X (np.ndarray) – X matrix/feature matrix.

  • y (np.ndarray) – y matrix/target matrix.

  • verbose (bool, optional) – print things. Defaults to True.

  • method (str, optional) –

    training method. Defaults to “SGD”.

    Available-

    “BGD”(Batch Gradient Descent), “SGD”(Stochastic Gradient Descent), “NORMAL”(Normal Equation)

  • theta_precision (float, optional) – theta initialization value precision. Defaults to 0.001.

  • batch_size (int, optional) – batch size only for BGD. Defaults to 30.

property y: ndarray | None#

property y

Returns:

y matrix

Return type:

Union[np.ndarray, None]

class LogisticRegression(alpha: float = 0.01, iterations: int = 10000)[source]#

Bases: object

Logistic Regression Model Class

Parameters:
  • alpha (float, optional) – learning rate. Defaults to 0.01.

  • iterations (int, optional) – number of iterations. Defaults to 10000.

References

https://machinelearningexploration.readthedocs.io/en/latest/LogisticRegression/Explore.html

property X: ndarray | None#

property X

Returns:

X matrix

Return type:

Union[np.ndarray, None]

property cost_history: list | None#

cost learning history

Returns:

cost history list

Return type:

Union[list, None]

predict(X: ndarray) ndarray[source]#

generate prediction

Parameters:

X (np.ndarray) – input feature matrix

Raises:
  • ValueError – if shape is not proper for the input feature

  • Warning – if model is not trained yet

Returns:

predicted values

Return type:

np.ndarray

property theta: ndarray | None#

property theta

Returns:

theta matrix

Return type:

Union[np.ndarray, None]

property theta_history: list | None#

theta training history

Returns:

theta history list

Return type:

Union[list, None]

train(X: ndarray, y: ndarray, verbose: bool = True, method: str = 'SGD', theta_precision: float = 0.001, batch_size: int = 30, regularization: bool = False, penalty: float | int = 1.0) None[source]#

train theta / estimator

Parameters:
  • X (np.ndarray) – X matrix/feature matrix.

  • y (np.ndarray) – y matrix/target matrix.

  • verbose (bool, optional) – print things. Defaults to True.

  • method (str, optional) –

    training method. Defaults to “SGD”.

    Available-

    “BGD”(Batch Gradient Descent), “SGD”(Stochastic Gradient Descent)

  • theta_precision (float, optional) – theta initialization value precision. Defaults to 0.001.

  • batch_size (int, optional) – batch size only for BGD. Defaults to 30.

  • regularization (bool, optional) – Apply Regularization. Defaults to False.

  • penalty (Union[float, int], optional) – regularization penalty only works for regularization=True. Defaults to 1.0.

property y: ndarray | None#

property y

Returns:

y matrix

Return type:

Union[np.ndarray, None]

class RandomForestClassifier(num_of_trees: int = 25, min_features: int | None = None, max_depth: int = 50, min_samples_split: int = 2, criteria: str = 'gini')[source]#

Bases: object

Ensemble method for classification

using a bunch of Decision Trees to do the classification.

Parameters:
  • num_of_trees (int, optional) – number of trees in ensemble. Defaults to 25.

  • min_features (int, optional) – minimum number of features to use in every tree. Defaults to None.

  • max_depth (int, optional) – max depth of every tree. Defaults to 50.

  • min_samples_split (int, optional) – minimum number of samples required to split. Defaults to 2.

  • criteria (str, optional) – criteria to calculate information gain. Defaults to ‘gini’.

predict(X: ndarray) ndarray[source]#

predict results

Parameters:

X (np.ndarray) – test matrix.

Raises:

ValueError – X should be list or numpy array.

Returns:

prediction results.

Return type:

np.ndarray

train(X: ndarray | list, y: ndarray | list, feature_name: list | None = None, target_name: list | None = None) None[source]#

Train the model

Parameters:
  • X (Union[np.ndarray,list]) – feature matrix

  • y (Union[np.ndarray,list]) – target matrix

  • feature_name (list, optional) – feature names. Defaults to None.

  • target_name (list, optional) – target names. Defaults to None.

class RandomForestRegressor(num_of_trees: int = 25, min_features: int | None = None, max_depth: int = 30, min_samples_split: int = 3, criteria: str = 'variance')[source]#

Bases: object

Ensemble method for regression

using a bunch of Decision Trees to do the regression.

Parameters:
  • num_of_trees (int, optional) – number of trees in ensemble. Defaults to 25.

  • min_features (int, optional) – minimum number of features to use in every tree. Defaults to None.

  • max_depth (int, optional) – max depth of every tree. Defaults to 30.

  • min_samples_split (int, optional) – minimum number of samples required to split. Defaults to 3.

  • criteria (str, optional) – criteria to calculate information gain. Defaults to ‘variance’.

predict(X: ndarray | list) ndarray[source]#

predict regression result

Parameters:

X (Union[np.ndarray, list]) – test matrix.

Raises:

ValueError – X should be list or numpy array.

Returns:

regression results.

Return type:

np.ndarray

train(X: ndarray | list, y: ndarray | list, feature_name: list | None = None, target_name: list | None = None) None[source]#

Train the model

Parameters:
  • X (Union[np.ndarray,list]) – feature matrix.

  • y (Union[np.ndarray,list]) – target matrix.

  • feature_name (list, optional) – feature names. Defaults to None.

  • target_name (list, optional) – target name. Defaults to None.

class RidgeRegression(alpha: float = 0.01, iterations: int = 10000)[source]#

Bases: object

Ridge Regression Model Class (L2 Regularization)

Parameters:
  • alpha (float, optional) – learning rate. Defaults to 0.01.

  • iterations (int, optional) – number of iterations. Defaults to 10000.

References

https://machinelearningexploration.readthedocs.io/en/latest/LinearRegression/Explore.html#Ridge(L2-Regularization)-Regression

property X: ndarray | None#

property X

Returns:

X matrix

Return type:

Union[np.ndarray, None]

property cost_history: list | None#

cost learning history

Returns:

cost history list

Return type:

Union[list, None]

predict(X: ndarray) ndarray[source]#

generate prediction

Parameters:

X (np.ndarray) – input feature matrix

Raises:
  • ValueError – if shape is not proper for the input feature

  • Warning – if model is not trained yet

Returns:

predicted values

Return type:

np.ndarray

property theta: ndarray | None#

property theta

Returns:

theta matrix

Return type:

Union[np.ndarray, None]

property theta_history: list | None#

theta training history

Returns:

theta history list

Return type:

Union[list, None]

train(X: ndarray, y: ndarray, verbose: bool = True, method: str = 'SGD', theta_precision: float = 0.001, penalty: float | int = 1.0, batch_size: int = 30) None[source]#

train model /theta estimator

Parameters:
  • X (np.ndarray) – X matrix/feature matrix.

  • y (np.ndarray) – y matrix/target matrix.

  • verbose (bool, optional) – print things. Defaults to True.

  • method (str, optional) –

    training method. Defaults to “SGD”.

    Available-

    “BGD”(Batch Gradient Descent), “SGD”(Stochastic Gradient Descent), “NORMAL”(Normal Equation)

  • theta_precision (float, optional) – theta initialization value precision. Defaults to 0.001.

  • penalty (Union[float, int], optional) – regularization penalty. Defaults to 1.0.

  • batch_size (int, optional) – batch size only for BGD. Defaults to 30.

property y: ndarray | None#

property y

Returns:

y matrix

Return type:

Union[np.ndarray, None]

moving_window_matrix(arr: ndarray, window: int, lag: int = 1) ndarray[source]#

Create Moving Window matrix for 1D data.

More details on this function: https://machinelearningexploration.readthedocs.io/en/latest/MathExploration/MovingWindow.html

Parameters:
  • arr (np.ndarray) – input 1D array.

  • window (int) – window/ number of columns.

  • lag (int, optional) – lag count for moving. Defaults to 1.

Returns:

transformed matrix.

Return type:

np.ndarray

Raises:
  • AssertionError – input array shape should be 1D like (m,).

  • AssertionError – length of array should be greater than window size and lag.

Example

>>> a = np.random.rand(100)
>>> print(moving_window_matrix(a, 20, 2))
polynomial_regression(x: ndarray, y: ndarray, degree: int) Tuple[ndarray, ndarray, ndarray][source]#

fit Regression line with polynomial degree.

Parameters:
  • x (np.ndarray) – independent variable.

  • y (np.ndarray) – dependent variable.

  • degree (int) – polynomial degree.

Returns:

slope, residual, fitline.

Return type:

Tuple[np.ndarray, np.ndarray, np.ndarray]

Examples

>>> import matplotlib.pyplot as plt
>>> x = np.arange(-10, 10)
>>> y = x**2 + x**3
>>> s, r, l = polynomial_regression(x, y, 3)
>>> plt.plot(x, y, 'ko', label='original')
>>> plt.plot(x, l, '.-',  label='regression line')
>>> plt.legend()
>>> plt.show()
sigmoid(val: ndarray) ndarray[source]#

Sigmoid function

\[f(z) = \frac{1}{1 + e^{-z}}\]
Parameters:

val (ndarray) – input value

Returns:

sigmoid value

Return type:

np.ndarray

trend(x: ndarray, y: ndarray) Tuple[ndarray, ndarray, ndarray][source]#

get trend of the data.

Parameters:
  • x (np.ndarray) – independent variable.

  • y (np.ndarray) – dependent variable.

Returns:

slope, residual, trendline.

Return type:

Tuple[np.ndarray, np.ndarray, np.ndarray]

Examples
>>> import matplotlib.pyplot as plt
>>> x = np.array([1, 2, 3, 4, 5, 6, 7, 8])
>>> y = np.array([1, 2, 3, 3, 4, 5, 7, 10])
>>> s, r, t = trend(x, y)
>>> plt.plot(x, y, 'o', label='original')
>>> plt.plot(x, t, '.-',  label='regression line')
>>> plt.legend()
>>> plt.show()