.datasets
.load_iris()
.model_selection
.train_test_split(X, y, test_size, random_state, stratify)
.preprocessing
.StandardScaler
.{fit, transform}
.LabelEncoder [注意下,转化为整数后默认有序!]
.{fit, transform, fit_transform, inverse_transform}
.OneHotEncoder [处理无序编码,代价是数据维度升高]
.{fit, transform, fit_transform, inverse_transform}
.MinMaxScaler
.{fit, transform, fit_transform}
.compose
.ColumnTransformer [用于合并 OneHotEncoder 的结果与原信息]
.linear_model
.Perceptron(eta0 [learning rate], max_iter, random_state, ...)
.{fit, predict}
.LogisticRegression(C [正则化参数,C↑ 正则化↓], solver [优化算法:newton-cg, lbfgs, liblinear, sag, saga...], multi_class, penalty [正则化模型 l1, l2])
.{fit, predict}
.predict_proba
.SGDClassifier(loss [损失函数 perceptron, log, hinge])
.svm
.SVC(kernel [核函数: linear, rbf(径向基函数)], gamma [高斯球的截止参数], C [正则化])
.fit
.metrics
.accuracy_score(y_test, y_pred)
.tree
.DecisionTreeClassifier
.export_graphviz
.ensemble
.RandomForestClassifier
.fit
.feature_importances_ [用于评估特征的重要性]
.neighbors
.KNeighborsClassifier [KNN]
.fit [实现可以为:线性枚举,KD 树,球树!【球树在随机数据下时间是 log】]
.impute
.SimpleImputer(missing_values, strategy [mean, ...]) [处理缺失的数据]
.{fit, transform}
.feature_selection
.SelectFromModel(estimator [Forest ?], threshold, prefit)
.transform
.decomposition
.PCA(n_components [= None 则只排序,0 < x < 1 则为阈值,>= 1 则为数量])
.{fit, transform, fit_transform}
.explained_variance_ratio_
np.clip(a, a_min, a_max) = min(max(a, a_min), a_max)