1 min read 134 words Updated Apr 25, 2026 Created May 03, 2026
.datasets
	.load_iris()

.model_selection
	.train_test_split(X, y, test_size, random_state, stratify)

.preprocessing
	.StandardScaler
		.{fit, transform}
	.LabelEncoder [注意下转化为整数后默认有序]
		.{fit, transform, fit_transform, inverse_transform}
	.OneHotEncoder [处理无序编码，代价是数据维度升高]
		.{fit, transform, fit_transform, inverse_transform}
	.MinMaxScaler
		.{fit, transform, fit_transform}

.compose
	.ColumnTransformer [用于合并 OneHotEncoder 的结果与原信息]

.linear_model
	.Perceptron(eta0 [learning rate], max_iter, random_state, ...)
		.{fit, predict}
	.LogisticRegression(C [正则化强度的倒数，越小正则化越强], solver [优化算法 newton-cg, lbfgs, liblinear, sag, saga...], multi_class, penalty [正则化模型 l1, l2])
		.{fit, predict}
		.predict_proba
	.SGDClassifier(loss [损失函数 perceptron, log_loss (旧版本为 log), hinge])

.svm
	.SVC(kernel [核函数: linear, rbf 径向基函数], gamma [高斯球的截止参数], C [正则化])
		.fit

.metrics
	.accuracy_score(y_test, y_pred)

.tree
	.DecisionTreeClassifier
	.export_graphviz

.ensemble
	.RandomForestClassifier
		.fit
		.feature_importances_ [用于评估特征的重要性]

.neighbors
	.KNeighborsClassifier [KNN]
		.fit [实现可以为线性枚举、KD 树、球树；球树在随机数据下时间是 O(log n)]

.impute
	.SimpleImputer(missing_values, strategy [mean, ...]) [处理缺失的数据]
		.{fit, transform}

.feature_selection
	.SelectFromModel(estimator [模型, 如 RandomForestClassifier], threshold, prefit)
		.transform

.decomposition
	.PCA(n_components [= None 则只排序；0 < x < 1 则为阈值；>= 1 则为数量])
		.{fit, transform, fit_transform}
		.explained_variance_ratio_
np.clip(a, a_min, a_max) = min(max(a, a_min), a_max)