特征选择--PCA与互信息的使用与比较
本帖最后由 Handsome_zhou 于 2021-12-16 18:31 编辑
from sklearn.datasets import load_boston
# PCA pipeline: project the Boston housing features onto 10 principal
# components, standardize, then fit and score an SVR regressor.
from sklearn.datasets import load_boston
import pandas as pd
import numpy as np

# NOTE(review): load_boston was removed in scikit-learn 1.2, so this script
# needs an older release -- confirm the installed version.
d = load_boston()
x = np.asarray(d.data)
# SVR expects a 1-D target vector, so flatten it up front.
y = np.asarray(d.target).ravel()
print(x[:10])
print('形状:', x.shape)

from sklearn.decomposition import PCA
pca = PCA(n_components=10)
# Keep the projection in X. The previous version stored it in x, overwrote x
# with the raw data again, and then read an undefined X, so the PCA output
# never reached the model.
# NOTE(review): PCA is sensitive to feature scale; consider standardizing
# before the projection rather than after -- confirm the intended order.
X = pca.fit_transform(x)

# The IPython `%time` magic was dropped here: it is not valid Python outside
# a notebook, and on a line of its own it timed nothing.

from sklearn.preprocessing import StandardScaler  # standardization
# Use one scaler per array so the feature statistics are not overwritten
# when the target is standardized.
scaler = StandardScaler()
X = scaler.fit_transform(X)
y_scaler = StandardScaler()
# StandardScaler needs a 2-D input; flatten back to 1-D for SVR afterwards.
y = y_scaler.fit_transform(y.reshape(-1, 1)).ravel()

from sklearn.model_selection import train_test_split
# A fixed seed makes the split, and therefore the reported scores,
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

from sklearn.svm import SVR
svr = SVR(gamma='scale')
svr.fit(X_train, y_train)
# SVR.score returns the coefficient of determination (R^2).
train_score = svr.score(X_train, y_train)
test_score = svr.score(X_test, y_test)
print('train score: {} ; test score: {}'.format(train_score, test_score))
结果:
# Mutual-information pipeline: keep the 10 Boston housing features with the
# highest mutual information with the target, standardize, then fit and
# score an SVR regressor.
from sklearn import datasets
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import mutual_info_regression
import pandas as pd
import numpy as np
from sklearn.datasets import load_boston

# NOTE(review): load_boston was removed in scikit-learn 1.2, so this script
# needs an older release -- confirm the installed version.
d = load_boston()
# Plain ndarrays instead of np.matrix: np.matrix is no longer recommended by
# NumPy and is always 2-D, which does not fit the 1-D target that
# mutual_info_regression and SVR expect.
x = np.asarray(d.data)
y = np.asarray(d.target).ravel()

# Fit the selector once and read the per-feature scores from it, instead of
# calling mutual_info_regression a second time just to obtain `mi`.
selector = SelectKBest(mutual_info_regression, k=10)
X = selector.fit_transform(x, y)
mi = selector.scores_

from sklearn.preprocessing import StandardScaler  # standardization
# Use one scaler per array so the feature statistics are not overwritten
# when the target is standardized.
scaler = StandardScaler()
X = scaler.fit_transform(X)
y_scaler = StandardScaler()
# StandardScaler needs a 2-D input; flatten back to 1-D for SVR afterwards.
y = y_scaler.fit_transform(y.reshape(-1, 1)).ravel()

from sklearn.model_selection import train_test_split
# A fixed seed makes the split, and therefore the reported scores,
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

from sklearn.svm import SVR
svr = SVR(gamma='scale')
svr.fit(X_train, y_train)
# SVR.score returns the coefficient of determination (R^2).
train_score = svr.score(X_train, y_train)
test_score = svr.score(X_test, y_test)
print('train score: {} ; test score: {}'.format(train_score, test_score))
结果:
从在 Boston 数据集上得到的结果来看,经 PCA 降维和经互信息特征选择得到的数据,在支持向量机回归(SVR)下并没有明显的性能差别。 学习
页:
[1]