|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
请教各位高手:
用C++编写的主成分分析(PCA)求解得到的特征向量与Matlab自带PCA函数特征向量正负号有些不同。
特征向量数值是相同的,特征值也一样,就特征向量的正负符号有些是相反的。
我在网上找过两个自编版本,都出现类似问题。
困扰好久了,请高人指点。
#include <iostream>
#include "Eigen/Dense"
#include "pca.h"
#include <fstream>
using Eigen::MatrixXd;
using namespace std;
int main()
{
int rows = 150; //number of samples
int columns = 4; //number of components
string pathData = "test_data.txt";
MatrixXd m(rows, columns);
ifstream istrm;
istrm.open(pathData);
double a{ 0 };
for (int i = 0; i < 150; i++)
{
istrm >> a;
m(i, 0) = a;
istrm >> a;
m(i, 1) = a;
istrm >> a;
m(i, 2) = a;
istrm >> a;
m(i, 3) = a;
}
istrm.close();
//for (int i = 0; i < 150; i++)
//{
// printf("m的值:%lf,%lf,%lf,%lf\n", m(i, 0), m(i, 1), m(i, 2), m(i, 3));
//}
MatrixXd uReduced = PCA::Compute(m);
MatrixXd mReduced = m*uReduced;
cout << uReduced << endl;
system("pause");
}
上面是主函数
#ifndef PCA_H
#define PCA_H
#include <assert.h>
#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>
#include "Eigen/Dense"
#include "Eigen/Eigenvalues"
using namespace Eigen;
using namespace std;
class PCA
{
public:
    /*
     * Computes the principal components of a given data matrix. Steps:
     *   1. Compute the per-column mean.
     *   2. Compute the per-column sample standard deviation.
     *   3. Normalize (z-score) the input data.
     *   4. Compute the covariance matrix of the normalized data
     *      (equivalently, the correlation matrix of the raw data).
     *   5. Compute the eigenvalues and eigenvectors of that matrix.
     *   6. Select the leading components with the broken-stick method.
     *
     * Note on signs: the sign of an eigenvector is mathematically
     * arbitrary (if v is an eigenvector, so is -v), which is why two PCA
     * implementations can disagree on signs while agreeing on values.
     * To make the output deterministic, each returned eigenvector is
     * flipped so its largest-magnitude component is positive.
     *
     * @param D the data sample matrix (rows = samples, cols = variables).
     * @return MatrixXd whose columns are the selected principal-component
     *         eigenvectors, ordered by decreasing eigenvalue.
     */
    static MatrixXd Compute(MatrixXd D)
    {
        const int n = static_cast<int>(D.rows());
        const int p = static_cast<int>(D.cols());
        // 1. Column means.
        MatrixXd mean(1, p);
        for (int i = 0; i < p; i++)
        {
            mean(0, i) = D.col(i).mean();
        }
        // 2. Sample standard deviation per column (divide by n - 1).
        MatrixXd s(1, p);
        for (int i = 0; i < p; i++)
        {
            double ss{ 0 };
            for (int j = 0; j < n; j++)
            {
                const double d = D(j, i) - mean(0, i);
                ss += d * d;
            }
            s(0, i) = sqrt(ss / (n - 1));
        }
        // 3. Z-score normalization.
        MatrixXd normD(n, p);
        for (int i = 0; i < p; i++)
        {
            for (int j = 0; j < n; j++)
            {
                normD(j, i) = (D(j, i) - mean(0, i)) / s(0, i);
            }
        }
        // 4. Covariance of the normalized data. Its column means are zero
        //    by construction, so no re-centering term is needed.
        //    (The original code then overwrote this matrix with hard-coded
        //    debug constants, discarding the computation — removed.)
        MatrixXd covariance(p, p);
        covariance.setZero();
        for (int k = 0; k < p; k++)
        {
            for (int j = 0; j < p; j++)
            {
                double sum{ 0 };
                for (int i = 0; i < n; i++)
                {
                    sum += normD(i, k) * normD(i, j);
                }
                covariance(k, j) = sum / (n - 1);
            }
        }
        // 5. Eigen-decomposition. The matrix is symmetric, so the
        //    eigenvalues and eigenvectors are real.
        EigenSolver<MatrixXd> solver(covariance);
        MatrixXd eigenVectors = solver.eigenvectors().real();
        VectorXd eigenValues = solver.eigenvalues().real();
        // 6. Number of components to keep (broken-stick rule).
        int newDimension = brokenStickMethod(eigenValues);
        // 7. Collect the eigenvectors of the newDimension largest
        //    eigenvalues, in decreasing eigenvalue order.
        MatrixXd featureVectors(p, newDimension);
        VectorXd sortEV(eigenValues);
        sort(sortEV.data(), sortEV.data() + sortEV.size());
        for (int i = 0; i < newDimension; i++)
        {
            const double target = sortEV(sortEV.size() - 1 - i);
            for (int j = 0; j < eigenValues.size(); j++)
            {
                if (target == eigenValues(j))
                {
                    VectorXd v = eigenVectors.col(j);
                    // Fix the arbitrary sign: flip so the entry of
                    // largest magnitude is positive, making results
                    // reproducible across implementations.
                    int maxIdx = 0;
                    for (int r = 1; r < v.size(); r++)
                    {
                        if (fabs(v(r)) > fabs(v(maxIdx))) maxIdx = r;
                    }
                    if (v(maxIdx) < 0) v = -v;
                    featureVectors.col(i) = v;
                    break;
                }
            }
        }
        return featureVectors;
    }
private:
    /*
     * Broken-stick stopping rule: component i (0-based, ordered by
     * decreasing eigenvalue) is kept while its share of total variance
     * exceeds the broken-stick expectation
     *     l(i) = (1/p) * sum_{k=i+1..p} 1/k.
     *
     * @param eigenValues the (unsorted) eigenvalues.
     * @return the number of principal components to keep.
     */
    static int brokenStickMethod(VectorXd eigenValues)
    {
        const int p = static_cast<int>(eigenValues.size());
        const double total = eigenValues.sum();
        // Broken-stick expected fractions, largest first.
        VectorXd l(p);
        for (int i = 0; i < p; i++)
        {
            double sum{ 0 };
            // Harmonic tail with 1-based terms. The original divided by j
            // starting at 0, a division by zero for the first component.
            for (int j = i; j < p; j++)
            {
                sum += 1.0 / (j + 1);
            }
            l(i) = sum / p;
        }
        // Pair the k-th LARGEST eigenvalue with l(k). The original
        // compared the ascending-sorted eigenvalues against the
        // descending thresholds, mismatching every pair.
        VectorXd sortEV(eigenValues);
        sort(sortEV.data(), sortEV.data() + sortEV.size());
        int countPrincComp{ 0 };
        for (int i = 0; i < p; i++)
        {
            if ((sortEV(p - 1 - i) / total) > l(i)) { countPrincComp++; }
        }
        return countPrincComp;
    }
};
#endif // PCA_H
这个是 pca.h 文件。
在测试阶段,我把主成分步骤中corr的值替换成cov,使得它和Matlab版本一致。
|
|