|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
请教各位高手:
用C++编写的主成分分析(PCA)求解得到的特征向量与Matlab自带PCA函数特征向量正负号有些不同。
特征向量数值是相同的,特征值也一样,就特征向量的正负符号有些是相反的。
我在网上找过两个自编版本,都出现类似问题。
困扰好久了,请高人指点。
- #include <iostream>
- #include "Eigen/Dense"
- #include "pca.h"
- #include <fstream>
- using Eigen::MatrixXd;
- using namespace std;
- int main()
- {
- int rows = 150; //number of samples
- int columns = 4; //number of components
- string pathData = "test_data.txt";
- MatrixXd m(rows, columns);
- ifstream istrm;
- istrm.open(pathData);
- double a{ 0 };
- for (int i = 0; i < 150; i++)
- {
- istrm >> a;
- m(i, 0) = a;
- istrm >> a;
- m(i, 1) = a;
- istrm >> a;
- m(i, 2) = a;
- istrm >> a;
- m(i, 3) = a;
- }
- istrm.close();
- //for (int i = 0; i < 150; i++)
- //{
- // printf("m的值:%lf,%lf,%lf,%lf\n", m(i, 0), m(i, 1), m(i, 2), m(i, 3));
- //}
- MatrixXd uReduced = PCA::Compute(m);
- MatrixXd mReduced = m*uReduced;
- cout << uReduced << endl;
- system("pause");
- }
复制代码
上面是主函数
- #ifndef PCA_H
- #define PCA_H
- #include <iostream>
- #include <assert.h>
- #include <vector>
- #include "Eigen/Dense"
- #include "Eigen/Eigenvalues"
- using namespace Eigen;
- using namespace std;
- class PCA
- {
- public:
- /*
- * Computes the princcipal component of a given matrix. Computation steps:
- * Compute the mean image.
- * Compute standard dispersion
- * Normalizing input data
- * Compute the covariance matrix
- * Calculate the eigenvalues and eigenvectors for the covariance matrix
- * @input MatrixXd D the data samples matrix.
- *
- * @returns MatrixXd The eigenVectors of principal component
- */
- static MatrixXd Compute(MatrixXd D)
- {
- // 1. Compute the mean image
- MatrixXd mean(1, D.cols());
- mean.setZero();
- for (int i = 0; i < D.cols(); i++)
- {
- mean(0, i) = D.col(i).mean();
- }
- // 2. Compute standard dispersion
- MatrixXd s(1, D.cols());
- for (int i = 0; i < D.cols(); i++)
- {
- double ss{ 0 };
- for (int j = 0; j < D.rows(); j++)
- {
- double tmp = (D(j, i) - mean(0, i));
- tmp *= tmp;
- ss += tmp;
- }
- ss /= (D.rows() - 1);
- s(0, i) = sqrt(ss);
- }
- //cout << s << endl;
- // 3. Normalizing input data
- MatrixXd normD(D);
- normD.setZero();
- for (int i = 0; i < D.cols(); i++)
- {
- for (int j = 0; j < D.rows(); j++)
- {
- normD(j, i) = (D(j, i) - mean(0, i)) / s(0, i);
- }
- }
- //cout << normD << endl;
- // 3. Compute the covariance matrix
- mean.setZero();
- for (int i = 0; i < normD.cols(); i++)
- {
- mean(0, i) = normD.col(i).mean();
- }
- MatrixXd covariance(D.cols(), D.cols());
- covariance.setZero();
- for (int k = 0; k < D.cols(); k++)
- {
- for (int j = 0; j < D.cols(); j++)
- {
- double sum{ 0 };
- for (int i = 0; i < D.rows(); i++)
- {
- sum += (normD(i, k) - mean(0, k))*(normD(i, j) - mean(0, j));
- }
- covariance(k, j) = sum / (D.rows() - 1);
- }
- }
- covariance(0, 0) = 0.685693512304251;
- covariance(0, 1) = -0.0424340044742729;
- covariance(0, 2) = 1.27431543624161;
- covariance(0, 3) = 0.516270693512304;
- covariance(1, 0) = -0.0424340044742729;
- covariance(1, 1) = 0.189979418344519;
- covariance(1, 2) = -0.329656375838926;
- covariance(1, 3) = -0.121639373601790;
- covariance(2, 0) = 1.27431543624161;
- covariance(2, 1) = -0.329656375838926;
- covariance(2, 2) = 3.11627785234899;
- covariance(2, 3) = 1.29560939597315;
- covariance(3, 0) = 0.516270693512304;
- covariance(3, 1) = -0.121639373601790;
- covariance(3, 2) = 1.29560939597315;
- covariance(3, 3) = 0.581006263982103;
- //cout << "斜方差" << "\n"<<endl;
- //cout << covariance << endl;
- // 4. Calculate the eigenvalues and eigenvectors for the covariance matrix
- EigenSolver<MatrixXd> solver(covariance);
- MatrixXd eigenVectors = solver.eigenvectors().real();
- VectorXd eigenValues = solver.eigenvalues().real();
- cout << eigenVectors << endl;
- cout << "\n";
- cout << eigenValues << endl;
- cout << "\n";
- // 5. Find out an number of most important principal components with broken stick method
- int newDimension = brockenStickMethod(eigenValues);
- // 6. Return the matrix of eigenVectors appropriate most important components
- MatrixXd featureVectors(D.cols(), newDimension);
- VectorXd sortEV(eigenValues);
- sort(sortEV.derived().data(), sortEV.derived().data() + sortEV.derived().size());
- for (int i = 0; i < newDimension; i++)
- {
- double max = sortEV(sortEV.rows() - 1 - i);
- for (int j = 0; j < eigenValues.size(); j++)
- {
- if (max == eigenValues(j))
- {
- VectorXd tmp = eigenVectors.col(j);
- featureVectors.col(i) = tmp;
- }
- }
- }
-
- return featureVectors;
- }
- private:
- static int brockenStickMethod(VectorXd eigenValues)
- {
- // Compute number of components, which most influed
- double trc{ eigenValues.sum() };
- VectorXd l(eigenValues.size());
- int countPrincComp{ 0 };
- l.setZero();
- for (int i = 0; i < eigenValues.size(); i++)
- {
- double sum{ 0 };
- for (int j = i; j < eigenValues.size(); j++)
- {
- double coef = 1;
- sum += (coef / j);
- }
- l(i) = sum / eigenValues.size();
- }
- VectorXd sortEV(eigenValues);
- sort(sortEV.derived().data(), sortEV.derived().data() + sortEV.derived().size());
- for (int i = 0; i < sortEV.size(); i++)
- {
- if ((sortEV(i) / trc) >l(i)) { countPrincComp++; }
- }
- return countPrincComp;
- }
- };
- #endif // PCA_H
复制代码
这个是 pca.h 文件。
在测试阶段,我把主成分步骤中corr的值替换成cov,使得它和Matlab版本一致。
|
|