|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 Julia999 于 2019-8-1 09:03 编辑
最近在弄一个项目,需要用C++实现SVM,但是在数据划分的时候出现了问题,大多SVM的实现都使用python,所以数据的划分也少有用C++实现。我重新看了自助法的定义,用C++实现了自助法,在这分享~
- #include<iostream>
#include<stdlib.h>
#include<time.h>
#include<algorithm>
#include<climits>
#include<fstream>
#include<sstream>
#include<string>
#include<vector>
using namespace std;
// Dimensions of the in-memory data set. The rest of the file still spells
// these out as the literals 306 and 3; the constants name them in one place.
const int kSampleCount = 306;  // number of rows held in x / y
const int kFeatureCount = 3;   // feature columns per row

double x[kSampleCount][kFeatureCount];  // feature matrix, labels excluded
double y[kSampleCount];                 // label of each row

std::vector<std::vector<double>> v_x;   // vector copy of x, filled by tovector()
std::vector<double> v_y;                // label vector (not referenced by the code visible here)

std::vector<std::vector<double>> traindatas;  // feature rows of the current bootstrap sample
std::vector<double> traindatasy;              // labels matching traindatas
std::vector<std::vector<double>> testdatas;   // feature rows of the test set
std::vector<double> testdatasy;               // labels matching testdatas

// All four index vectors hold ROW INDICES into x / y, not label values.
std::vector<int> index3;  // every row drawn in the current round, duplicates kept
std::vector<int> index1;  // rows drawn in the current round, duplicates removed
std::vector<int> index2;  // rows selected for the test set
std::vector<int> index4;  // distinct rows drawn so far; clearall() does not reset it
/**
 * Parses a decimal integer from the start of a text field.
 *
 * Hand-written instead of a library call; the original author's note says the
 * Enclave cannot accept string-typed data.
 *
 * Accepted form: optional leading blanks (space, tab, CR, LF), an optional
 * '+' or '-', then decimal digits. Parsing stops at the first character that
 * is not a digit, so a trailing '\r' from a Windows line ending or trailing
 * blanks no longer corrupt the value.
 *
 * @param str  text of one field
 * @return the parsed value; 0 when no digits are present; values outside the
 *         range of int are clamped to INT_MAX / INT_MIN
 */
int toNum(const std::string& str)
{
    const std::string::size_type len = str.length();
    std::string::size_type pos = 0;

    // Skip leading blanks.
    while (pos < len &&
           (str[pos] == ' ' || str[pos] == '\t' || str[pos] == '\r' || str[pos] == '\n'))
    {
        ++pos;
    }

    bool negative = false;
    if (pos < len && (str[pos] == '+' || str[pos] == '-'))
    {
        negative = (str[pos] == '-');
        ++pos;
    }

    // Accumulate in a wider type and cap at INT_MAX + 1 so the arithmetic
    // itself can never overflow, however long the digit run is.
    const long long cap = static_cast<long long>(INT_MAX) + 1;
    long long magnitude = 0;
    for (; pos < len && str[pos] >= '0' && str[pos] <= '9'; ++pos)
    {
        magnitude = magnitude * 10 + (str[pos] - '0');
        if (magnitude > cap)
        {
            magnitude = cap;
        }
    }

    if (negative)
    {
        return magnitude >= cap ? INT_MIN : -static_cast<int>(magnitude);
    }
    return magnitude >= cap ? INT_MAX : static_cast<int>(magnitude);
}
- void loaddata(string path)
- {
- ifstream Filein;
- try { Filein.open(path); }
- catch (exception e)
- {
- cout << "File open failed!";
- }
- string line;
- int data_num = 0;
- while (getline(Filein, line)) {
- int before = 0;
- int cnt = 0;
- data_num++;
- //cout << data_num << endl;
- for (unsigned int i = 0; i < line.length(); i++) {
- if (line[i] == ',' || line[i] == '\n') {
- string sub = line.substr(before, i - before);
- before = i + 1;
- x[data_num - 1][cnt] = toNum(sub);
- cnt++;
- }
- }
- //Data[data_num - 1][cnt] = toNum(line.substr(before, line.length()));
- y[data_num - 1] = toNum(line.substr(before, line.length()));
- }
- cout << "data loading done.\nthe amount of data is: " << data_num << endl;
- }
- vector<double>temp;
- void tovector(double x[306][3])
- {
- for (int i = 0; i < 306; i++)
- {
- for (int j = 0; j < 3; j++)
- {
- temp.push_back(x[i][j]);
- }
- v_x.push_back(temp);
- temp.clear();
- }
- }
- void getindex1()
- {
- srand((unsigned int)time(0));
- for (int i = 0; i < 306; i++)
- {
- if (i == 0)
- {
- int num = rand() % 306;
- index1.push_back(num);
- index3.push_back(num);
- }
- else
- {
- int temp = rand() % 306;
- index3.push_back(temp);
- vector<int>::iterator ret;
- ret = std::find(index1.begin(), index1.end(), temp);
- if (ret == index1.end())
- index1.push_back(temp);
- }
- }
- }
- void getindex2()
- {
- vector<int>::iterator ret;
- for (int i = 0; i < 306; i++)
- {
- ret = std::find(index4.begin(), index4.end(), i);
- if (ret == index4.end()) //说明在index1中没有找到i
- {
- index2.push_back(i);
- }
- }
- }
- void gettraindatas()
- {
- for (int i = 0; i < index3.size(); i++)
- {
- for (int j = 0; j < 3; j++)
- {
- temp.push_back(x[index3[i]][j]);
- }
- traindatas.push_back(temp);
- temp.clear();
- }
- for (int i = 0; i < index3.size(); i++)
- {
- traindatasy.push_back(y[index3[i]]);
- }
- }
- void getindex4() //得到index4,也就是获取了所有抽到的行数
- {
- if (index4.empty()) //空
- {
- for (int i = 0; i < index1.size(); i++)
- {
- index4.push_back(index1[i]);
- }
- }
- else
- {
- for (int i = 0; i < index1.size(); i++)
- {
- vector<int>::iterator ret;
- ret = std::find(index4.begin(), index4.end(), index1[i]);
- if (ret == index4.end())
- index4.push_back(index1[i]);
- }
- }
-
- }
- //void toarray(double **_traindatas,double *_traindatasy)
- //{
- // for (int i = 0; i < index1.size(); i++)
- // {
- // for (int j = 0; j < 3; j++)
- // {
- // _traindatas[i][j] = traindatas[i][j];
- // }
- // }
- //
- // for (int i = 0; i < index1.size(); i++)
- // {
- // _traindatasy[i] = traindatasy[i];
- // }
- //}
- void clearall()
- {
- vector <vector<double>>().swap(traindatas);
- vector<double>().swap(traindatasy);
- vector<int>().swap(index3); //放重复的traindatas下标
- vector<int>().swap(index1); //放traindatas中的下标,不重复的
- vector<int>().swap(index2);
- }
- void gettestdatas() //获取测试集
- {
- for (int i = 0; i < index2.size(); i++)
- {
- for (int j = 0; j < 3; j++)
- {
- temp.push_back(x[index2[i]][j]);
- }
- testdatas.push_back(temp);
- temp.clear();
- }
- for (int i = 0; i < index2.size(); i++)
- {
- testdatasy.push_back(y[index2[i]]);
- }
- }
- int main()
- {
- //1 先将读取的data全部转换成vector类型的
- loaddata("C:\\Users\\YY\\Desktop\\haberman1.txt");
- tovector(x);
- /*for (int i = 0; i < 306; i++)
- {
- for (int j = 0; j < 3; j++)
- {
- cout << v_x[i][j] << " ";
- }
- cout << endl;
- }*/
- for (int i = 0; i < 306; i++)
- {
- cout << "循环第" << i+1 << "次" << endl;
- getindex1();
- cout << "index1:" << index1.size() << endl;
- cout << "index3:" << index3.size() << endl;
- //把抽到的行数放进index4中
- getindex4();
- cout << "index4:" << index4.size() << endl;
- /*getindex2();
- cout << "index2:" << index2.size() << endl;*/
- gettraindatas();
- double _traindatas[306][3];
- double _traindatasy[306];
- for (int i = 0; i < 306; i++)
- {
- _traindatasy[i] = traindatasy[i];
- for (int j = 0; j < 3; j++)
- {
- _traindatas[i][j] = traindatas[i][j];
- }
- }
- if (i == 305)
- {
- cout << endl;
- cout << endl;
- cout << endl;
- getindex2(); //获取到测试集的行数
- cout << "index2:" << index2.size() << endl;
- //获取测试集:
- gettestdatas();
- cout << "打印测试集:" << endl;
- for (int i = 0; i < index2.size(); i++)
- {
- for (int j = 0; j < 3; j++)
- {
- cout << testdatas[i][j] << " ";
- }
- cout << endl;
- }
- }
- //将所有容器全部清空
- clearall();
- }
-
- ////2 再用随机抽取的方法,获取抽取的行数index1和index3,将抽取的行数放在一个一维的vector中
- //getindex1();
- //cout << "index1:" << index1.size() << endl;
- //cout << "index3:" << index3.size() << endl;
- ////3 利用v_index1获取行数v_index2,将其存在一个一维的vector中
- //getindex2();
- //cout << "index2:" << index2.size() << endl;
- ////4 利用v_index1和v_x来找到对应的行数的数据集,将找到的数据集作为测试集放在一个二维的vector traindatas中
- //gettraindatas();
- ////5 将产生的traindatas转成一个二维数组
- //double _traindatas[306][3];
- //double _traindatasy[306];
- ////double **_traindatas = new double*[index1.size()]; //用来保存将v_traindatas转换成的数组
- ////for (int i = 0; i < index1.size(); i++)
- ////{
- //// _traindatas[i] = new double[3];
- ////}
- ////double *_traindatasy = new double[index1.size()];
- ////toarray(_traindatas, _traindatasy); //将v_x转换成了数组的_traindatas
- //for (int i = 0; i < 306; i++)
- //{
- // _traindatasy[i] = traindatasy[i];
- // for (int j = 0; j < 3; j++)
- // {
- // _traindatas[i][j] = traindatas[i][j];
- // }
- //}
- /*for (int i = 0; i < 306; i++)
- {
- for (int j = 0; j < 3; j++)
- {
- cout << _traindatas[i][j] << " " << endl;
- }
- cout << endl;
- }*/
- system("pause");
- return 0;
- }
- /*
- 全部的操作用vector
- 1 先将读取的data全部转换成vector类型的
- 2 再用随机抽取的方法,获取抽取的行数v_index1,将抽取的行数放在一个一维的vector中
- 3 利用v_index1获取行数v_index2,将其存在一个一维的vector中
- 4 利用v_index1和v_x来找到对应的行数的数据集,将找到的数据集作为训练集放在一个二维的vector traindatas中
- 5 将产生的traindatas转成一个二维数组
- */
复制代码
|
|