机器学习之自助法划分数据C++实现
本帖最后由 Julia999 于 2019-8-1 09:03 编辑

最近在做一个项目,需要用 C++ 实现 SVM,但在划分数据时遇到了问题:SVM 的实现大多使用 Python,因此用 C++ 实现数据划分的例子很少。我重新看了自助法(bootstrap)的定义,并用 C++ 实现了它,在这里分享给大家。

#include<iostream>
#include<stdlib.h>
#include<time.h>
#include<algorithm>
#include<fstream>
#include<sstream>
#include<string>
#include<vector>
using namespace std;
// Raw feature table read from the input file: 306 samples, 3 feature
// columns per sample, labels excluded.
double x[306][3];
// Class label of each of the 306 samples (last column of the input file).
double y[306];
// Vector copy of the feature rows (filled by tovector).
std::vector<std::vector<double>> v_x;
// Vector intended for the labels; no code visible in this file writes to it.
std::vector<double> v_y;
// Feature rows / labels of the current bootstrap training sample.
std::vector<std::vector<double>> traindatas;
std::vector<double> traindatasy;
// Feature rows / labels of the held-out test set.
std::vector<std::vector<double>> testdatas;
std::vector<double> testdatasy;
// Row numbers drawn for the training sample, duplicates included.
std::vector<int> index3;
// Row numbers drawn for the training sample, each listed only once.
std::vector<int> index1;
// Row numbers (not labels) that make up the test set.
std::vector<int> index2;
// Distinct row numbers that have been drawn so far; unlike the other index
// vectors this one is not reset by clearall.
std::vector<int> index4;
// Converts a run of decimal digits at the start of `str` into an int.
// (Original author's note: the enclave cannot accept string-typed data.)
//
// Parsing stops at the first character that is not '0'..'9', so trailing
// noise such as a carriage return does not corrupt the value. There is no
// sign handling and no overflow check. An empty string, or one that does
// not start with a digit, yields 0.
int toNum(std::string str)
{
    int ans = 0;
    for (std::string::size_type i = 0; i < str.length(); i++)
    {
        const char c = str[i];
        if (c < '0' || c > '9')
        {
            break;
        }
        ans = ans * 10 + (c - '0');
    }
    return ans;
}
void loaddata(string path)
{
ifstream Filein;
try { Filein.open(path); }
catch (exception e)
{
cout << "File open failed!";
}
string line;
int data_num = 0;
while (getline(Filein, line)) {
int before = 0;
int cnt = 0;
data_num++;
//cout << data_num << endl;
for (unsigned int i = 0; i < line.length(); i++) {
if (line == ',' || line == '\n') {
string sub = line.substr(before, i - before);
before = i + 1;
x = toNum(sub);
cnt++;
}
}
//Data = toNum(line.substr(before, line.length()));
y = toNum(line.substr(before, line.length()));
}
cout << "data loading done.\nthe amount of data is: " << data_num << endl;
}
// Scratch row shared by the row-building helpers in this file.
std::vector<double> temp;

// Appends the 306 rows of a 306x3 feature table to the global v_x, one
// 3-element vector per row.
//
// x: the feature table to copy (this parameter shadows the global of the
//    same name).
void tovector(double x[306][3])
{
    const int kRows = 306;
    const int kCols = 3;

    v_x.reserve(v_x.size() + kRows);
    for (int i = 0; i < kRows; i++)
    {
        // x[i] decays to a pointer to the first of the row's kCols values.
        v_x.push_back(std::vector<double>(x[i], x[i] + kCols));
    }
}
// Draws one bootstrap sample: 306 row numbers in [0, 306), chosen with
// replacement.
//
// Every draw is appended to index3 (duplicates kept); a row number is
// appended to index1 only the first time it is seen, so index1 ends up
// holding the distinct rows of the sample.
void getindex1()
{
    const int kSampleCount = 306;

    // Seed the generator only once. Re-seeding with time(0) on every call
    // would make all calls that fall within the same second produce the
    // exact same sequence of draws.
    static bool seeded = false;
    if (!seeded)
    {
        srand(static_cast<unsigned int>(time(0)));
        seeded = true;
    }

    for (int draw = 0; draw < kSampleCount; draw++)
    {
        const int picked = rand() % kSampleCount;
        index3.push_back(picked);
        if (std::find(index1.begin(), index1.end(), picked) == index1.end())
        {
            index1.push_back(picked);
        }
    }
}
// Collects the test-set row numbers: every row number in [0, 306) that does
// not occur in index4 is appended to index2, in ascending order.
void getindex2()
{
    for (int row = 0; row < 306; ++row)
    {
        const bool alreadyDrawn =
            std::find(index4.begin(), index4.end(), row) != index4.end();
        if (alreadyDrawn)
        {
            continue;
        }
        // row was not found in index4
        index2.push_back(row);
    }
}
// Builds the training set for the current bootstrap sample.
//
// For every row number in index3 (duplicates included) the 3 feature values
// x[row][0..2] are appended to traindatas as one row and the label y[row]
// is appended to traindatasy. Expects x to be a 2-D array with at least 3
// columns per row and y a 1-D array, both indexable by every value stored
// in index3.
void gettraindatas()
{
    const int kFeatureCount = 3;

    traindatas.reserve(traindatas.size() + index3.size());
    traindatasy.reserve(traindatasy.size() + index3.size());
    for (int row : index3)
    {
        traindatas.push_back(std::vector<double>(x[row], x[row] + kFeatureCount));
        traindatasy.push_back(y[row]);
    }
}
// Merges the row numbers of index1 into index4, skipping any value that
// index4 already contains, so index4 keeps the distinct row numbers of
// every sample merged into it so far.
void getindex4()
{
    for (int row : index1)
    {
        if (std::find(index4.begin(), index4.end(), row) == index4.end())
        {
            index4.push_back(row);
        }
    }
}
//void toarray(double **_traindatas,double *_traindatasy)
//{
// for (int i = 0; i < index1.size(); i++)
// {
// for (int j = 0; j < 3; j++)
// {
// _traindatas = traindatas;
// }
// }
//
// for (int i = 0; i < index1.size(); i++)
// {
// _traindatasy = traindatasy;
// }
//}
void clearall()
{
vector <vector<double>>().swap(traindatas);
vector<double>().swap(traindatasy);
vector<int>().swap(index3);//放重复的traindatas下标
vector<int>().swap(index1);//放traindatas中的下标,不重复的
vector<int>().swap(index2);
}
// Builds the test set.
//
// For every row number in index2 the 3 feature values x[row][0..2] are
// appended to testdatas as one row and the label y[row] is appended to
// testdatasy. Expects x to be a 2-D array with at least 3 columns per row
// and y a 1-D array, both indexable by every value stored in index2.
void gettestdatas()
{
    const int kFeatureCount = 3;

    testdatas.reserve(testdatas.size() + index2.size());
    testdatasy.reserve(testdatasy.size() + index2.size());
    for (int row : index2)
    {
        testdatas.push_back(std::vector<double>(x[row], x[row] + kFeatureCount));
        testdatasy.push_back(y[row]);
    }
}
// Driver: loads the data set, runs 306 bootstrap rounds and, in the last
// round, derives and prints the held-out test set.
int main()
{
    const int kSampleCount = 306;  // rows in the data set
    const int kFeatureCount = 3;   // feature columns per row
    const int kRounds = 306;       // number of bootstrap rounds

    // 1. Read the file into x / y and mirror the feature rows into v_x.
    loaddata("C:\\Users\\YY\\Desktop\\haberman1.txt");
    tovector(x);

    for (int round = 0; round < kRounds; round++)
    {
        std::cout << "循环第" << round + 1 << "次" << std::endl;

        // 2. Draw this round's sample (index1: distinct rows, index3: all draws).
        getindex1();
        std::cout << "index1:" << index1.size() << std::endl;
        std::cout << "index3:" << index3.size() << std::endl;

        // 3. Record the rows drawn this round in index4.
        // NOTE(review): index4 is not reset by clearall, so it accumulates
        // the rows of every round. The test set below is "rows missing from
        // index4", i.e. rows that no round ever drew; over many independent
        // rounds that set may well be empty. Confirm this is intended.
        getindex4();
        std::cout << "index4:" << index4.size() << std::endl;

        // 4. Collect the feature rows and labels of the drawn sample.
        gettraindatas();

        // 5. Copy the training set into plain arrays.
        double _traindatas[kSampleCount][kFeatureCount];
        double _traindatasy[kSampleCount];
        for (std::vector<double>::size_type r = 0;
             r < traindatas.size() && r < static_cast<std::vector<double>::size_type>(kSampleCount);
             r++)
        {
            _traindatasy[r] = traindatasy[r];
            for (int c = 0; c < kFeatureCount; c++)
            {
                _traindatas[r][c] = traindatas[r][c];
            }
        }
        // The arrays are filled but not consumed anywhere in this function.
        (void)_traindatas;
        (void)_traindatasy;

        if (round == kRounds - 1)
        {
            std::cout << std::endl;
            std::cout << std::endl;
            std::cout << std::endl;

            // Row numbers of the test set, then the test rows themselves.
            getindex2();
            std::cout << "index2:" << index2.size() << std::endl;
            gettestdatas();

            std::cout << "打印测试集:" << std::endl;
            for (std::vector<double>::size_type r = 0; r < testdatas.size(); r++)
            {
                for (std::vector<double>::size_type c = 0; c < testdatas[r].size(); c++)
                {
                    // Separate the values so adjacent numbers do not run together.
                    std::cout << testdatas[r][c] << " ";
                }
                std::cout << std::endl;
            }
        }

        // Reset the per-round containers before the next round.
        clearall();
    }

    system("pause");
    return 0;
}
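/*
Everything is done with vectors:
1. Convert all of the loaded data to vectors.
2. Draw rows at random (with replacement) and store the drawn row numbers in one-dimensional vectors (index1 without duplicates, index3 with duplicates).
3. From the drawn row numbers, derive the row numbers that were not drawn (index2) and store them in a one-dimensional vector.
4. Use the drawn row numbers to look up the corresponding rows of the data set and store them, as the training set, in the two-dimensional vector traindatas.
5. Convert the resulting traindatas into a two-dimensional array.
*/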
页:
[1]