An Overview of Deep Learning Frameworks
There are many popular deep learning frameworks on the market today; the most commonly used are TensorFlow, Keras, MXNet, Torch, Caffe, and Theano. Comparing them side by side (framework / development languages / strengths, weaknesses, and learning curve):
TensorFlow   C++/CUDA/Python   Complete documentation, flexible, and widely applicable, but hard to pick up at first
Keras        C++/CUDA/Python   Highly encapsulated, simple to use, quick to pick up
MXNet        C++/CUDA          Highly encapsulated, simple to use, quick to pick up
Torch        C/CUDA/Lua        Highly encapsulated, simple to use, quick to pick up
Caffe        C++/CUDA          Fast, with fairly complete documentation, but environment setup is troublesome
Theano       C++/CUDA/Python   A relatively old framework with comparatively dated documentation
MNIST digit recognition built with each of these frameworks:
1. TensorFlow
》Building the network with tf.layers
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt
tf.set_random_seed(1)
np.random.seed(1)
BATCH_SIZE = 50
LR = 0.001 # learning rate
mnist = input_data.read_data_sets('./mnist', one_hot=True)  # pixel values have been normalized to the range (0, 1)
test_x = mnist.test.images[:2000]
test_y = mnist.test.labels[:2000]
# plot one example
print(mnist.train.images.shape) # (55000, 28 * 28)
print(mnist.train.labels.shape) # (55000, 10)
plt.imshow(mnist.train.images[0].reshape((28, 28)), cmap='gray')
plt.title('%i' % np.argmax(mnist.train.labels[0])); plt.show()
tf_x = tf.placeholder(tf.float32, [None, 28*28]) / 255.   # input x, scaled to [0, 1]
image = tf.reshape(tf_x, [-1, 28, 28, 1])                 # (batch, height, width, channel)
tf_y = tf.placeholder(tf.int32, [None, 10])               # input y (one-hot labels)
# CNN
conv1 = tf.layers.conv2d( # shape (28, 28, 1)
inputs=image,
filters=16,
kernel_size=5,
strides=1,
padding='same',
activation=tf.nn.relu
) # -> (28, 28, 16)
pool1 = tf.layers.max_pooling2d(
conv1,
pool_size=2,
strides=2,
) # -> (14, 14, 16)
conv2 = tf.layers.conv2d(pool1, 32, 5, 1, 'same', activation=tf.nn.relu) # -> (14, 14, 32)
pool2 = tf.layers.max_pooling2d(conv2, 2, 2) # -> (7, 7, 32)
flat = tf.reshape(pool2, [-1, 7*7*32]) # -> (7*7*32, )
output = tf.layers.dense(flat, 10) # output layer
loss = tf.losses.softmax_cross_entropy(onehot_labels=tf_y, logits=output) # compute cost
train_op = tf.train.AdamOptimizer(LR).minimize(loss)
accuracy = tf.metrics.accuracy( # returns (acc, update_op), and creates 2 local variables
    labels=tf.argmax(tf_y, axis=1), predictions=tf.argmax(output, axis=1),)[1]
sess = tf.Session()
init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) # the local var is for accuracy_op
sess.run(init_op) # initialize var in graph
# following function (plot_with_labels) is for visualization, can be ignored if not interested
from matplotlib import cm
try: from sklearn.manifold import TSNE; HAS_SK = True
except: HAS_SK = False; print('\nPlease install sklearn for layer visualization\n')
def plot_with_labels(lowDWeights, labels):
    plt.cla(); X, Y = lowDWeights[:, 0], lowDWeights[:, 1]
    for x, y, s in zip(X, Y, labels):
        c = cm.rainbow(int(255 * s / 9)); plt.text(x, y, s, backgroundcolor=c, fontsize=9)
    plt.xlim(X.min(), X.max()); plt.ylim(Y.min(), Y.max()); plt.title('Visualize last layer'); plt.show(); plt.pause(0.01)
plt.ion()
for step in range(600):
    b_x, b_y = mnist.train.next_batch(BATCH_SIZE)
    _, loss_ = sess.run([train_op, loss], {tf_x: b_x, tf_y: b_y})
    if step % 50 == 0:
        accuracy_, flat_representation = sess.run([accuracy, flat], {tf_x: test_x, tf_y: test_y})
        print('Step:', step, '| train loss: %.4f' % loss_, '| test accuracy: %.2f' % accuracy_)
        if HAS_SK:
            # Visualization of trained flatten layer (T-SNE)
            tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000); plot_only = 500
            low_dim_embs = tsne.fit_transform(flat_representation[:plot_only, :])
            labels = np.argmax(test_y, axis=1)[:plot_only]; plot_with_labels(low_dim_embs, labels)
plt.ioff()
# print 10 predictions from test data
test_output = sess.run(output, {tf_x: test_x[:10]})
pred_y = np.argmax(test_output, 1)
print(pred_y, 'prediction number')
print(np.argmax(test_y[:10], 1), 'real number')
》Building the network with tf.nn
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data',one_hot=True)
## Define the weights
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)
## Define the biases
def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)
## Define the convolution op
def conv2d(x, W):
    # strides = [1, stride_h, stride_w, 1]: the kernel moves 1 pixel at a time
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
# x is the input and W holds the convolution weights, e.g. a 5x5 kernel over 1 channel
# producing 32 feature maps. strides is the step of the sliding window. padding is either
# 'SAME' or 'VALID': VALID slides the kernel over the original image only, so the output
# is smaller than the input; SAME zero-pads the image first, so the output keeps the
# same spatial size as the input.
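# A quick worked example of the padding arithmetic (an illustration added here, not part
# of the original code): with a 28x28 input, a 5x5 kernel, and stride 1,
#   'SAME'  -> output 28x28 (the input is zero-padded by 2 pixels on each side);
#   'VALID' -> output (28 - 5) + 1 = 24, i.e. 24x24.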
## Define the pooling op
def max_pool_2x2(x):
    # ksize and strides are [1, height, width, 1]: pool over 2x2 windows, 2 pixels apart
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
## Define a helper that computes accuracy
def compute_accuracy(v_xs, v_ys):
    global prediction
    y_pre = sess.run(prediction, feed_dict={xs: v_xs, keep_prob: 1})
    correct_prediction = tf.equal(tf.argmax(y_pre, 1), tf.argmax(v_ys, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    result = sess.run(accuracy, feed_dict={xs: v_xs, ys: v_ys, keep_prob: 1})
    return result
## Inputs
xs = tf.placeholder(tf.float32, [None, 784])  # 28*28 pixels
ys = tf.placeholder(tf.float32, [None, 10])   # 10 outputs
keep_prob = tf.placeholder(tf.float32)
x_image = tf.reshape(xs, [-1, 28, 28, 1])
# -1 means the number of samples is not fixed; the images are 28x28 with 1 channel
#print(x_image.shape)
## Convolution layer conv1
W_conv1 = weight_variable([5, 5, 1, 32])  # first conv layer: 5x5 kernels, 1 color channel, 32 kernels
b_conv1 = bias_variable([32])             # first-layer bias
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)  # first-layer output after the nonlinearity: 28x28x32
h_pool1 = max_pool_2x2(h_conv1)  # output is 14x14x32
## Convolution layer conv2
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)  # output is 7x7x64
## Fully connected layer with 1024 hidden units
W_fc1 = weight_variable([7*7*64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
# flatten the 2D feature maps into 1D vectors
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)  # nonlinear activation
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)  # guard against overfitting
## Softmax layer
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
prediction = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
# loss: cross-entropy on top of softmax yields the classification objective
cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.log(prediction), reduction_indices=[1]))
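# Note (an aside, not in the original code): taking tf.log of a softmax output can
# underflow when a predicted probability approaches 0; feeding the pre-softmax logits to
# tf.nn.softmax_cross_entropy_with_logits computes the same loss in a numerically
# stable way.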
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
sess = tf.Session()
# important: initialize the variables
sess.run(tf.global_variables_initializer())  # initialize_all_variables() is deprecated
saver = tf.train.Saver()
for i in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)  # draw a mini-batch of 100 samples from the downloaded dataset
    sess.run(train_step, feed_dict={xs: batch_xs, ys: batch_ys, keep_prob: 0.5})
    if i % 50 == 0:
        print(compute_accuracy(mnist.test.images, mnist.test.labels))
saver.save(sess, "my_net/save_net.ckpt")  # persist the trained weights (restore() would load them back)
2. Keras
import numpy as np
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
(X_train, y_train), (X_test, y_test) = mnist.load_data()  # load the MNIST dataset
print(X_train.shape)
print(y_train)
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1).astype('float32')
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1).astype('float32')
# Normalize
X_train /= 255
X_test /= 255
# One-hot encoding
def tran_y(y):
    y_ohe = np.zeros(10)
    y_ohe[y] = 1
    return y_ohe
# One-hot encode the labels
y_train_ohe = np.array([tran_y(y_train[i]) for i in range(len(y_train))])
y_test_ohe = np.array([tran_y(y_test[i]) for i in range(len(y_test))])
# Create a sequential model
model = Sequential()
# Add a convolution layer: 64 filters, 3x3 kernels, stride 1, zero padding, input shape set, ReLU activation
model.add(Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding='same', input_shape=(28, 28, 1), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))  # pooling layer: take the maximum of each 2x2 window
model.add(Dropout(0.5))  # dropout layer with probability 0.5, against overfitting and for better generalization
model.add(Conv2D(128, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))
model.add(Conv2D(256, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))
# Flatten the current layer's nodes
model.add(Flatten())
# Add fully connected layers
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(10, activation='softmax'))  # 10 neurons in the output layer
# Compile the model, specifying the loss (cross-entropy is the usual choice for classification), optimizer, and metrics
model.compile(loss='categorical_crossentropy', optimizer='adagrad', metrics=['accuracy'])
# Train the model on mini-batches
model.fit(X_train, y_train_ohe, validation_data=(X_test, y_test_ohe), epochs=20, batch_size=128)
# Evaluate the model's accuracy on the test set
scores = model.evaluate(X_test, y_test_ohe, verbose=0)
Keras can run on either a TensorFlow or a Theano backend; the default is TensorFlow. To change it, see https://blog.csdn.net/a214704/article/details/86304854
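For example, one minimal way to switch the backend for a single run (assuming Theano is installed) is to set the KERAS_BACKEND environment variable before Keras is first imported; editing the "backend" field in ~/.keras/keras.json changes it permanently:
import os
os.environ['KERAS_BACKEND'] = 'theano'  # must be set before the first `import keras`
import keras                            # prints "Using Theano backend." on import
print(keras.backend.backend())          # -> 'theano'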
3. MXNet
》Using the symbol API
#encoding:utf-8
import logging  # important for logging the training information of each epoch
logging.getLogger().setLevel(logging.INFO)
import os
import mxnet as mx
from mxnet import nd
# Prepare the data and wrap it in NDArrayIter iterators
mnist = mx.test_utils.get_mnist()
mx.random.seed(42)
batch_size = 100
train_iter = mx.io.NDArrayIter(mnist["train_data"], mnist["train_label"], batch_size, shuffle=True)
val_iter = mx.io.NDArrayIter(mnist["test_data"], mnist["test_label"], batch_size)
# Define the network
data = mx.sym.var('data')
conv1 = mx.sym.Convolution(data=data, kernel=(3,3), num_filter=20)
relu1 = mx.sym.Activation(data=conv1, act_type="relu")
pool1 = mx.sym.Pooling(data=relu1, pool_type="max", kernel=(2,2), stride=(2,2))
conv2 = mx.sym.Convolution(data=pool1, kernel=(3,3), num_filter=20)
relu2 = mx.sym.Activation(data=conv2, act_type="relu")
pool2 = mx.sym.Pooling(data=relu2, pool_type="max", kernel=(2,2), stride=(2,2))
flatten = mx.sym.flatten(data=pool2)
fc1 = mx.sym.FullyConnected(data=flatten, num_hidden=500)
relu3 = mx.sym.Activation(data=fc1, act_type="relu")
fc2 = mx.sym.FullyConnected(data=relu3, num_hidden=10)
cnn_symbol = mx.sym.SoftmaxOutput(data=fc2, name="softmax")
# Define the module
ctx = mx.gpu() if mx.test_utils.list_gpus() else mx.cpu()
cnn_model = mx.mod.Module(symbol=cnn_symbol, context=ctx)
# Train
cnn_model.fit(train_iter, eval_data=val_iter, optimizer='sgd', optimizer_params={'learning_rate':0.1},
              batch_end_callback = mx.callback.Speedometer(batch_size, 100),  # log training information every 100 batches
              eval_metric='acc',
              num_epoch=10)  # train for 10 epochs, i.e. 10 full passes over the training set
# Test
test_iter = mx.io.NDArrayIter(mnist['test_data'], None, batch_size)
prob = cnn_model.predict(test_iter)  # test method 1: raw class probabilities
test_iter = mx.io.NDArrayIter(mnist['test_data'], mnist['test_label'], batch_size)
acc = mx.metric.Accuracy()
cnn_model.score(test_iter, acc)  # test method 2: score with a metric
print(acc)
assert acc.get()[1] > 0.98, "Achieved accuracy (%f) is lower than expected (0.98)" % acc.get()[1]
》Using the gluon API
import gluonbook as gb
from mxnet import autograd,nd,init,gluon
from mxnet.gluon import loss as gloss,data as gdata,nn,utils as gutils
import mxnet as mx
net = nn.Sequential()
with net.name_scope():
    net.add(
        nn.Conv2D(channels=32, kernel_size=5, activation='relu'),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Flatten(),
        nn.Dense(128, activation='sigmoid'),
        nn.Dense(10, activation='sigmoid')
    )
lr = 0.5
batch_size = 256
# Pick the context first: use the GPU if one is available, otherwise fall back to the CPU
# (the original initialized on mx.gpu() unconditionally, which crashes on CPU-only machines)
try:
    ctx = mx.gpu()
    _ = nd.zeros((1,), ctx=ctx)
except Exception:
    ctx = mx.cpu()
net.initialize(init=init.Xavier(), ctx=ctx)
# note: gluonbook's loader actually returns Fashion-MNIST iterators
train_data, test_data = gb.load_data_fashion_mnist(batch_size)
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
loss = gloss.SoftmaxCrossEntropyLoss()
num_epochs = 30
# A hand-written training loop, equivalent to the gb.train call below:
def train(train_data, test_data, net, loss, trainer, num_epochs):
    for epoch in range(num_epochs):
        total_loss = 0
        for x, y in train_data:
            x = x.as_in_context(ctx)
            y = y.as_in_context(ctx)
            with autograd.record():
                y_hat = net(x)
                l = loss(y_hat, y)
            l.backward()
            total_loss += l.sum().asscalar()
            trainer.step(batch_size)
        mx.nd.waitall()
        print("Epoch [{}]: Loss {}".format(epoch, total_loss / (batch_size * len(train_data))))
if __name__ == '__main__':
    gb.train(train_data, test_data, net, loss, trainer, ctx, num_epochs)
4. PyTorch
# MNIST handwritten-digit classification with a CNN
import torch
import numpy as np
from torch.utils.data import DataLoader
from torchvision.datasets import mnist
from torch import nn
from torch.autograd import Variable
from torch import optim
from torchvision import transforms
# Define the CNN
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3),   # 16, 26, 26
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True))
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=3),  # 32, 24, 24
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2))  # 32, 12, 12: (24-2)/2 + 1
        self.layer3 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3),  # 64, 10, 10
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True))
        self.layer4 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3), # 128, 8, 8
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2))  # 128, 4, 4
        self.fc = nn.Sequential(
            nn.Linear(128 * 4 * 4, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 10))
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x
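# The shape notes above follow the standard conv/pool output-size formula (a worked
# example added for clarity): out = floor((in + 2*padding - kernel_size) / stride) + 1.
# E.g. layer2's 2x2 max-pool on a 24x24 map gives (24 + 0 - 2)/2 + 1 = 12, matching
# the "32, 24, 24" -> "32, 12, 12" annotation.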
# Download the MNIST dataset with the built-in helper
train_set = mnist.MNIST('./data',train=True)
test_set = mnist.MNIST('./data',train=False)
# Preprocessing: compose several transforms into one
# (the original normalization constants were lost in extraction; [0.5], [0.5] is a typical choice)
data_tf = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize([0.5], [0.5])])
train_set = mnist.MNIST('./data',train=True,transform=data_tf,download=True)
test_set = mnist.MNIST('./data',train=False,transform=data_tf,download=True)
train_data = DataLoader(train_set,batch_size=64,shuffle=True)
test_data = DataLoader(test_set,batch_size=128,shuffle=False)
net = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(),1e-1)
nums_epoch = 20
# Start training
losses =[]
acces = []
eval_losses = []
eval_acces = []
for epoch in range(nums_epoch):
    train_loss = 0
    train_acc = 0
    net = net.train()
    for img, label in train_data:
        #img = img.reshape(img.size(0),-1)
        img = Variable(img)
        label = Variable(label)
        # forward pass
        out = net(img)
        loss = criterion(out, label)
        # backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # record the loss
        train_loss += loss.item()
        # compute the classification accuracy
        _, pred = out.max(1)
        num_correct = (pred == label).sum().item()
        acc = num_correct / img.shape[0]
        train_acc += acc
    losses.append(train_loss / len(train_data))
    acces.append(train_acc / len(train_data))
    eval_loss = 0
    eval_acc = 0
    # no training on the test set
    for img, label in test_data:
        #img = img.reshape(img.size(0),-1)
        img = Variable(img)
        label = Variable(label)
        out = net(img)
        loss = criterion(out, label)
        # record the loss
        eval_loss += loss.item()
        _, pred = out.max(1)
        num_correct = (pred == label).sum().item()
        acc = num_correct / img.shape[0]
        eval_acc += acc
    eval_losses.append(eval_loss / len(test_data))
    eval_acces.append(eval_acc / len(test_data))
    print('Epoch {} Train Loss {} Train Accuracy {} Test Loss {} Test Accuracy {}'.format(
        epoch + 1, train_loss / len(train_data), train_acc / len(train_data), eval_loss / len(test_data), eval_acc / len(test_data)))
5. Caffe
See the detailed official tutorial: Training LeNet on MNIST with Caffe
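In outline (paraphrasing that tutorial, run from the Caffe root directory): fetch and convert the data with ./data/mnist/get_mnist.sh and ./examples/mnist/create_mnist.sh, then run ./examples/mnist/train_lenet.sh, a thin wrapper around caffe train --solver=examples/mnist/lenet_solver.prototxt. Unlike the frameworks above, the network and solver are defined in prototxt files (lenet_train_test.prototxt, lenet_solver.prototxt) rather than in Python code.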
6. Theano
See the official tutorial at http://www.deeplearning.net/software/theano/tutorial/index.html, or simply switch the Keras backend to Theano as described above.
Appendix: converting models between neural network frameworks
ONNX (Open Neural Network Exchange) is an open file format designed for machine learning and used to store trained models. It lets different AI frameworks (such as PyTorch and MXNet) store model data in the same format and interoperate. The ONNX specification and code are developed jointly by Microsoft, Amazon, Facebook, IBM, and other companies, and are hosted as open source on GitHub. Deep learning frameworks that officially support loading ONNX models for inference currently include Caffe2, PyTorch, MXNet, ML.NET, TensorRT, and Microsoft CNTK; TensorFlow also has unofficial ONNX support. For details, see my blog post at https://blog.csdn.net/xiaomu_347/article/details/100052809
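As a minimal sketch of this interchange workflow (reusing the CNN class from the PyTorch example above; the output file name is arbitrary):
import torch
net = CNN()  # a trained torch.nn.Module; here, the CNN from section 4
net.eval()
dummy_input = torch.randn(1, 1, 28, 28)  # one MNIST-shaped example used to trace the graph
torch.onnx.export(net, dummy_input, "mnist_cnn.onnx")  # serialize the traced model to ONNX
# mnist_cnn.onnx can then be loaded for inference by MXNet, Caffe2, TensorRT, and others.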