Image classification with tf.slim: building VGG16 and ResNet, with a tested accuracy of 99%
If you want concise, readable code, the slim library in TensorFlow's contrib module is a good choice. This post uses slim to implement binary classification of cats and dogs, based on the open-source ResNet and VGGNet implementations; the network type can be chosen at training time, and prediction accuracy can reach 99%.
1. Convert the data into the TFRecords format. Data stored this way is faster to read and more memory-friendly, and it is the format TensorFlow recommends:
2. Build the forward-propagation network forward()
i.e. construct the network: ResNet-101/152, VGGNet-16, and so on
3. Build the backward-propagation part backward()
i.e. the loss function, the choice of optimizer, the exponentially decaying learning rate, and data reading
4. Build the training loop and create a Session to start training
5. Save the model
6. Test and validate
Because we need a test set, the whole dataset has to be shuffled when the TFRecords are built:
We also need to set the proportion of data used for training:
train_percent = 0.90
# path to the dataset directory
data_path = FLAGS.data
# use os.listdir to get all file names in the dataset directory
total = os.listdir(data_path)
num = len(total)
# list_ holds the indices of all files
list_ = range(num)
# use train_percent to split training and test sets
train_num = int(num * train_percent)
test_num = num - train_num
# use random.sample to draw a shuffled set of training indices
train_index = random.sample(list_, train_num)
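The test set then simply consists of every index not drawn into train_index; the full script below handles this through an else branch, but spelled out explicitly it would look like this (a small sketch using the variables above):
# indices not sampled for training form the test set
test_index = [i for i in list_ if i not in train_index]
assert len(test_index) == test_num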
File name: creat_tfrecords.py
How to run:
python creat_tfrecords.py --data [path to the image dataset]
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os
from PIL import Image
import random
objects = ['cat','dog']  # 'cat' -> 0, 'dog' -> 1
#path='../../SecondProject/dog_and_cat_200'
train_percent = 0.90
filename_train="./data/train.tfrecords"
filename_test="./data/test.tfrecords"
writer_train= tf.python_io.TFRecordWriter(filename_train)
writer_test= tf.python_io.TFRecordWriter(filename_test)
tf.app.flags.DEFINE_string(
    'data', 'None', 'path to the image dataset.')
FLAGS = tf.app.flags.FLAGS
# exit if no dataset path was given (note the flag default is the string 'None')
if FLAGS.data == 'None':
    os._exit(0)
data_path = FLAGS.data
total = os.listdir(data_path)
num = len(total)
list_ = range(num)
train_num = int(num * train_percent)
test_num = num-train_num
train_index = random.sample(list_,train_num)
dim = (224,224)
object_path = data_path
for index in list_:
if index in train_index:
img_path=os.path.join(object_path,total[index])
print(img_path)
img=Image.open(img_path)
img=img.resize(dim)
img_raw=img.tobytes()
if 'cat' in total[index]:
example = tf.train.Example(features=tf.train.Features(feature={
'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[0])),
'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw]))
}))
index_ = 0
        elif 'dog' in total[index]:
example = tf.train.Example(features=tf.train.Features(feature={
'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[1])),
'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw]))
}))
index_ = 1
        writer_train.write(example.SerializeToString())  # serialize to a string
        print([index_,total[index]])
else:
        img_path=os.path.join(object_path,total[index])
print(img_path)
img=Image.open(img_path)
img=img.resize(dim)
img_raw=img.tobytes()
if 'cat' in total[index]:
example = tf.train.Example(features=tf.train.Features(feature={
'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[0])),
'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw]))
}))
index_ = 0
        elif 'dog' in total[index]:
example = tf.train.Example(features=tf.train.Features(feature={
'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[1])),
'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw]))
}))
index_ = 1
        writer_test.write(example.SerializeToString())  # serialize to a string
        print([index_,total[index]])
writer_train.close()
writer_test.close()
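Once creat_tfrecords.py has run, it can be worth sanity-checking the generated files; here is a minimal sketch using the TF 1.x record iterator (an illustration only, not part of the script above):
import tensorflow as tf

# count the records in the training file and peek at the first label
count = 0
for record in tf.python_io.tf_record_iterator("./data/train.tfrecords"):
    example = tf.train.Example.FromString(record)
    if count == 0:
        print("first label:", example.features.feature['label'].int64_list.value[0])
    count += 1
print("total training examples:", count)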
First we need the ResNet network itself (not written by me; this part is open source):
File name: resnet_v2.py
# -*- coding:UTF-8 -*-
import collections
import tensorflow as tf
slim = tf.contrib.slim
utils = slim.utils
class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])):
    '''
    A named tuple describing a ResNet Block, built with collections.namedtuple.
    Defining a typical Block takes three arguments:
      scope:   the name of the Block
      unit_fn: the residual learning unit of ResNet V2
      args:    determines how many units the block has; format [(depth, depth_bottleneck, stride), ...]
    Example: Block('block1', bottleneck, [(256, 64, 1), (256, 64, 1), (256, 64, 2)])
    '''
def subsample(inputs, factor, scope=None):
if factor == 1:
return inputs
else:
return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)
def conv2d_same(inputs, num_outputs, kernel_size, stride, scope=None):
"""
if stride>1, then we do explicit zero-padding, followed by conv2d with 'VALID' padding
"""
if stride == 1:
return slim.conv2d(inputs, num_outputs, kernel_size, stride=1, padding='SAME', scope=scope)
else:
pad_total = kernel_size - 1
pad_beg = pad_total // 2
pad_end = pad_total - pad_beg
inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])
return slim.conv2d(inputs, num_outputs, kernel_size, stride=stride, padding='VALID', scope=scope)
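# Quick check of the padding arithmetic above (illustrative): for the 7x7, stride-2 root convolution
# used later, pad_total = 6, pad_beg = 3, pad_end = 3, so a 224x224 input is padded to 230x230 and
# the 'VALID' convolution produces a 112x112 feature map.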
# --------------------- Function that stacks the Blocks -------------------
@slim.add_arg_scope
def stack_blocks_dense(net, blocks, outputs_collections=None):
"""
Args:
net: A Tensor of size [batch, height, width, channels].输入
blocks: 是之前定义的Block的class的列表。
outputs_collections: 收集各个end_points的collections
Returns:
net: Output tensor
"""
    # Loop over the list of Block objects, stacking the residual units one by one
for block in blocks:
with tf.variable_scope(block.scope, 'block', [net]) as sc:
for i, unit in enumerate(block.args):
with tf.variable_scope('unit_%d' % (i + 1), values=[net]):
unit_depth, unit_depth_bottleneck, unit_stride = unit
net = block.unit_fn(net, depth=unit_depth, depth_bottleneck=unit_depth_bottleneck,
stride=unit_stride)
net = utils.collect_named_outputs(outputs_collections, sc.name, net)
return net
# Create the common ResNet arg_scope; an arg_scope defines default argument values for certain functions
def resnet_arg_scope(is_training=True,        # training flag
                     weight_decay=0.0001,     # weight-decay rate
                     batch_norm_decay=0.997,  # BN decay rate
                     batch_norm_epsilon=1e-5, # BN epsilon, default 1e-5
                     batch_norm_scale=True):  # BN scale, default value
    batch_norm_params = {  # parameter dictionary for batch normalization
        'is_training': is_training,
        'decay': batch_norm_decay,
        'epsilon': batch_norm_epsilon,
        'scale': batch_norm_scale,
        'updates_collections': tf.GraphKeys.UPDATE_OPS,
    }
    with slim.arg_scope(  # use slim.arg_scope to set several defaults for [slim.conv2d]
            [slim.conv2d],
            weights_regularizer=slim.l2_regularizer(weight_decay),    # L2 weight regularizer
            weights_initializer=slim.variance_scaling_initializer(),  # weight initializer
            activation_fn=tf.nn.relu,                                 # activation function
            normalizer_fn=slim.batch_norm,                            # normalizer set to BN
            normalizer_params=batch_norm_params):
        with slim.arg_scope([slim.batch_norm], **batch_norm_params):
            with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc:
                return arg_sc  # return the nested arg_scope
# ------------------ The core bottleneck residual learning unit --------------------
@slim.add_arg_scope
def bottleneck(inputs, depth, depth_bottleneck, stride,
outputs_collections=None, scope=None):
"""
Args:
inputs: A tensor of size [batch, height, width, channels].
depth、depth_bottleneck:、stride三个参数是前面blocks类中的args
rate: An integer, rate for atrous convolution.
outputs_collections: 是收集end_points的collection
"""
with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:
        depth_in = utils.last_dimension(inputs.get_shape(), min_rank=4)  # number of input channels (last dimension)
        preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu, scope='preact')
        if depth == depth_in:
            # If the unit's input and output channel counts match, just subsample inputs by the stride
            shortcut = subsample(inputs, stride, 'shortcut')
        else:
            # Otherwise use a 1x1 convolution with the given stride to make the channel counts match
            shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride,
                                   normalizer_fn=None, activation_fn=None,
                                   scope='shortcut')
        # First a 1x1 convolution with stride 1 and depth_bottleneck output channels
        residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1, scope='conv1')
        # Then a 3x3 convolution with the given stride and depth_bottleneck output channels
        residual = conv2d_same(residual, depth_bottleneck, 3, stride, scope='conv2')
        # Finally a 1x1 convolution with stride 1 and depth output channels, giving the final residual;
        # this last layer has no normalizer and no activation function
        residual = slim.conv2d(residual, depth, [1, 1], stride=1,
                               normalizer_fn=None, activation_fn=None,
                               scope='conv3')
        # Add the (subsampled) shortcut and the residual
        output = shortcut + residual
        return utils.collect_named_outputs(outputs_collections, sc.name, output)
# ------------------- Main function that builds the resnet_v2 network ------------------
def resnet_v2(inputs, blocks, num_classes=None, global_pool=True,
include_root_block=True, reuse=None, scope=None):
with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc:
end_points_collection = sc.original_name_scope + '_end_points'
with slim.arg_scope([slim.conv2d, bottleneck, stack_blocks_dense],
outputs_collections=end_points_collection):
net = inputs
            if include_root_block:
                # If requested, build ResNet's initial 7x7, stride-2 convolution with 64 output
                # channels, followed by a 3x3 max pool
                with slim.arg_scope([slim.conv2d], activation_fn=None, normalizer_fn=None):
                    net = conv2d_same(net, 64, 7, stride=2, scope='conv1')
                net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
                # after two stride-2 layers the feature map is 1/4 of the input size
            net = stack_blocks_dense(net, blocks)  # build the groups of residual units
            net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm')
            if global_pool:
                # tf.reduce_mean implements global average pooling more efficiently than avg_pool
                net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)
            if num_classes is not None:  # if a number of classes was given,
                # add a 1x1 convolution with num_classes output channels, no activation and no normalizer
                net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                                  normalizer_fn=None, scope='logits')
            end_points = utils.convert_collection_to_dict(end_points_collection)  # convert the collection to a Python dict
            if num_classes is not None:
                end_points['predictions'] = slim.softmax(net, scope='predictions')  # final predictions
return net, end_points
# ------------------- Build the ResNet-50/101/152/200 models --------------------
def resnet_v2_50(inputs, num_classes=None, global_pool=True, reuse=None,
scope='resnet_v2_50'):
blocks = [
Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        # Args:
        #   'block1': the name (scope) of the Block
        #   bottleneck: the ResNet V2 residual learning unit
        #   [(256, 64, 1)] * 2 + [(256, 64, 2)]: the Block's args, a list in which every element
        #     corresponds to one bottleneck unit. The first two elements are (256, 64, 1) and the last
        #     is (256, 64, 2); each element is a 3-tuple (depth, depth_bottleneck, stride).
        #   For example, (256, 64, 3) describes a bottleneck unit (three conv layers) whose third layer
        #     has 256 output channels (depth), whose first two layers have 64 output channels
        #     (depth_bottleneck), and whose middle layer has stride 3. Its structure is:
        #     [(1x1/s1, 64), (3x3/s3, 64), (1x1/s1, 256)]
Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
Block('block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 2)]),
Block('block4', bottleneck, [(2048, 512, 1)] * 3)]
return resnet_v2(inputs, blocks, num_classes, global_pool,
include_root_block=True, reuse=reuse, scope=scope)
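# Sanity check of the depth (illustrative): 3 + 4 + 6 + 3 = 16 bottleneck units, each containing
# 3 convolutions, plus the root 7x7 conv and the final 1x1 'logits' conv: 16 * 3 + 2 = 50 layers.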
def resnet_v2_101(inputs, num_classes=None, global_pool=True, reuse=None,
scope='resnet_v2_101'):
"""ResNet-101 model of [1]. See resnet_v2() for arg and return description."""
blocks = [
Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
Block('block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 2)]),
Block('block4', bottleneck, [(2048, 512, 1)] * 3)]
return resnet_v2(inputs, blocks, num_classes, global_pool,
include_root_block=True, reuse=reuse, scope=scope)
# Compared with the previous model, the extra units are mainly added in block3
def resnet_v2_152(inputs, num_classes=None, global_pool=True, reuse=None,
scope='resnet_v2_152'):
"""ResNet-152 model of [1]. See resnet_v2() for arg and return description."""
blocks = [
Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
Block('block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]),
Block('block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
Block('block4', bottleneck, [(2048, 512, 1)] * 3)]
return resnet_v2(inputs, blocks, num_classes, global_pool,
include_root_block=True, reuse=reuse, scope=scope)
# Compared with the previous model, the extra units are mainly added in block2
def resnet_v2_200(inputs, num_classes=None, global_pool=True, reuse=None,
scope='resnet_v2_200'):
"""ResNet-200 model of [2]. See resnet_v2() for arg and return description."""
blocks = [
Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
Block('block2', bottleneck, [(512, 128, 1)] * 23 + [(512, 128, 2)]),
Block('block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
Block('block4', bottleneck, [(2048, 512, 1)] * 3)]
return resnet_v2(inputs, blocks, num_classes, global_pool,
include_root_block=True, reuse=reuse, scope=scope)
Building VGGNet-16:
File name: vgg16.py
import tensorflow as tf
import numpy as np
slim = tf.contrib.slim
def vgg16_WithoutArgvs(inputs):
with slim.arg_scope([slim.conv2d, slim.fully_connected],
activation_fn=tf.nn.relu,
normalizer_fn=slim.batch_norm,
weights_initializer=slim.variance_scaling_initializer(),
normalizer_params=
{
'is_training': True,
'decay': 0.997, # batch_norm_decay=0.997
                            'epsilon': 1e-5, # BN epsilon, default 1e-5
                            'scale': True, # BN scale, default value
'updates_collections': tf.GraphKeys.UPDATE_OPS,
},
weights_regularizer=slim.l2_regularizer(0.0005)):
        # Main network structure
# Layer 1
net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], trainable=True, scope='conv1')
net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool1')
# Layer 2
net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], trainable=True, scope='conv2')
net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool2')
# Layer 3
net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], trainable=True, scope='conv3')
net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool3')
# Layer 4
net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], trainable=True, scope='conv4')
net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool4')
# Layer 5
net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], trainable=True, scope='conv5')
net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool5')
#fully connected
net = slim.flatten(net,scope='flat5')
net = slim.fully_connected(net, 4096, scope='fc6')
net = slim.dropout(net, 0.5, scope='dropout6')
net = slim.fully_connected(net, 4096, scope='fc7')
net = slim.dropout(net, 0.5, scope='dropout7')
net = slim.fully_connected(net, 1000, activation_fn=None, scope='fc8')
return net
Building the simple network (simplenet):
def forward_simple(datas_train,is_training):
with slim.arg_scope([slim.conv2d, slim.fully_connected],
activation_fn=tf.nn.relu,
weights_regularizer=slim.l2_regularizer(0.0005)):
net = slim.conv2d(datas_train,64 , [3, 3])
net = slim.max_pool2d(net, [2, 2])
net = slim.conv2d(net,128 , [3, 3])
net = slim.max_pool2d(net, [2, 2])
net = slim.conv2d(net,128 , [3, 3])
net = slim.max_pool2d(net, [2, 2])
net = slim.flatten(net)
net = slim.fully_connected(net, 1024, scope='fc1')
net = slim.dropout(net, 0.5, scope='dropout1')
net = slim.fully_connected(net, 512, scope='fc2')
net = slim.dropout(net, 0.5, scope='dropout2')
net = slim.fully_connected(net, 64, activation_fn=None, scope='fc3')
net = slim.dropout(net, 0.5, scope='dropout3')
net = slim.fully_connected(net,num_classes)
outputs = slim.softmax(net, scope='predictions')
return outputs
Create train.py and import the two files above:
forward(): (I also built a simple network of my own, the 'simple' net, just for comparison with the others; it can be ignored)
def forward(datas_train,is_training):
if FLAGS.typenets == 'vggnet16':
net = vggnet(datas_train)
net = slim.fully_connected(net,num_classes,activation_fn=None,scope='fc1')
#the last layer
outputs = slim.softmax(net, scope='predictions')
#outputs = tf.nn.softmax(net,namescope='output')
elif FLAGS.typenets == 'resnet50':
with slim.arg_scope(resnet_v2.resnet_arg_scope(is_training=is_training)):
net,_ = resnet_v2.resnet_v2_50(datas_train,num_classes=num_classes)
net = slim.flatten(net,scope='flat2')
outputs = slim.softmax(net, scope='predictions')
elif FLAGS.typenets == 'resnet101':
with slim.arg_scope(resnet_v2.resnet_arg_scope(is_training=is_training)):
net,_ = resnet_v2.resnet_v2_101(datas_train,num_classes=num_classes)
net = slim.flatten(net,scope='flat2')
outputs = slim.softmax(net, scope='predictions')
elif FLAGS.typenets == 'resnet152':
with slim.arg_scope(resnet_v2.resnet_arg_scope(is_training=is_training)):
net,_ = resnet_v2.resnet_v2_152(datas_train,num_classes=num_classes)
net = slim.flatten(net,scope='flat2')
outputs = slim.softmax(net, scope='predictions')
elif FLAGS.typenets == 'resnet200':
with slim.arg_scope(resnet_v2.resnet_arg_scope(is_training=is_training)):
net,_ = resnet_v2.resnet_v2_200(datas_train,num_classes=num_classes)
net = slim.flatten(net,scope='flat2')
outputs = slim.softmax(net, scope='predictions')
elif FLAGS.typenets == 'simple':
outputs = forward_simple(datas_train,is_training)
return outputs
x_datas = tf.placeholder(tf.float32,[None,224,224,3],name='Input')
y_labels = tf.placeholder(tf.float32,[None,2],name='labels')
is_training = tf.placeholder(dtype=tf.bool,name='is_training')
predictions = forward(x_datas,is_training)
The final output goes through a softmax, so the loss here should be cross-entropy:
loss = slim.losses.softmax_cross_entropy(predictions, y_labels)
Record the loss over the course of training so it can be viewed with TensorBoard:
with tf.name_scope('cross_entropy'):
loss = slim.losses.get_total_loss(add_regularization_losses=True)
tf.summary.scalar('cross_entropy',loss)
Likewise, record how the accuracy changes:
correct_prediction = tf.equal(tf.argmax(predictions,1),tf.argmax(y_labels,1))
with tf.name_scope('accuracy'):
accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
tf.summary.scalar('accuracy',accuracy)
To keep the network from converging too early and then bouncing back and forth with low accuracy, use an exponentially decaying learning rate:
# exponentially decaying learning rate
global_step = tf.Variable(0,trainable=False)
learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE,
global_step,
decay_step, #dataset_num/batch_size,
LRARNING_RATE_DECAY,
staircase=True)
# Adam optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, epsilon=0.01)
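As a quick sanity check of this schedule (a small illustration using the constants set in the full train.py listing below: LEARNING_RATE_BASE = 0.003, LRARNING_RATE_DECAY = 0.96, decay_step = 300; with staircase=True the exponent is an integer):
# lr = LEARNING_RATE_BASE * LRARNING_RATE_DECAY ** (global_step // decay_step)
for step in [0, 300, 3000, 30000]:
    print(step, 0.003 * 0.96 ** (step // 300))
# 0 -> 0.003, 300 -> 0.00288, 3000 -> ~0.00199, 30000 -> ~0.0000506
Note that the decay only advances because global_step is passed to optimizer.minimize() in the snippet below.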
Because BN layers are used in the network, two extra steps are needed: the BN update ops have to run with every training step so the moving statistics get updated, and those moving averages (moving_mean / moving_variance, which are not trainable variables and therefore are not saved by default) have to be added to the Saver's variable list later:
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):  # make sure train_step runs only after the BN update ops
    train_step = optimizer.minimize(loss, global_step=global_step)  # pass global_step so the decayed learning rate actually advances
Because the data was already shuffled when the TFRecords were built, there is no need to shuffle again here, and tf.train.batch() is sufficient.
If the data had not been shuffled, tf.train.shuffle_batch() would be the better choice (see the sketch after the snippet below).
dsets, dlabel = read_and_decode(tfrecords_path)
img_batch, label_batch = tf.train.batch([dsets,dlabel],
batch_size=batch_size,
num_threads=4,
capacity= 640)
label_batch= tf.one_hot(label_batch,num_classes)
sets_test, label_test = read_and_decode(test_path)
dsets_test, dlabel_test = tf.train.batch([sets_test, label_test],
batch_size=16,
num_threads=3,
capacity = 64)
dlabel_test= tf.one_hot(dlabel_test,num_classes)
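For reference, if the TFRecords had been written in their original order rather than shuffled, the batching call could be swapped for tf.train.shuffle_batch; a minimal sketch (the capacity and min_after_dequeue values here are illustrative, keeping capacity >= min_after_dequeue + num_threads * batch_size):
img_batch, label_batch = tf.train.shuffle_batch([dsets, dlabel],
                                                batch_size=batch_size,
                                                num_threads=4,
                                                capacity=640,
                                                min_after_dequeue=320)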
merged = tf.summary.merge_all()
First:
with tf.Session() as sess:
Then:
    # initialize the summary writer
summary_writer = tf.summary.FileWriter(log_path,sess.graph)
init = tf.global_variables_initializer()
sess.run(init)
This step is required; without it no data can be read from the input queues:
coord = tf.train.Coordinator()
threads=tf.train.start_queue_runners(sess=sess,coord=coord)
for i in range(num_steps):
        # time each step
start_time = time.time()
img_trains,label_trains = sess.run([img_batch, label_batch])
_,acc,los,summary= sess.run([train_step,accuracy,loss,merged],feed_dict={x_datas:img_trains,y_labels:label_trains,is_training:True})
        # write the summary to the log
summary_writer.add_summary(summary,i)
        # evaluate the model trained so far
img_test,label_test = sess.run([dsets_test, dlabel_test])
y_test,acc_test = sess.run([predictions,accuracy],feed_dict={x_datas:img_test,y_labels:label_test,is_training:False})
#print(y_test)
duration = time.time()-start_time
        # print progress
if i % 5 == 0:
print("the accuracy is : %.2f%%,the loss is : [%.8f],the total step is :[%i] " %(acc*100,los,i),end='')
print("test accuracy is : %.2f%%,%.3fs/step"%(acc_test*100,duration))
if i % 1000 == 0:
            # save an intermediate checkpoint
saver_path = saver.save(sess,model_path,global_step = i,write_meta_graph=True)
Because BN layers are used, the moving averages of their statistics need to be saved as well.
Save with tf.train.Saver(): the first argument is the list of variables to save (with the moving averages already appended), the second is the maximum number of checkpoints to keep, and the third keeps one checkpoint every n hours:
# handle the BN layers: collect the moving-average variables
var_list = tf.trainable_variables()
g_list = tf.global_variables()
bn_moving_vars = [g for g in g_list if 'moving_mean' in g.name]
bn_moving_vars += [g for g in g_list if 'moving_variance' in g.name]
var_list += bn_moving_vars
saver = tf.train.Saver(var_list=var_list,max_to_keep=2,keep_checkpoint_every_n_hours=1)
Inside the main loop:
if i % 1000 == 0:
    # save an intermediate checkpoint
saver_path = saver.save(sess,model_path,global_step = i,write_meta_graph=True)
Finally, save once more at the end:
# save the final model (ckpt, meta, data files)
saver_path = saver.save(sess,model_path,global_step = num_steps,write_meta_graph=True)
---------------------- You can skip straight to the end and grab the full code from GitHub ------------------------
# -*- coding: utf-8 -*-
"""
designer by zouyuelin
Tianjin University(TJU)
"""
import tensorflow as tf
import numpy as np
import vgg16
import resnet_v2
from PIL import Image
import os
import cv2
import time
slim = tf.contrib.slim
vggnet = vgg16.vgg16_WithoutArgvs
images_path = '../../SecondProject/data/'
tfrecords_path = './data/train.tfrecords'#train_200.tfrecords
test_path = './data/test.tfrecords'#test_200.tfrecords
model_path = './model/model.ckpt'
log_path = './log/log'
pb_path = './model/classify.pb'
pbtxt_path = './model/'
labels = ['cat','dog']
# Network hyper-parameters: learning rate, decay rate, etc.
batch_size = 64
num_steps = 100
LEARNING_RATE_BASE = 0.003
LRARNING_RATE_DECAY = 0.96
dataset_num = 25000
decay_step = 300
num_classes = 2
dim = (224,224)
tf.app.flags.DEFINE_string(
'typenets', 'resnet101', 'type of the training nets.')
FLAGS = tf.app.flags.FLAGS
# read and decode the TFRecords
def read_and_decode(tfrecords_path):
filename_queue = tf.train.string_input_producer([tfrecords_path],shuffle=True)
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(serialized_example,features={
'label': tf.FixedLenFeature([], tf.int64),
'img_raw' : tf.FixedLenFeature([], tf.string),})
image = tf.decode_raw(features['img_raw'], tf.uint8)
    image = tf.reshape(image,[224,224,3])  # reshape to 224*224*3
    image = tf.cast(image,tf.float32)*(1./255)  # scale the image tensor to [0, 1] by multiplying by 1/255
label = tf.cast(features['label'], tf.int32)
return image,label
# one-hot encoding helper (tf.one_hot() could be used instead)
# Note: do not call tf.one_hot inside the training loop. Because of TensorFlow's static-graph
# mechanism the graph would keep growing, training would get slower and slower, and memory would fill up.
def one_hot(labels,Label_class):
one_hot_label = np.array([[int(i == int(labels[j])) for i in range(Label_class)] for j in range(len(labels))])
return one_hot_label
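# For illustration only (not part of the original script): one_hot([0, 1, 1], 2)
# returns [[1, 0], [0, 1], [0, 1]].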
# load a frozen graph from a .pb file
def load_graph(model_file):
graph = tf.Graph()
graph_def = tf.GraphDef()
with open(model_file,"rb") as f:
graph_def.ParseFromString(f.read())
with graph.as_default():
tf.import_graph_def(graph_def)
return graph
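# load_graph is a small helper for loading a frozen .pb graph; it is not called in this script.
# Illustrative (hypothetical) usage:
#   graph = load_graph(pb_path)
#   inp = graph.get_tensor_by_name('import/Input:0')  # tf.import_graph_def prefixes names with 'import/' by default
#   out = graph.get_tensor_by_name('import/predictions/Softmax:0')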
#set the forward nets
# forward pass for the larger networks (VGG16 / ResNet variants)
def forward(datas_train,is_training):
if FLAGS.typenets == 'vggnet16':
net = vggnet(datas_train)
net = slim.fully_connected(net,num_classes,activation_fn=None,scope='fc1')
#the last layer
outputs = slim.softmax(net, scope='predictions')
#outputs = tf.nn.softmax(net,namescope='output')
elif FLAGS.typenets == 'resnet50':
with slim.arg_scope(resnet_v2.resnet_arg_scope(is_training=is_training)):
net,_ = resnet_v2.resnet_v2_50(datas_train,num_classes=num_classes)
net = slim.flatten(net,scope='flat2')
outputs = slim.softmax(net, scope='predictions')
elif FLAGS.typenets == 'resnet101':
with slim.arg_scope(resnet_v2.resnet_arg_scope(is_training=is_training)):
net,_ = resnet_v2.resnet_v2_101(datas_train,num_classes=num_classes)
net = slim.flatten(net,scope='flat2')
outputs = slim.softmax(net, scope='predictions')
elif FLAGS.typenets == 'resnet152':
with slim.arg_scope(resnet_v2.resnet_arg_scope(is_training=is_training)):
net,_ = resnet_v2.resnet_v2_152(datas_train,num_classes=num_classes)
net = slim.flatten(net,scope='flat2')
outputs = slim.softmax(net, scope='predictions')
elif FLAGS.typenets == 'resnet200':
with slim.arg_scope(resnet_v2.resnet_arg_scope(is_training=is_training)):
net,_ = resnet_v2.resnet_v2_200(datas_train,num_classes=num_classes)
net = slim.flatten(net,scope='flat2')
outputs = slim.softmax(net, scope='predictions')
elif FLAGS.typenets == 'simple':
outputs = forward_simple(datas_train,is_training)
return outputs
# Simple network ------------------- designed by zouyuelin (Tianjin University)
# Even this simple network can reach a fairly high classification accuracy
def forward_simple(datas_train,is_training):
with slim.arg_scope([slim.conv2d, slim.fully_connected],
activation_fn=tf.nn.relu,
weights_regularizer=slim.l2_regularizer(0.0005)):
net = slim.conv2d(datas_train,64 , [3, 3])
net = slim.max_pool2d(net, [2, 2])
net = slim.conv2d(net,128 , [3, 3])
net = slim.max_pool2d(net, [2, 2])
net = slim.conv2d(net,128 , [3, 3])
net = slim.max_pool2d(net, [2, 2])
net = slim.flatten(net)
net = slim.fully_connected(net, 1024, scope='fc1')
net = slim.dropout(net, 0.5, scope='dropout1')
net = slim.fully_connected(net, 512, scope='fc2')
net = slim.dropout(net, 0.5, scope='dropout2')
net = slim.fully_connected(net, 64, activation_fn=None, scope='fc3')
net = slim.dropout(net, 0.5, scope='dropout3')
net = slim.fully_connected(net,num_classes)
outputs = slim.softmax(net, scope='predictions')
return outputs
#set the backward nets:
# backward pass: loss, optimizer and the training loop
def backward():
x_datas = tf.placeholder(tf.float32,[None,224,224,3],name='Input')
y_labels = tf.placeholder(tf.float32,[None,2],name='labels')
is_training = tf.placeholder(dtype=tf.bool,name='is_training')
predictions = forward(x_datas,is_training)
    # loss function
#ce = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=predictions,labels=tf.argmax(y_labels,1))
#cem = tf.reduce_mean(ce)
#loss = cem+tf.add_n(tf.get_collection('losses'))
loss = slim.losses.softmax_cross_entropy(predictions, y_labels)
with tf.name_scope('cross_entropy'):
loss = slim.losses.get_total_loss(add_regularization_losses=True)
tf.summary.scalar('cross_entropy',loss)
    # accuracy
correct_prediction = tf.equal(tf.argmax(predictions,1),tf.argmax(y_labels,1))
with tf.name_scope('accuracy'):
accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
tf.summary.scalar('accuracy',accuracy)
    # exponentially decaying learning rate
global_step = tf.Variable(0,trainable=False)
learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE,
global_step,
decay_step, #dataset_num/batch_size,
LRARNING_RATE_DECAY,
staircase=True)
    # optimization: gradient descent can also find a good solution
    # gradient-descent optimizer (alternative)
#optimizer = tf.train.GradientDescentOptimizer(learning_rate)
#train_step = optimizer.minimize(loss,global_step=global_step)
    # Adam optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, epsilon=0.01)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):  # make sure train_step runs only after the BN update ops
        train_step = optimizer.minimize(loss, global_step=global_step)  # pass global_step so the decayed learning rate actually advances
print('---------------------------------Load the image data------------------------------------')
#--------------------------------------------------------------tfrecords-------------------------------------------------
    # read the data
    # capacity >= min_after_dequeue + num_threads * batch_size
dsets, dlabel = read_and_decode(tfrecords_path)
img_batch, label_batch = tf.train.batch([dsets,dlabel],
batch_size=batch_size,
num_threads=4,
capacity= 640)
label_batch= tf.one_hot(label_batch,num_classes)
sets_test, label_test = read_and_decode(test_path)
dsets_test, dlabel_test = tf.train.batch([sets_test, label_test],
batch_size=16,
num_threads=3,
capacity = 64)
dlabel_test= tf.one_hot(dlabel_test,num_classes)
print('---------------------------------Load the image data successful------------------------------------')
    # merge all summaries for the log
merged = tf.summary.merge_all()
    # training
print("***************************************start the gpu with kernel function*********************************************\n")
with tf.Session() as sess:
print("###########the training is start#########")
        # initialize the summary writer
summary_writer = tf.summary.FileWriter(log_path,sess.graph)
init = tf.global_variables_initializer()
sess.run(init)
coord = tf.train.Coordinator()
threads=tf.train.start_queue_runners(sess=sess,coord=coord)
        # handle the BN layers: collect the moving-average variables
var_list = tf.trainable_variables()
g_list = tf.global_variables()
bn_moving_vars = [g for g in g_list if 'moving_mean' in g.name]
bn_moving_vars += [g for g in g_list if 'moving_variance' in g.name]
var_list += bn_moving_vars
saver = tf.train.Saver(var_list=var_list,max_to_keep=2,keep_checkpoint_every_n_hours=1)
for i in range(num_steps):
            # time each step
start_time = time.time()
img_trains,label_trains = sess.run([img_batch, label_batch])
_,acc,los,summary= sess.run([train_step,accuracy,loss,merged],feed_dict={x_datas:img_trains,y_labels:label_trains,is_training:True})
            # write the summary to the log
summary_writer.add_summary(summary,i)
            # evaluate the model trained so far
img_test,label_test = sess.run([dsets_test, dlabel_test])
y_test,acc_test = sess.run([predictions,accuracy],feed_dict={x_datas:img_test,y_labels:label_test,is_training:False})
#print(y_test)
duration = time.time()-start_time
            # print progress
if i % 5 == 0:
print("the accuracy is : %.2f%%,the loss is : [%.8f],the total step is :[%i] " %(acc*100,los,i),end='')
print("test accuracy is : %.2f%%,%.3fs/step"%(acc_test*100,duration))
if i % 1000 == 0:
                # save an intermediate checkpoint
saver_path = saver.save(sess,model_path,global_step = i,write_meta_graph=True)
        # save the final model (ckpt, meta, data files)
saver_path = saver.save(sess,model_path,global_step = num_steps,write_meta_graph=True)
        # stop the thread coordinator
coord.request_stop()
coord.join(threads)
        # freeze and save the model as a .pb file
constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['predictions/Softmax'])
with tf.gfile.FastGFile(pb_path, mode='wb') as f:
f.write(constant_graph.SerializeToString())
with tf.gfile.FastGFile(pb_path, mode='rb') as f:
graph_def = tf.GraphDef()
graph_def.ParseFromString(f.read())
        # also write the graph as .pbtxt
tf.train.write_graph(graph_def, pbtxt_path, 'classify.pbtxt', as_text=True)
'''
var_list = tf.global_variables()
constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def,
output_node_names=[var_list[i].name for i in range(len(var_list))])
tf.train.write_graph(constant_graph, pbtxt_path, pb_name, as_text=False)
tf.train.write_graph(constant_graph, pbtxt_path, pbtext_name, as_text=True)
'''
summary_writer.close()
def main():
backward()
main()
File name: test_OpenCV.py
How to run (either form works):
python test_OpenCV.py --model model/model.ckpt-20000.meta --image [image to predict (e.g. ../../SecondProject/data/cat.9001.jpg)]
python test_OpenCV.py --image [image to predict (e.g. ../../SecondProject/data/cat.9001.jpg)]
"""
designer by zouyuelin
Tianjin University(TJU)
"""
import cv2
import tensorflow as tf
import numpy as np
from tensorflow.python.platform import gfile
import os
model_path = './model/'
model_name = 'classify.pb'
tf.app.flags.DEFINE_string(
    'image', 'None', 'path of the image to classify.')
tf.app.flags.DEFINE_string(
    'model', 'None', 'path of the checkpoint .meta file.')
FLAGS = tf.app.flags.FLAGS
sess = tf.Session()
# restore from the meta graph + checkpoint
if(FLAGS.model != 'None'):
saver = tf.train.import_meta_graph(FLAGS.model)#.meta
saver.restore(sess,tf.train.latest_checkpoint(model_path))#checkpoint
graph = tf.get_default_graph()
x = sess.graph.get_tensor_by_name("Input:0")
op_to_restore = sess.graph.get_tensor_by_name("predictions/Softmax:0")
is_training = sess.graph.get_tensor_by_name("is_training:0")
# or load the frozen .pb model
else:
with gfile.FastGFile(model_path+model_name,'rb') as f:
graph_def = tf.GraphDef()
graph_def.ParseFromString(f.read())
sess.graph.as_default()
tf.import_graph_def(graph_def,name='')
x = sess.graph.get_tensor_by_name("Input:0")
op_to_restore = sess.graph.get_tensor_by_name("predictions/Softmax:0")
is_training = sess.graph.get_tensor_by_name("is_training:0")
if(FLAGS.image != 'None'):
img = cv2.imread(FLAGS.image)
dst = cv2.resize(img,(224,224),interpolation=cv2.INTER_CUBIC)#
rgb = cv2.cvtColor(dst,cv2.COLOR_BGR2RGB)
else:
os._exit(0)
gb_ = np.array(rgb)*(1./255)
gb = gb_.reshape(1,224,224,3)
print(gb)
feed_dict ={x:gb,is_training:False}
result_index = sess.run(op_to_restore,feed_dict)
cv2.imshow('animals',dst)
cv2.waitKey(1000)
print("-----------------------------predict:---------------------------\n")
print(result_index)
if(np.argmax(result_index)==0):
print("It is a cat")
else:
print("It is a dog")
'''# batch-test the model over a directory of images
image_total = os.listdir(images_path)
for index_animals in image_total:
src = cv2.imread(images_path+index_animals)
dst = cv2.resize(src,(224,224),interpolation=cv2.INTER_CUBIC)#
cv2.imshow('animals',dst)
cv2.waitKey(300)
print('this is %s'%(index_animals),end=' ')
rgb = cv2.cvtColor(dst,cv2.COLOR_BGR2RGB)
gb = rgb.reshape(1,224,224,3)
feed_dict ={x_datas:rgb}
test_output = sess.run([predictions],feed_dict=feed_dict)
index_max = np.argmax(test_output)
if(index_max == 0):
print('prediction:cat\n')
else:
print('prediction:dog\n')
cv2.dnn.readNetFromTensorflow('model/classify.pb')'''
Order in which to run everything:
1. Create the TFRecords
python creat_tfrecords.py --data [path to the image dataset]
2. Train (choose a suitable network; the default is resnet101)
Available options: vggnet16, resnet50, resnet101, resnet152, simple
python train.py --typenets [network type (default resnet101)]
Actually resnet101 is already more than enough!!
3. Test
python test_OpenCV.py --image [image to predict (e.g. ../../SecondProject/data/cat.9001.jpg)]
4. View the training curves
tensorboard --logdir=./log/log
Hehe~
As someone who has only just gotten into machine learning, I still have many shortcomings; thanks to all the experts for their criticism!
Finally, my lab machine configuration:
GPU: NVIDIA TITAN V, 11GB VRAM
RAM: 16GB
CPU: Intel i9-9900, 3.10GHz x 16
Disk: 1.3TB SSD
OS: Ubuntu 20.04.1 LTS (64-bit)
Kernel: 5.4.0
Python: 3.6.0 and 2.7.18
TensorFlow: 1.13.1 (GPU build)
CUDA: 10.0
cuDNN: 7.6.4
The whole project has been uploaded to my GitHub (it can be run directly):