Training resnet_v2_101 on your own data for a classification task: data preparation

I recently did a simple classification task and am writing it up here; it should suit beginners. Everything from data processing to inference code is covered. Yes, I really am that considerate! The code uses TensorFlow 1.14; those who like chasing their dreams can try 2.0. The main differences are that many function entry points in the API have changed, and some older modules are no longer supported in 2.0. For the ResNet itself I use the slim package. I do enjoy reinventing wheels, but the schedule doesn't allow it, and since this isn't the core module I simply take it off the shelf. At this point you may be wondering: slim ships both a v1 and a v2 version, so which one should you use? As I recall, v1 is the model from the original paper, while v2 reworks the residual unit around the shortcut connections: the placement of the activation changes (pre-activation). Judging by the test results the experts have published, v2 is somewhat more accurate than v1.
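
Just for orientation, this is roughly how the slim version of the network gets built later on. A minimal sketch, assuming TF 1.14 with tf.contrib.slim available and the 5-class setup used in this post; the placeholder is only illustrative, since in training the input will be the image batch produced by the TFRecord pipeline below:

import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.contrib.slim.nets import resnet_v2

# illustrative placeholder; during training this is replaced by the image batch from the queue
inputs = tf.placeholder(tf.float32, [None, 224, 224, 3])
with slim.arg_scope(resnet_v2.resnet_arg_scope()):
    net, end_points = resnet_v2.resnet_v2_101(inputs, num_classes=5, is_training=True)
# the contrib version keeps 1x1 spatial dims, i.e. [batch, 1, 1, num_classes]
logits = tf.squeeze(net, axis=[1, 2])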

Getting started: first prepare the data, of course. I convert everything to the TFRecord format here. Collecting the images themselves needs no explanation~ straight to the data-processing code.
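
One more thing before the code: the label files (train.txt / val.txt) are expected to contain one image per line, the file path relative to image_dir followed by an integer class id, separated by a single space; that is what load_labels_file below parses. The file names here are made up, just to show the format:

cat/0001.jpg 0
cat/0002.jpg 0
dog/0001.jpg 1
bird/0003.jpg 2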


import tensorflow as tf
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
import random
from PIL import Image

def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def float_list_feature(value):
    return tf.train.Feature(float_list=tf.train.FloatList(value=value))

def get_example_nums(tf_records_filenames):
    nums= 0
    for record in tf.python_io.tf_record_iterator(tf_records_filenames):
        nums += 1
    return nums

def show_image(title,image):
    plt.imshow(image)
    plt.axis('on')    # set to 'off' to hide the axes
    plt.title(title)  # image title
    plt.show()

def load_labels_file(filename,labels_num=1,shuffle=False):
    images=[]
    labels=[]
    with open(filename) as f:
        lines_list=f.readlines()
        if shuffle:
            random.shuffle(lines_list)

        for lines in lines_list:
            line=lines.rstrip().split(' ')
            label=[]
            for i in range(labels_num):
                label.append(int(line[i+1]))
            images.append(line[0])
            labels.append(label)
    return images,labels

def read_image(filename, resize_height, resize_width,normalization=False):
    bgr_image = cv2.imread(filename)
    if len(bgr_image.shape)==2:  # convert grayscale images to 3 channels
        print("Warning:gray image",filename)
        bgr_image = cv2.cvtColor(bgr_image, cv2.COLOR_GRAY2BGR)

    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)  # convert BGR to RGB
    if resize_height>0 and resize_width>0:
        rgb_image=cv2.resize(rgb_image,(resize_width,resize_height))
    rgb_image=np.asanyarray(rgb_image)
    if normalization:
        rgb_image=rgb_image/255.0
    return rgb_image


def get_batch_images(images,labels,batch_size,labels_nums,one_hot=False,shuffle=False,num_threads=1):
    min_after_dequeue = 200
    capacity = min_after_dequeue + 3 * batch_size  # capacity must be greater than min_after_dequeue
    if shuffle:
        images_batch, labels_batch = tf.train.shuffle_batch([images,labels],batch_size=batch_size,capacity=capacity,min_after_dequeue=min_after_dequeue,num_threads=num_threads)
    else:
        images_batch, labels_batch = tf.train.batch([images,labels],batch_size=batch_size,capacity=capacity,num_threads=num_threads)
    if one_hot:
        labels_batch = tf.one_hot(labels_batch, labels_nums, 1, 0)
    return images_batch,labels_batch

def read_records(filename,resize_height, resize_width,type=None):
    filename_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),
            'height': tf.FixedLenFeature([], tf.int64),
            'width': tf.FixedLenFeature([], tf.int64),
            'depth': tf.FixedLenFeature([], tf.int64),
            'label': tf.FixedLenFeature([], tf.int64)
        }
    )
    tf_image = tf.decode_raw(features['image_raw'], tf.uint8)  # recover the raw image bytes
    tf_height = features['height']
    tf_width = features['width']
    tf_depth = features['depth']
    tf_label = tf.cast(features['label'], tf.int32)
    # PS: when restoring the raw image data, the reshape size must match the shape the image was saved with, otherwise this fails
    # tf_image=tf.reshape(tf_image, [-1])    # flatten to a row vector
    tf_image=tf.reshape(tf_image, [resize_height, resize_width, 3]) # set the image dimensions

    # only after the data is restored can resize_images be applied: uint8 in -> float32 out
    # tf_image=tf.image.resize_images(tf_image,[224, 224])

    # images are stored as uint8, but TensorFlow expects tf.float32 for training
    if type is None:
        tf_image = tf.cast(tf_image, tf.float32)
    elif type=='normalization':  # [1] use this if you need normalization:
        # convert_image_dtype only rescales [0,255] when the input is uint8
        # tf_image = tf.image.convert_image_dtype(tf_image, tf.float32)
        tf_image = tf.cast(tf_image, tf.float32) * (1. / 255.0)  # normalize
    elif type=='centralization':
        # use this if you need normalization plus centering (assuming a mean of 0.5):
        tf_image = tf.cast(tf_image, tf.float32) * (1. / 255) - 0.5  # center
    return tf_image,tf_label


def create_records(image_dir,file, output_record_dir, resize_height, resize_width,shuffle,log=5):
    images_list, labels_list=load_labels_file(file,1,shuffle)
    writer = tf.python_io.TFRecordWriter(output_record_dir)
    for i, [image_name, labels] in enumerate(zip(images_list, labels_list)):
        image_path=os.path.join(image_dir,image_name)
        if not os.path.exists(image_path):
            print('Err:no image',image_path)
            continue
        image = read_image(image_path, resize_height, resize_width)
        image_raw = image.tostring()
        if i%log==0 or i==len(images_list)-1:
            print('------------processing:%d-th------------' % (i))
            print('current image_path=%s' % (image_path),'shape:{}'.format(image.shape),'labels:{}'.format(labels))
        # only one label is saved here; for multiple labels add more "'label': _int64_feature(label)" entries
        label=labels[0]
        example = tf.train.Example(features=tf.train.Features(feature={
            'image_raw': _bytes_feature(image_raw),
            'height': _int64_feature(image.shape[0]),
            'width': _int64_feature(image.shape[1]),
            'depth': _int64_feature(image.shape[2]),
            'label': _int64_feature(label)
        }))
        writer.write(example.SerializeToString())
    writer.close()

def disp_records(record_file,resize_height, resize_width,show_nums=4):
    tf_image, tf_label = read_records(record_file,resize_height,resize_width,type='normalization')
    # show the first show_nums images (4 by default)
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        for i in range(show_nums):
            image,label = sess.run([tf_image,tf_label])  # fetch image and label from the session
            # image = tf_image.eval()
            # an image parsed straight from the record is a flat vector and must be reshaped before display
            # image = image.reshape([height,width,depth])
            print('shape:{},type:{},labels:{}'.format(image.shape,image.dtype,label))
            # pilimg = Image.fromarray(np.asarray(image_eval_reshape))
            # pilimg.show()
            show_image("image:%d"%(label),image)
        coord.request_stop()
        coord.join(threads)


def batch_test(record_file,resize_height, resize_width):
    '''
    :param record_file: path to the record file
    :param resize_height:
    :param resize_width:
    :return:
    :PS: image_batch and label_batch are typically fed to the network as inputs
    '''
    # read the record file
    tf_image,tf_label = read_records(record_file,resize_height,resize_width,type='normalization')
    image_batch, label_batch= get_batch_images(tf_image,tf_label,batch_size=4,labels_nums=5,one_hot=False,shuffle=False)

    init = tf.global_variables_initializer()
    with tf.Session() as sess:  # start a session
        sess.run(init)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        for i in range(4):
            # fetch images and labels from the session
            images, labels = sess.run([image_batch, label_batch])
            # only the first image of each batch is shown here
            show_image("image", images[0, :, :, :])
            print('shape:{},type:{},labels:{}'.format(images.shape,images.dtype,labels))

        # stop all threads
        coord.request_stop()
        coord.join(threads)


if __name__ == '__main__':
    resize_height = 224  # height the stored images are resized to
    resize_width = 224  # width the stored images are resized to
    shuffle=True
    log=5
    # generate the train.record file (the record/ directory must already exist)
    image_dir='train'
    train_labels = 'train.txt'  # file listing the training image paths and labels
    train_record_output = 'record/train{}.tfrecords'.format(resize_height)
    create_records(image_dir,train_labels, train_record_output, resize_height, resize_width,shuffle,log)
    train_nums=get_example_nums(train_record_output)
    print("save train example nums={}".format(train_nums))

    # generate the val.record file
    image_dir='validation'
    val_labels = 'val.txt'  # file listing the validation image paths and labels
    val_record_output = 'record/val{}.tfrecords'.format(resize_height)
    create_records(image_dir,val_labels, val_record_output, resize_height, resize_width,shuffle,log)
    val_nums=get_example_nums(val_record_output)
    print("save val example nums={}".format(val_nums))

    # test the display/batch functions
    # disp_records(train_record_output,resize_height, resize_width)
    batch_test(train_record_output,resize_height, resize_width)
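
If the generated records ever look suspicious, you can also inspect a few raw examples without building a graph at all. Below is a small sanity-check sketch (not part of the script above; the function name is mine) that walks the record file with the same tf.python_io.tf_record_iterator used in get_example_nums and parses each Example with protobuf directly:

def peek_records(record_file, max_examples=3):
    # decode a few examples straight from the file, no session or queue needed
    for i, record in enumerate(tf.python_io.tf_record_iterator(record_file)):
        if i >= max_examples:
            break
        example = tf.train.Example()
        example.ParseFromString(record)
        feat = example.features.feature
        height = feat['height'].int64_list.value[0]
        width = feat['width'].int64_list.value[0]
        depth = feat['depth'].int64_list.value[0]
        label = feat['label'].int64_list.value[0]
        image = np.frombuffer(feat['image_raw'].bytes_list.value[0], dtype=np.uint8)
        image = image.reshape([height, width, depth])
        print('example %d: shape=%s, label=%d' % (i, image.shape, label))

# peek_records('record/train224.tfrecords')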