引言:
过去几周我一直在涉足深度学习领域,尤其是卷积神经网络模型。最近,谷歌围绕街景多位数字识别技术发布了一篇不错的paper。该文章描述了一个用于提取街景门牌号的单个端到端神经网络系统。然后,作者阐述了基于同样的网络结构如何来突破谷歌验证码识别系统的准确率。
为了亲身体验神经网络的实现,我决定尝试设计一个可以解决类似问题的系统:国内车牌号自动识别系统。设计这样一个系统的原因有3点:
- 我应该能够参照谷歌那篇paper搭建一个同样的或者类似的网络架构:谷歌提供的那个网络架构在验证码识别上相当不错,那么讲道理的话,用它来识别车牌号应该也会很给力。拥有一个知名的网络架构将会大大地简化我学习CNN的步骤。
- 我可以很容易地生成训练数据(车牌图片)。训练神经网络存在的一个很大的问题就是需要大量带标签的样本,通常要训练好一个网络就需要几十万张标记过的图片;而车牌图片可以用程序合成,合成时标签自然也就有了。
- 好奇心。传统的车牌号自动识别系统依赖于自己编写算法来实现车牌定位,标准化,分割和字符识别等功能。照这样的话,实现这些系统的代码可能达到上千行。然而,我比较感兴趣的是,如何使用相对较少的代码和最少的专业领域知识来开发一个不错的系统。
开发该项目的环境要求有Python,Tensorflow,OpenCV和NumPy等软件。源代码在这里。
首先来看项目结构及成果图:
合成图片
genplate.py
为了训练任何一个神经网络,必须提供一套拥有正确输出的训练数据。
合成车牌的文字和背景都采用国内常见的车牌格式:文字先渲染成白色,再叠加到车牌背景模板上,随后随机调整色调、饱和度和亮度,以模拟真实场景中的光线变化。最后再加入一些噪声,这样不仅能够模拟真实传感器的噪声,还能避免网络过度依赖锐利的轮廓边界——实际输入的图片往往是失焦的。font目录存放字体文件,images目录存放车牌背景模板(template.bmp)及污渍图片(smu2.jpg)。
车牌变换采用了一种基于随机滚转、倾斜、偏转、平移以及缩放的仿射变换。每个参数允许的范围是车牌号可能被看到的所有情况的集合。比如,偏转比滚转允许变化更多(你更可能看到一辆汽车在拐弯而不是翻转到一边)。
-
- import PIL
- from PIL import ImageFont
- from PIL import Image
- from PIL import ImageDraw
- import cv2;
- import numpy as np;
- import os;
- from math import *
-
- # 生成车牌
- # font = ImageFont.truetype("Arial-Bold.ttf",14)
-
# Character vocabulary, in class-id order: the 31 province abbreviations,
# then the digits 0-9, then the 24 capital letters used here (no I, no O).
chars = (
    list("京沪津渝冀晋蒙辽吉黑苏浙皖闽赣鲁豫鄂湘粤桂琼川贵云藏陕甘青宁新")
    + list("0123456789")
    + list("ABCDEFGHJKLMNPQRSTUVWXYZ")
)

# Reverse lookup: character -> class id (its position in `chars`).
index = {ch: pos for pos, ch in enumerate(chars)}
-
def AddSmudginess(img, Smu):
    """Blend a random 50x50 window of the smudge texture into ``img``.

    Args:
        img: image to smudge; it is resized to 50x50 before blending.
        Smu: smudge texture from which a random 50x50 window is cut.

    Returns:
        The 50x50 blended image.
    """
    patch = 50
    # Window origin: the row is drawn first, then the column.
    top = r(Smu.shape[0] - patch)
    left = r(Smu.shape[1] - patch)
    smudge = cv2.resize(Smu[top:top + patch, left:left + patch], (patch, patch))
    # Result is NOT(smudge AND NOT(img)).
    inverted = cv2.bitwise_not(cv2.resize(img, (patch, patch)))
    return cv2.bitwise_not(cv2.bitwise_and(smudge, inverted))
def rot(img, angel, shape, max_angel):
    """Lean the image sideways with a perspective warp.

    Args:
        img: input image.
        angel: lean angle in degrees; a positive value shifts the top-left
            and bottom-right corners inwards, otherwise the bottom-left and
            top-right corners are shifted.
        shape: shape of ``img``; ``shape[0]`` is used as the height and
            ``shape[1]`` as the width.
        max_angel: angle in degrees used only to size the output canvas.

    Returns:
        The warped image on a canvas ``shape[0]`` high and
        ``shape[1] + int(shape[0] * cos(max_angel))`` wide.
    """
    height, width = shape[0], shape[1]
    # The constant 3.14 (rather than math.pi) is left as-is so the computed
    # sizes and offsets stay exactly what they were.
    out_w = width + int(height * cos((float(max_angel) / 180) * 3.14))
    out_size = (out_w, height)
    shift = abs(int(sin((float(angel) / 180) * 3.14) * height))

    src_quad = np.float32([[0, 0], [0, height], [width, 0], [width, height]])
    if angel > 0:
        dst_quad = np.float32(
            [[shift, 0], [0, height], [out_w, 0], [out_w - shift, height]])
    else:
        dst_quad = np.float32(
            [[0, 0], [shift, height], [out_w - shift, 0], [out_w, height]])

    transform = cv2.getPerspectiveTransform(src_quad, dst_quad)
    return cv2.warpPerspective(img, transform, out_size)
def rotRandrom(img, factor, size):
    """Apply a small random perspective distortion.

    Each corner of a reference quadrilateral is nudged inwards by a random
    offset in ``[0, factor)`` and the image is warped accordingly.

    Args:
        img: input image.
        factor: exclusive upper bound of every random corner offset.
        size: passed unchanged to ``cv2.warpPerspective`` as the output size.
            ``size[0]`` is also used as the y-extent and ``size[1]`` as the
            x-extent of the reference quadrilateral.

    Returns:
        The warped image.
    """
    # NOTE(review): GenPlate.generate passes size as (width, height), so the
    # reference quadrilateral below has its extents transposed relative to the
    # image. Behaviour is left unchanged here; confirm whether it is intended.
    y_extent, x_extent = size[0], size[1]
    src_quad = np.float32(
        [[0, 0], [0, y_extent], [x_extent, 0], [x_extent, y_extent]])

    # Eight offsets, drawn in the order the corners consume them.
    jitter = [r(factor) for _ in range(8)]
    dst_quad = np.float32([
        [jitter[0], jitter[1]],
        [jitter[2], y_extent - jitter[3]],
        [x_extent - jitter[4], jitter[5]],
        [x_extent - jitter[6], y_extent - jitter[7]],
    ])

    transform = cv2.getPerspectiveTransform(src_quad, dst_quad)
    return cv2.warpPerspective(img, transform, size)
def tfactor(img):
    """Randomly scale down hue, saturation and value of a BGR image.

    The image is converted to HSV, each channel is multiplied by its own
    random factor, and the result is converted back to BGR.

    Args:
        img: BGR image.

    Returns:
        The colour-jittered BGR image.
    """
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    # (channel, floor, span): factor = floor + U[0, 1) * span, so
    # H is scaled by [0.8, 1.0), S by [0.3, 1.0) and V by [0.2, 1.0).
    for channel, floor, span in ((0, 0.8, 0.2), (1, 0.3, 0.7), (2, 0.2, 0.8)):
        hsv[:, :, channel] = hsv[:, :, channel] * (floor + np.random.random() * span)
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
def random_envirment(img, data_set):
    """Fill the zero-valued samples of ``img`` with a random background.

    Args:
        img: image whose samples equal to 0 mark where background shows.
        data_set: sequence of file paths of candidate background images.

    Returns:
        ``img`` OR-ed with the background masked to its zero-valued samples.

    Raises:
        ValueError: if ``data_set`` is empty or the chosen file cannot be
            read as an image.
    """
    if not data_set:
        raise ValueError("random_envirment: data_set contains no background images")
    path = data_set[r(len(data_set))]
    env = cv2.imread(path)
    if env is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise ValueError("random_envirment: cannot read background image %r" % (path,))
    env = cv2.resize(env, (img.shape[1], img.shape[0]))
    # 255 where a sample is exactly 0, otherwise 0 (evaluated per channel).
    mask = (img == 0).astype(np.uint8) * 255
    return cv2.bitwise_or(cv2.bitwise_and(mask, env), img)
def GenCh(f, val):
    """Render one character on a 45x70 white canvas, squeezed to 23x70.

    Args:
        f: PIL font used to draw the character.
        val: the character to draw (in black).

    Returns:
        The rendered glyph as a NumPy array.
    """
    canvas = Image.new("RGB", (45, 70), (255, 255, 255))
    ImageDraw.Draw(canvas).text((0, 3), val, (0, 0, 0), font=f)
    return np.array(canvas.resize((23, 70)))
def GenCh1(f, val):
    """Render one character directly on a 23x70 white canvas.

    Args:
        f: PIL font used to draw the character.
        val: the character to draw (in black).

    Returns:
        The rendered glyph as a NumPy array.
    """
    canvas = Image.new("RGB", (23, 70), (255, 255, 255))
    ImageDraw.Draw(canvas).text((0, 2), val, (0, 0, 0), font=f)
    return np.array(canvas)
def AddGauss(img, level):
    """Blur ``img`` with a square kernel of side ``2 * level + 1``.

    Despite the name this uses ``cv2.blur``, not a Gaussian filter.

    Args:
        img: input image.
        level: blur strength; larger values give a larger kernel.

    Returns:
        The blurred image.
    """
    side = level * 2 + 1
    return cv2.blur(img, (side, side))
-
def r(val):
    """Return ``int(u * val)`` for a fresh uniform sample ``u`` in [0, 1)."""
    scaled = np.random.random() * val
    return int(scaled)
-
def AddNoiseSingleChannel(single):
    """Add non-negative random noise to one image channel.

    The noise is min-max normalised to [0, 1] and scaled by the headroom
    ``255 - single.max()``, so the brightest sample cannot pass 255.

    Args:
        single: one channel of an image (assumed uint8 -- TODO confirm).

    Returns:
        ``single`` plus the noise.
    """
    headroom = 255 - single.max()
    sigma = 1 + r(6)
    raw = np.random.normal(0, sigma, single.shape)
    lowest, highest = raw.min(), raw.max()
    unit = (raw - lowest) / (highest - lowest)
    return single + (headroom * unit).astype(np.uint8)
-
def addNoise(img, sdev=0.5, avg=10):
    """Add noise to each of the first three channels of ``img`` in place.

    Args:
        img: image array with at least three channels; modified in place.
        sdev: not used by this function.
        avg: not used by this function.

    Returns:
        The same ``img`` object.
    """
    for channel in range(3):
        img[:, :, channel] = AddNoiseSingleChannel(img[:, :, channel])
    return img
-
class GenPlate:
    """Synthesises 7-character licence-plate images.

    Characters are rendered with two fonts onto a 226x70 canvas, merged with
    a plate template, distorted, recoloured, pasted over a random background
    image and finally blurred and noised.
    """

    def __init__(self, fontCh, fontEng, NoPlates):
        """Load fonts, the plate template and the background file list.

        Args:
            fontCh: path of the font used for the first (Chinese) character.
            fontEng: path of the font used for the remaining characters.
            NoPlates: directory walked recursively for background images.
        """
        self.fontC = ImageFont.truetype(fontCh, 43, 0)
        self.fontE = ImageFont.truetype(fontEng, 60, 0)
        # Reusable white canvas the characters are drawn onto.
        self.img = np.array(Image.new("RGB", (226, 70), (255, 255, 255)))
        self.bg = cv2.resize(cv2.imread("./images/template.bmp"), (226, 70))
        self.smu = cv2.imread("./images/smu2.jpg")
        self.noplates_path = []
        for parent, parent_folder, filenames in os.walk(NoPlates):
            for filename in filenames:
                self.noplates_path.append(parent + "/" + filename)

    def draw(self, val):
        """Render the 7 characters of ``val`` onto the shared canvas.

        Args:
            val: sequence of 7 characters.

        Returns:
            ``self.img`` (the shared canvas, not a copy).
        """
        offset = 2
        # First character, in the Chinese font.
        first = offset + 8
        self.img[0:70, first:first + 23] = GenCh(self.fontC, val[0])
        # Second character.
        second = first + 23 + 6
        self.img[0:70, second:second + 23] = GenCh1(self.fontE, val[1])
        # Remaining five characters, after a 17-pixel gap, at a 29-pixel pitch.
        for i in range(5):
            base = second + 23 + 17 + i * 23 + i * 6
            self.img[0:70, base:base + 23] = GenCh1(self.fontE, val[i + 2])
        return self.img

    def generate(self, text):
        """Produce one synthetic plate image for ``text``.

        Args:
            text: plate string of exactly 7 characters.

        Returns:
            The augmented plate image.

        Raises:
            ValueError: if ``text`` is not exactly 7 characters long.
        """
        if len(text) != 7:
            raise ValueError(
                "plate text must have exactly 7 characters, got %d" % len(text))
        # Invert so the glyphs become white, then merge with the template.
        fg = cv2.bitwise_not(self.draw(text))
        com = cv2.bitwise_or(fg, self.bg)
        com = rot(com, r(60) - 30, com.shape, 30)
        com = rotRandrom(com, 10, (com.shape[1], com.shape[0]))
        com = tfactor(com)
        com = random_envirment(com, self.noplates_path)
        com = AddGauss(com, 1 + r(4))
        com = addNoise(com)
        return com

    def genPlateString(self, pos, val):
        """Build a random 7-character plate string.

        Args:
            pos: position whose character is forced to ``val``; ``-1`` forces
                nothing.
            val: the character placed at ``pos`` when ``pos != -1``.

        Returns:
            The plate string: position 0 is drawn from ``chars[0:31]``,
            position 1 from ``chars[41:65]`` and the rest from
            ``chars[31:65]``.
        """
        forced = [0, 0, 0, 0, 0, 0, 0]
        if pos != -1:
            forced[pos] = 1

        pieces = []
        for cpos, unit in enumerate(forced):
            if unit == 1:
                pieces.append(val)
            elif cpos == 0:
                pieces.append(chars[r(31)])
            elif cpos == 1:
                pieces.append(chars[41 + r(24)])
            else:
                pieces.append(chars[31 + r(34)])
        return "".join(pieces)

    def genBatch(self, batchSize, pos, charRange, outputPath, size):
        """Generate ``batchSize`` random plates and write them as JPEG files.

        Files are named ``00.jpg``, ``01.jpg``, ... inside ``outputPath``.

        Args:
            batchSize: number of images to write.
            pos: accepted for compatibility; not used.
            charRange: accepted for compatibility; not used.
            outputPath: output directory, created if it does not exist.
            size: size passed to ``cv2.resize`` for every image.
        """
        os.makedirs(outputPath, exist_ok=True)
        for i in range(batchSize):
            # Use this instance, not a module-level global, so the method
            # works for any GenPlate object.
            plateStr = self.genPlateString(-1, -1)
            img = cv2.resize(self.generate(plateStr), size)
            cv2.imwrite(outputPath + "/" + str(i).zfill(2) + ".jpg", img)
- # return img
# Module-level generator instance.
G = GenPlate(
    fontCh="./font/platech.ttf",
    fontEng="./font/platechar.ttf",
    NoPlates="./NoPlates",
)
# NOTE(review): this call runs on every import of the module and rewrites the
# images in ./plate; the author's original note says it was meant to be
# commented out for that reason.
G.genBatch(
    batchSize=15,
    pos=2,
    charRange=range(31, 65),
    outputPath="./plate",
    size=(272, 72),
)
input_data.py 省略……
产生用于训练的数据
# 批量生成车牌,供算法调用
网络结构 # 算法的核心,卷积神经网络
使用的卷积神经网络结构如model.py所示:
- import tensorflow as tf
- import numpy as np
-
- # 算法的核心,卷积神经网络
def _conv_relu(scope_name, inputs, in_channels, out_channels):
    """Build one 3x3, stride-1, VALID-padded convolution followed by ReLU."""
    with tf.variable_scope(scope_name) as scope:
        weights = tf.get_variable(
            'weights',
            shape=[3, 3, in_channels, out_channels],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        conv = tf.nn.conv2d(inputs, weights, strides=[1, 1, 1, 1], padding='VALID')
        biases = tf.get_variable(
            'biases',
            shape=[out_channels],
            dtype=tf.float32,
            initializer=tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(conv, biases)
        return tf.nn.relu(pre_activation, name=scope.name)


def _max_pool_2x2(scope_name, inputs, op_name):
    """Build one 2x2, stride-2, VALID-padded max-pooling layer."""
    with tf.variable_scope(scope_name):
        return tf.nn.max_pool(inputs, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                              padding='VALID', name=op_name)


def _fc_logits(scope_name, inputs, in_dim):
    """Build one linear output head producing 65 logits."""
    with tf.variable_scope(scope_name):
        weights = tf.get_variable(
            'weights',
            shape=[in_dim, 65],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        # NOTE(review): the positional 0.1 is the initializer's *mean*; stddev
        # keeps its default. The conv layers use constant_initializer(0.1) for
        # biases -- confirm whether that was intended here too.
        biases = tf.get_variable(
            'biases',
            shape=[65],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(0.1))
        return tf.matmul(inputs, weights) + biases


# Core of the algorithm: the convolutional neural network.
def inference(images, keep_prob):
    '''Build the model.

    Layout: conv1-conv2-pool, conv3-conv4-pool, conv5-conv6-pool, flatten,
    dropout, then seven parallel linear heads (variable scopes fc21..fc27),
    one per plate character. Per the original comments, fc21 recognises the
    province, fc22 the city letter, and fc23 onwards the remaining
    letter-or-digit positions.

    Args:
        images: image batch, 4D tensor, tf.float32,
            [batch_size, height, width, channels] with 3 channels.
        keep_prob: keep probability passed to tf.nn.dropout.
    Returns:
        A tuple of 7 logits tensors, each float, [batch_size, 65].
    '''
    conv1 = _conv_relu('conv1', images, 3, 32)
    conv2 = _conv_relu('conv2', conv1, 32, 32)
    pool1 = _max_pool_2x2('max_pooling1', conv2, 'pooling1')

    conv3 = _conv_relu('conv3', pool1, 32, 64)
    conv4 = _conv_relu('conv4', conv3, 64, 64)
    pool2 = _max_pool_2x2('max_pooling2', conv4, 'pooling2')

    conv5 = _conv_relu('conv5', pool2, 64, 128)
    conv6 = _conv_relu('conv6', conv5, 128, 128)
    pool3 = _max_pool_2x2('max_pool3', conv6, 'pool3')

    # Flatten the feature map and apply dropout; no weights live in 'fc1'.
    with tf.variable_scope('fc1'):
        shp = pool3.get_shape()
        flattened_shape = shp[1].value * shp[2].value * shp[3].value
        reshape = tf.reshape(pool3, [-1, flattened_shape])
        fc1 = tf.nn.dropout(reshape, keep_prob, name='fc1_dropdot')

    # One independent head per character position, built in order fc21..fc27.
    heads = [_fc_logits('fc2%d' % position, fc1, flattened_shape)
             for position in range(1, 8)]
    return tuple(heads)
-
- # 卷积神经网络返回的输出层,进行交叉熵计算
def losses(logits1, logits2, logits3, logits4, logits5, logits6, logits7, labels):
    '''Compute one mean cross-entropy loss per character position.

    Args:
        logits1..logits7: logits tensors, one per character position.
        labels: label tensor convertible to tf.int32; column ``k`` holds the
            class ids for ``logits<k+1>``.
    Returns:
        A tuple of 7 scalar loss tensors (loss1, ..., loss7). Each is also
        recorded as a scalar summary.
    '''
    labels = tf.convert_to_tensor(labels, tf.int32)

    all_logits = (logits1, logits2, logits3, logits4, logits5, logits6, logits7)
    per_position = []
    for column, logits in enumerate(all_logits):
        tag = 'loss%d' % (column + 1)
        with tf.variable_scope(tag) as scope:
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=labels[:, column], name='xentropy_per_example')
            mean_loss = tf.reduce_mean(cross_entropy, name=tag)
            tf.summary.scalar(scope.name + '/' + tag, mean_loss)
        per_position.append(mean_loss)

    return tuple(per_position)
-
- # 最优化,自适应梯度下降,进行优化
def trainning(loss1, loss2, loss3, loss4, loss5, loss6, loss7, learning_rate):
    '''Create one Adam training op per loss.

    Each loss gets its own AdamOptimizer and its own non-trainable
    ``global_step`` variable inside name scopes optimizer1..optimizer7.

    Args:
        loss1..loss7: loss tensors, from losses().
        learning_rate: learning rate given to every optimizer.
    Returns:
        A tuple of 7 train ops, to be passed to ``sess.run()``.
    '''
    all_losses = (loss1, loss2, loss3, loss4, loss5, loss6, loss7)
    train_ops = []
    for number, loss in enumerate(all_losses, start=1):
        with tf.name_scope('optimizer%d' % number):
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            global_step = tf.Variable(0, name='global_step', trainable=False)
            train_ops.append(optimizer.minimize(loss, global_step=global_step))

    return tuple(train_ops)
-
- # 对模型评估
def evaluation(logits1, logits2, logits3, logits4, logits5, logits6, logits7, labels):
    """Compute per-character top-1 accuracy over all seven positions.

    The seven logits tensors are concatenated along axis 0 and compared with
    the labels, which are transposed and flattened so the order matches.

    Args:
        logits1..logits7: logits tensors, one per character position.
        labels: label tensor convertible to tf.int32, one column per
            character position.
    Returns:
        A scalar float16 tensor: the fraction of characters predicted
        correctly. It is also recorded as a scalar summary.
    """
    stacked_logits = tf.concat(
        [logits1, logits2, logits3, logits4, logits5, logits6, logits7], 0)
    labels = tf.convert_to_tensor(labels, tf.int32)
    flat_labels = tf.reshape(tf.transpose(labels), [-1])
    with tf.variable_scope('accuracy') as scope:
        hits = tf.nn.in_top_k(stacked_logits, flat_labels, 1)
        hits = tf.cast(hits, tf.float16)
        accuracy = tf.reduce_mean(hits)
        tf.summary.scalar(scope.name + '/accuracy', accuracy)
    return accuracy
训练模型:train.py
- import os
- import numpy as np
- import tensorflow as tf
- from input_data import OCRIter
- import model
- import time
- import datetime
-
- # train训练
# Training configuration.
img_w = 272
img_h = 72
num_label = 7
batch_size = 8
count = 500000  # number of training steps (30000 was used previously)
learning_rate = 0.0001

# Graph inputs; images are laid out as [N, H, W, C].
image_holder = tf.placeholder(dtype=tf.float32, shape=[batch_size, img_h, img_w, 3])
label_holder = tf.placeholder(dtype=tf.int32, shape=[batch_size, 7])
keep_prob = tf.placeholder(dtype=tf.float32)

# Directory for TensorBoard logs and checkpoints.
logs_train_dir = r'H:\GPpython\第三阶段数据分析\神经网络\Licence_plate_recognize\Licence_plate_recognize\train_result'
-
def get_batch():
    """Generate one batch of training images and labels.

    Returns:
        A pair ``(images, labels)`` of NumPy arrays built from what
        ``OCRIter(batch_size, img_h, img_w).iter()`` returns.
    """
    images, labels = OCRIter(batch_size, img_h, img_w).iter()
    return np.array(images), np.array(labels)
-
def fit():
    """Build the training graph and run the training loop.

    Runs ``count`` steps. Progress is printed every 10 steps; a checkpoint is
    written to ``logs_train_dir`` every 10000 steps and after the last step.
    The session and the summary writer are closed even if a step raises.
    """
    logits = model.inference(image_holder, keep_prob)
    head_losses = model.losses(*logits, labels=label_holder)
    train_ops = model.trainning(*head_losses, learning_rate=learning_rate)
    train_acc = model.evaluation(*logits, labels=label_holder)

    # Registers the input images as an image summary.
    tf.summary.image('input', image_holder)
    summary_op = tf.summary.merge(tf.get_collection(tf.GraphKeys.SUMMARIES))

    fetches = list(train_ops) + list(head_losses) + [train_acc, summary_op]
    n_ops = len(train_ops)
    n_losses = len(head_losses)

    with tf.Session() as sess:
        train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
        try:
            saver = tf.train.Saver()
            sess.run(tf.global_variables_initializer())
            start_time1 = time.time()
            for step in range(count):
                x_batch, y_batch = get_batch()
                # Timed section excludes batch generation.
                start_time2 = time.time()
                time_str = datetime.datetime.now().isoformat()
                feed_dict = {image_holder: x_batch, label_holder: y_batch, keep_prob: 0.5}
                results = sess.run(fetches, feed_dict)
                loss_values = results[n_ops:n_ops + n_losses]
                acc, summary_str = results[-2], results[-1]
                train_writer.add_summary(summary_str, step)
                duration = time.time() - start_time2
                tra_all_loss = sum(loss_values[1:], loss_values[0])

                if step % 10 == 0:
                    sec_per_batch = float(duration)
                    print('%s : Step %d,train_loss = %.2f,acc= %.2f,sec/batch=%.3f' % (
                        time_str, step, tra_all_loss, acc, sec_per_batch))

                if step % 10000 == 0 or (step + 1) == count:
                    checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
        finally:
            # Flush and release the event file.
            train_writer.close()
    print(time.time() - start_time1)
if __name__ == '__main__':
    # Training is currently switched off; call fit() here to train.
    # fit()
    # Smoke test of the data pipeline: print batch shapes and the first label.
    data, labels = get_batch()
    for item in (data.shape, labels.shape, labels[0]):
        print(item)
除输出层之外,所有层都使用ReLU激活函数,这是深度神经网络的标准做法。在原文的网络中,指示车牌是否存在的节点使用sigmoid激活函数(典型地用于二值输出);其他输出节点对每个字符位使用softmax(每个字符位上各类别的概率之和为1),这是对离散概率分布建模的标准方法。需要注意的是,本文的model.py没有“是否存在”节点,只有7个输出分支,每个分支输出65个类别的logits。
根据标签和网络输出的交叉熵来定义损失函数。为了数值稳定性,原文利用softmax_cross_entropy_with_logits和sigmoid_cross_entropy_with_logits将最后一层的激活函数合并进交叉熵的计算(本文的model.py使用的是sparse_softmax_cross_entropy_with_logits,标签直接给出类别编号)。关于交叉熵详细而直观的介绍,可以参考Michael A. Nielsen的免费在线书籍中的相关章节。
使用一块nVidia GTX 960m花费大约数小时来训练(train.py),通过CPU的一个后台进程来运行训练数据的生成。
输出处理
- import tensorflow as tf
- import numpy as np
- import os
- from PIL import Image
- import cv2
- import matplotlib.pyplot as plt
- import model
- import genplate
# Character vocabulary, in class-id order: the 31 province abbreviations,
# then the digits 0-9, then the 24 capital letters used here (no I, no O).
chars = (
    list("京沪津渝冀晋蒙辽吉黑苏浙皖闽赣鲁豫鄂湘粤桂琼川贵云藏陕甘青宁新")
    + list("0123456789")
    + list("ABCDEFGHJKLMNPQRSTUVWXYZ")
)

# Reverse lookup: character -> class id (its position in `chars`).
index = {ch: pos for pos, ch in enumerate(chars)}
- '''
- Test one image against the saved models and parameters
- '''
# Last image loaded by get_one_image(); displayed at the end of the script.
pic = None


def get_one_image(test):
    '''Regenerate the sample plates, then load one image chosen at random.

    The chosen image is previewed with matplotlib, stored unscaled in the
    module-level ``pic``, and returned scaled to [0, 1] with a leading batch
    axis of size 1.

    Args:
        test: non-empty list of image file paths to choose from.
    Return:
        ndarray holding the single scaled image.
    Raises:
        ValueError: if ``test`` is empty or the chosen file cannot be read.
    '''
    global pic
    if not test:
        raise ValueError("get_one_image: no candidate images were supplied")

    G = genplate.GenPlate("./font/platech.ttf", './font/platechar.ttf', "./NoPlates")
    G.genBatch(15, 2, range(31, 65), "./plate", (272, 72))

    img_dir = test[np.random.randint(0, len(test))]

    # Close the file handle once matplotlib has taken the pixel data.
    with Image.open(img_dir) as image_show:
        plt.imshow(image_show)

    image = cv2.imread(img_dir)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise ValueError("get_one_image: cannot read image %r" % (img_dir,))
    pic = image

    img = np.multiply(image, 1 / 255.0)
    image = np.array([img])
    print(image.shape)

    return image
-
batch_size = 1
x = tf.placeholder(tf.float32, [batch_size, 72, 272, 3])
keep_prob = tf.placeholder(tf.float32)

# Candidate test images: every entry of the plate directory.
test_dir = r'H:/GPpython/Licence_plate_recognize/plate/'
test_image = [test_dir + file for file in os.listdir(test_dir)]

image_array = get_one_image(test_image)

logit1, logit2, logit3, logit4, logit5, logit6, logit7 = model.inference(x, keep_prob)

logs_train_dir = r'H:/GPpython/Licence_plate_recognize/train_result/'
saver = tf.train.Saver()

with tf.Session() as sess:
    print("Reading checkpoint...")
    ckpt = tf.train.get_checkpoint_state(logs_train_dir)
    if not (ckpt and ckpt.model_checkpoint_path):
        print('No checkpoint file found')
        # Without restored weights the variables are uninitialised, so
        # stop here rather than fail inside sess.run below.
        raise SystemExit(1)

    # The step number is the suffix after the last '-' of the checkpoint name.
    global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
    saver.restore(sess, ckpt.model_checkpoint_path)
    print('Loading success, global_step is %s' % global_step)

    pre1, pre2, pre3, pre4, pre5, pre6, pre7 = sess.run(
        [logit1, logit2, logit3, logit4, logit5, logit6, logit7],
        feed_dict={x: image_array, keep_prob: 1.0})
    # One row of 65 scores per character position.
    prediction = np.reshape(np.array([pre1, pre2, pre3, pre4, pre5, pre6, pre7]), [-1, 65])

    max_index = np.argmax(prediction, axis=1)
    print(max_index)

    line = ''
    for i in range(prediction.shape[0]):
        # Restrict each position to its admissible slice of `chars`:
        # position 0 -> [0, 31), position 1 -> [41, 65), others -> [31, 65).
        if i == 0:
            low, high = 0, 31
        elif i == 1:
            low, high = 41, 65
        else:
            low, high = 31, 65
        result = np.argmax(prediction[i][low:high]) + low
        line += chars[result] + " "
    print('predicted: ' + line)

cv2.imshow('pic', pic)
cv2.waitKeyEx(0)
总结
我已经开源了一个代码相对较短的系统,它不需要导入任何特定领域的库,也不需要太多特定领域的知识,就能够实现车牌号自动识别。此外,我还通过在线合成图片的方法,解决了深度神经网络通常需要成千上万张训练图片的问题。
另一方面,我的系统也存在一些缺点:
- 只适用于特定车牌号。尤其是,网络结构明确假定了输出只有7个字符。
- 只适用于特定字体。
- 速度太慢。该系统运行一张适当尺寸的图片要花费几秒钟。
为了解决第1个问题,谷歌团队将他们的网络结构的高层拆分成了多个子网络,每一个子网络用于假定输出号码中的不同号码位。还有一个并行的子网络来决定存在多少号码。我觉得这种方法可以应用到这儿,但是我没有在这个项目中实现。
关于第2点我在上面举过例子,由于字体的稍微不同只能使用于国内车牌。如果尝试着检测US车牌号的话,误检将会更加严重,因为US车牌号字体类型更多。一个可能的解决方案就是使得训练数据有更多不同的字体类型可选择,尽管还不清楚需要多少字体类型才能成功。
第3点提到的速度慢的问题是扼杀许多应用的cancer:在一个相当强大的GPU上处理一张适当尺寸的输入图片就要花费几秒钟。我认为不引进一种级联式结构的检测网络就想避开这个问题是不太可能的,比如Haar级联,HOG检测器,或者一个更简单的神经网络。
我很有兴趣去尝试和其他机器学习方法的比较会怎样,特别是姿态回归看起来有希望,最后可能会附加一个最基本的分类阶段。如果使用了像scikit-learn这样的机器学习库,那么应该同样简单。
版权声明:如无特殊说明,文章均为本站原创,转载请注明出处
本文链接:http://wakemeupnow.cn/article/car/