当前位置：首页 > news > 正文

企业网站的优劣势wordpress菜单顺序

news 2025/11/22 7:01:18

企业网站的优劣势,wordpress菜单顺序,河南省交通基本建设质量检测监督站网站,软件开发工程师是前端还是后端 最近在研究OCR识别相关的东西，最终目标是能识别身份证上的所有中文汉字和数字，不过本文先设定一个小目标，先识别定长为18的身份证号，当然本文的思路也是可以复用来识别定长的验证码识别的。本文实现思路主要来源于Xlvector的博客…… 最终目标是能识别身份证上的所有中文汉字和数字，不过本文先设定一个小目标，先识别定长为18的身份证号，当然本文的思路也是可以复用来识别定长的验证码识别的。本文实现思路主要来源于Xlvector的博客，采用基于CNN实现端到端的OCR。下面引用博文介绍目前基于深度学习的两种OCR识别方法：1. 把OCR的问题当做一个多标签学习的问题。4个数字组成的验证码就相当于有4个标签的图片识别问题（这里的标签还是有序的），用CNN来解决。2. 把OCR的问题当做一个语音识别的问题，语音识别是把连续的音频转化为文本，验证码识别就是把连续的图片转化为文本，用CNN+LSTM+CTC来解决。这里方法1主要用来解决固定长度标签的图片识别问题，而方法2主要用来解决不定长度标签的图片识别问题，本文实现方法1，识别固定18个数字字符的身份证号。环境依赖：本文基于tensorflow框架实现，依赖于tensorflow环境，建议使用anaconda进行python包管理及环境管理；本文使用freetype-py进行训练集图片的实时生成，同时后续也可扩展为能生成中文字符图片的训练集，建议使用pip安装： pip install freetype-py ；同时本文还依赖于numpy和opencv等常用库： pip install numpy opencv-python 。知识准备：本文不具体介绍CNN（卷积神经网络）具体实现原理，不熟悉的建议参看集智博文《卷积如何成为一个很厉害的神经网络》，这篇文章写得很好。本文实现思路很容易理解，就是把一个有序排列18个数字组成的图片当做一个多标签学习的问题，标签的长度可以任意改变，只要是固定长度的，这个训练方法都是适用的；当然现实中很多情况是需要识别不定长度的标签的，这部分就需要使用方法2（CNN+LSTM+CTC）来解决了。训练数据集生成：首先先完成训练数据集图片的生成，主要依赖于freetype-py库生成数字/中文的图片。其中要注意的一点就是生成图片的大小，本文经过多次尝试后，生成的图片是32 x 256大小的，如果图片太大，则可能导致训练不收敛。生成出来的示例图片如下。gen_image()方法返回 image_data：图片像素数据 (32,256)；label：图片标签，18位数字字符 477081933151463759；vec：图片标签转成向量表示 (180,)，代表每个数字所处的列，总长度 18 * 10。 #!/usr/bin/env python2 # -*- coding: utf-8 -*-身份证文字数字生成类 author: pengyuanjieimport numpy as np import freetype import copy import random import cv2 class put_chinese_text(object):def __init__(self, ttf):self._face freetype.Face(ttf)def draw_text(self, image, pos, text, text_size, text_color):draw chinese(or not) text with ttf:param image: image(numpy.ndarray) to draw text:param pos: where to draw text:param text: the context, for chinese should be unicode type:param text_size: text size:param text_color:text color:return: imageself._face.set_char_size(text_size * 64)metrics self._face.sizeascender metrics.ascender/64.0#descender metrics.descender/64.0#height metrics.height/64.0#linegap height - ascender descenderypos int(ascender)if not isinstance(text, unicode):text text.decode(utf-8)img self.draw_string(image, pos[0], 
pos[1]ypos, text, text_color)return imgdef draw_string(self, img, x_pos, y_pos, text, color):draw string:param x_pos: text x-postion on img:param y_pos: text y-postion on img:param text: text (unicode):param color: text color:return: imageprev_char 0pen freetype.Vector()pen.x x_pos 6 # div 64pen.y y_pos 6hscale 1.0matrix freetype.Matrix(int(hscale)*0x10000L, int(0.2*0x10000L),int(0.0*0x10000L), int(1.1*0x10000L))cur_pen freetype.Vector()pen_translate freetype.Vector()image copy.deepcopy(img)for cur_char in text:self._face.set_transform(matrix, pen_translate)self._face.load_char(cur_char)kerning self._face.get_kerning(prev_char, cur_char)pen.x kerning.xslot self._face.glyphbitmap slot.bitmapcur_pen.x pen.xcur_pen.y pen.y - slot.bitmap_top * 64self.draw_ft_bitmap(image, bitmap, cur_pen, color)pen.x slot.advance.xprev_char cur_charreturn imagedef draw_ft_bitmap(self, img, bitmap, pen, color):draw each char:param bitmap: bitmap:param pen: pen:param color: pen color e.g.(0,0,255) - red:return: imagex_pos pen.x 6y_pos pen.y 6cols bitmap.widthrows bitmap.rowsglyph_pixels bitmap.bufferfor row in range(rows):for col in range(cols):if glyph_pixels[row*cols col] ! 
0:img[y_pos row][x_pos col][0] color[0]img[y_pos row][x_pos col][1] color[1]img[y_pos row][x_pos col][2] color[2] class gen_id_card(object):def __init__(self):#self.words open(AllWords.txt, r).read().split( )self.number [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]self.char_set self.number#self.char_set self.words self.numberself.len len(self.char_set)self.max_size 18self.ft put_chinese_text(fonts/OCR-B.ttf)#随机生成字串长度固定#返回text,及对应的向量def random_text(self):text vecs np.zeros((self.max_size * self.len))#size random.randint(1, self.max_size)size self.max_sizefor i in range(size):c random.choice(self.char_set)vec self.char2vec(c)text text cvecs[i*self.len:(i1)*self.len] np.copy(vec)return text,vecs#根据生成的text生成image,返回标签和图片元素数据def gen_image(self):text,vec self.random_text()img np.zeros([32,256,3])color_ (255,255,255) # Writepos (0, 0)text_size 21image self.ft.draw_text(img, pos, text, text_size, color_)#仅返回单通道值颜色对于汉字识别没有什么意义return image[:,:,2],text,vec#单字转向量def char2vec(self, c):vec np.zeros((self.len))for j in range(self.len):if self.char_set[j] c:vec[j] 1return vec#向量转文本def vec2text(self, vecs):text v_len len(vecs)for i in range(v_len):if(vecs[i] 1):text text self.char_set[i % self.len]return text if __name__ __main__:genObj gen_id_card()image_data,label,vec genObj.gen_image()cv2.imshow(image, image_data)cv2.waitKey(0) 构建网络开始训练首先定义生成一个batch的方法# 生成一个训练batch def get_next_batch(batch_size128):obj gen_id_card()batch_x np.zeros([batch_size, IMAGE_HEIGHT*IMAGE_WIDTH])batch_y np.zeros([batch_size, MAX_CAPTCHA*CHAR_SET_LEN])for i in range(batch_size):image, text, vec obj.gen_image()batch_x[i,:] image.reshape((IMAGE_HEIGHT*IMAGE_WIDTH))batch_y[i,:] vecreturn batch_x, batch_y 用了Batch Normalization个人还不是很理解读者可自行百度代码来源于参考博文#Batch Normalization? 
有空再理解,tflearn or slim都有封装 ## http://stackoverflow.com/a/34634291/2267819 def batch_norm(x, beta, gamma, phase_train, scopebn, decay0.9, eps1e-5):with tf.variable_scope(scope):#beta tf.get_variable(namebeta, shape[n_out], initializertf.constant_initializer(0.0), trainableTrue)#gamma tf.get_variable(namegamma, shape[n_out], initializertf.random_normal_initializer(1.0, stddev), trainableTrue)batch_mean, batch_var tf.nn.moments(x, [0, 1, 2], namemoments)ema tf.train.ExponentialMovingAverage(decaydecay)def mean_var_with_update():ema_apply_op ema.apply([batch_mean, batch_var])with tf.control_dependencies([ema_apply_op]):return tf.identity(batch_mean), tf.identity(batch_var)mean, var tf.cond(phase_train, mean_var_with_update, lambda: (ema.average(batch_mean), ema.average(batch_var)))normed tf.nn.batch_normalization(x, mean, var, beta, gamma, eps)return normed 定义4层CNN和一层全连接层卷积核分别是2层5x5、2层3x3每层均使用tf.nn.relu非线性化,并使用max_pool网络结构读者可自行调参优化# 定义CNN def crack_captcha_cnn(w_alpha0.01, b_alpha0.1):x tf.reshape(X, shape[-1, IMAGE_HEIGHT, IMAGE_WIDTH, 1])# 4 conv layerw_c1 tf.Variable(w_alpha*tf.random_normal([5, 5, 1, 32]))b_c1 tf.Variable(b_alpha*tf.random_normal([32]))conv1 tf.nn.bias_add(tf.nn.conv2d(x, w_c1, strides[1, 1, 1, 1], paddingSAME), b_c1)conv1 batch_norm(conv1, tf.constant(0.0, shape[32]), tf.random_normal(shape[32], mean1.0, stddev0.02), train_phase, scopebn_1)conv1 tf.nn.relu(conv1)conv1 tf.nn.max_pool(conv1, ksize[1, 2, 2, 1], strides[1, 2, 2, 1], paddingSAME)conv1 tf.nn.dropout(conv1, keep_prob)w_c2 tf.Variable(w_alpha*tf.random_normal([5, 5, 32, 64]))b_c2 tf.Variable(b_alpha*tf.random_normal([64]))conv2 tf.nn.bias_add(tf.nn.conv2d(conv1, w_c2, strides[1, 1, 1, 1], paddingSAME), b_c2)conv2 batch_norm(conv2, tf.constant(0.0, shape[64]), tf.random_normal(shape[64], mean1.0, stddev0.02), train_phase, scopebn_2)conv2 tf.nn.relu(conv2)conv2 tf.nn.max_pool(conv2, ksize[1, 2, 2, 1], strides[1, 2, 2, 1], paddingSAME)conv2 tf.nn.dropout(conv2, keep_prob)w_c3 
tf.Variable(w_alpha*tf.random_normal([3, 3, 64, 64]))b_c3 tf.Variable(b_alpha*tf.random_normal([64]))conv3 tf.nn.bias_add(tf.nn.conv2d(conv2, w_c3, strides[1, 1, 1, 1], paddingSAME), b_c3)conv3 batch_norm(conv3, tf.constant(0.0, shape[64]), tf.random_normal(shape[64], mean1.0, stddev0.02), train_phase, scopebn_3)conv3 tf.nn.relu(conv3)conv3 tf.nn.max_pool(conv3, ksize[1, 2, 2, 1], strides[1, 2, 2, 1], paddingSAME)conv3 tf.nn.dropout(conv3, keep_prob)w_c4 tf.Variable(w_alpha*tf.random_normal([3, 3, 64, 64]))b_c4 tf.Variable(b_alpha*tf.random_normal([64]))conv4 tf.nn.bias_add(tf.nn.conv2d(conv3, w_c4, strides[1, 1, 1, 1], paddingSAME), b_c4)conv4 batch_norm(conv4, tf.constant(0.0, shape[64]), tf.random_normal(shape[64], mean1.0, stddev0.02), train_phase, scopebn_4)conv4 tf.nn.relu(conv4)conv4 tf.nn.max_pool(conv4, ksize[1, 2, 2, 1], strides[1, 2, 2, 1], paddingSAME)conv4 tf.nn.dropout(conv4, keep_prob)# Fully connected layerw_d tf.Variable(w_alpha*tf.random_normal([2*16*64, 1024]))b_d tf.Variable(b_alpha*tf.random_normal([1024]))dense tf.reshape(conv4, [-1, w_d.get_shape().as_list()[0]])dense tf.nn.relu(tf.add(tf.matmul(dense, w_d), b_d))dense tf.nn.dropout(dense, keep_prob)w_out tf.Variable(w_alpha*tf.random_normal([1024, MAX_CAPTCHA*CHAR_SET_LEN]))b_out tf.Variable(b_alpha*tf.random_normal([MAX_CAPTCHA*CHAR_SET_LEN]))out tf.add(tf.matmul(dense, w_out), b_out)return out 最后执行训练使用sigmoid分类每100次计算一次准确率如果准确率超过80%则保存模型并结束训练# 训练 def train_crack_captcha_cnn():output crack_captcha_cnn()# loss#loss tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logitsoutput, labelsY))loss tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logitsoutput, labelsY))# 最后一层用来分类的softmax和sigmoid有什么不同# optimizer 为了加快训练 learning_rate应该开始大然后慢慢衰optimizer tf.train.AdamOptimizer(learning_rate0.002).minimize(loss)predict tf.reshape(output, [-1, MAX_CAPTCHA, CHAR_SET_LEN])max_idx_p tf.argmax(predict, 2)max_idx_l tf.argmax(tf.reshape(Y, [-1, MAX_CAPTCHA, CHAR_SET_LEN]), 2)correct_pred 
tf.equal(max_idx_p, max_idx_l)accuracy tf.reduce_mean(tf.cast(correct_pred, tf.float32))saver tf.train.Saver()with tf.Session() as sess:sess.run(tf.global_variables_initializer())step 0while True:batch_x, batch_y get_next_batch(64)_, loss_ sess.run([optimizer, loss], feed_dict{X: batch_x, Y: batch_y, keep_prob: 0.75, train_phase:True})print(step, loss_)# 每100 step计算一次准确率if step % 100 0 and step ! 0:batch_x_test, batch_y_test get_next_batch(100)acc sess.run(accuracy, feed_dict{X: batch_x_test, Y: batch_y_test, keep_prob: 1., train_phase:False})print 第%s步训练准确率为%s % (step, acc)# 如果准确率大80%,保存模型,完成训练if acc 0.8:saver.save(sess, crack_capcha.model, global_stepstep)breakstep 1 执行结果：笔者在大概500次训练后，得到准确率84.3%的结果。笔者在一开始训练的时候图片大小是64 x 512的，训练的时候发现训练速度很慢，而且训练的loss不收敛，一直保持在33左右；缩小图片为32 x 256后解决，不知道为啥，猜测要么是网络层级不够，或者特征层数不够吧。小目标完成后，为了最终目标的完成，后续可能尝试方法2去识别不定长的中文字符图片，不过要先去理解LSTM网络和CTC模型了。下载地址：https://github.com/jimmyleaf/ocr_tensorflow_cnn/archive/master.zip

查看全文

http://www.pierceye.com/news/881433/