基于CNN卷积神经网络实现的通用文字或验证码识别方案

2018-5-12 Linyuting.cn 程序设计

    近年来,深度学习中的卷积神经网络(CNN)在图像识别领域得到了广泛应用。以前,我们需要借助专门的OCR算法才能识别文字、验证码等信息;如今,对于一些简单的验证码、文字识别任务,我们只需要准备一些简单的训练数据,搭建一个含有特定卷积层、池化层、拉直(flatten)等操作的深层神经网络模型,把数据丢进去训练,就能很轻松地制作出我们的识别模型,这是前所未有的。卷积、池化、flatten等操作在这里就不一一说明了,查阅一下相关资料即可。下面粘贴一份简单的验证码识别代码(Keras实现,用于识别ThinkPHP的验证码。我事先已经把ThinkPHP的验证码批量生成并存入sqlite数据库,由db.py读取,app.py执行训练。需要完整源代码请留言,由于我这台电脑上没有完整代码,待回家后补上):


app.py


from keras.models import *
from keras.layers import *
from db import *
from PIL import ImageFile
import matplotlib.pyplot as plt

import os

# Build the network: four convolutional stages, each made of two 3x3
# same-padded ReLU convolutions followed by 2x2 max pooling.  The filter
# count doubles per stage: 32, 64, 128, 256.
input_tensor = Input((height, width, 3))
x = input_tensor
for i in range(4):
    filters = 32 * 2 ** i
    x = Conv2D(filters, [3, 3], activation='relu', padding='same')(x)
    x = Conv2D(filters, [3, 3], activation='relu', padding='same')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

x = Flatten()(x)
x = Dropout(0.25)(x)
# One softmax head per captcha character (named c1, c2, ...).  The number of
# heads follows n_len from db.py so it stays in sync with the label layout
# produced by read().
x = [Dense(n_class, activation='softmax', name='c%d' % (i + 1))(x) for i in range(n_len)]
model = Model(inputs=input_tensor, outputs=x)

model.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy'])

# Resume from the previous checkpoint when it is available; otherwise keep
# the freshly built model above and train from scratch instead of crashing.
if os.path.exists('models/0841.h5'):
    model = load_model('models/0841.h5')

# Pull a single batch of up to 9400 samples and train on it.
X, y = next(read(batch_size=9400))
model.fit(X, y, epochs=20, batch_size=32)

# Make sure the output directory exists so the trained weights are not lost
# to a missing-directory error after training has finished.
os.makedirs('models', exist_ok=True)
model.save('models/02221252.h5')


    

db.py


import io
import sqlite3
from PIL import Image
from PIL import ImageFile
import base64
import numpy as np
import math
from PIL import ImageFile
import os,base64,io,PIL
import matplotlib.pyplot as plt


# Tell Pillow to tolerate image files whose data is truncated.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Open the SQLite database that holds the collected captchas.
db = r"collector.sqlite"
con = sqlite3.connect(db)
cur = con.cursor()

# Load every row of the `data` table, ordered by id.  The functions below
# read the label from column 1 and the base64 image from column 2.
select_sql = "select * from data order by id asc"
date_set = cur.execute(select_sql).fetchall()

# Captcha alphabet (digits 1-9 and lowercase letters) and image geometry:
# image width/height in pixels, characters per captcha, number of classes.
characters = '123456789zxcvbnmasdfghjklqwertyuiop'
width = 160
height = 60
n_len = 4
n_class = len(characters)

# Decode an ndarray-encoded captcha back into its string
def decode(y):
    """Return the captcha text encoded by the first sample of ``y``.

    ``y`` is stacked with ``np.array`` and reduced with ``argmax`` along
    axis 2, so it must stack into a 3-D array.  Presumably it is a list of
    per-character arrays shaped ``(batch, n_class)`` as built by ``read`` --
    confirm against callers.  Only column 0 (the first sample) is decoded.
    """
    stacked = np.array(y)
    class_ids = np.argmax(stacked, axis=2)
    first_sample = class_ids[:, 0]
    return ''.join(characters[idx] for idx in first_sample)

# Decode a base64 payload into an image
def decode_img(img_base64):
    """Decode base64-encoded image data and return it as an RGB image.

    The decoded bytes are wrapped in an in-memory buffer, opened with
    Pillow, and converted to ``'RGB'`` mode.
    """
    raw_bytes = base64.b64decode(img_base64)
    picture = PIL.Image.open(io.BytesIO(raw_bytes))
    rgb_picture = picture.convert('RGB')
    return rgb_picture

# Convert an image into an ndarray
def to_ndarray(image_data):
    """Return the pixels of ``image_data`` as a ``(1, rows, cols, 3)`` uint8 array.

    ``image_data`` must expose ``size`` and ``getdata()`` and hold three
    values per pixel; otherwise the reshape fails.
    """
    cols, rows = image_data.size[0], image_data.size[1]
    # The float32 intermediate is cast back to uint8 when stored below.
    pixels = np.array(image_data.getdata()).astype(np.float32)
    batch = np.zeros((1, rows, cols, 3), dtype=np.uint8)
    batch[0] = pixels.reshape((rows, cols, 3))
    return batch

# Iterate over the dataset in batches
def read(batch_size=100):
    """Yield ``(X, y)`` batches built from the rows of ``date_set``.

    Args:
        batch_size: Maximum number of samples per batch; must be positive.

    Yields:
        A tuple ``(X, y)`` where ``X`` is a uint8 array of shape
        ``(rows, height, width, 3)`` and ``y`` is a list of ``n_len`` uint8
        one-hot arrays of shape ``(rows, n_class)``, one per character
        position.  ``rows`` equals ``batch_size`` except for the final batch,
        which holds only the remaining samples (it is no longer padded with
        all-zero images and all-zero labels).

    Raises:
        ValueError: If ``batch_size`` is not positive, if a label does not
            have exactly ``n_len`` characters, or if a label contains a
            character missing from ``characters``.  Because this is a
            generator, errors surface when it is iterated, not when it is
            called.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer, got %r" % (batch_size,))

    data_size = len(date_set)
    for start in range(0, data_size, batch_size):
        end = min(start + batch_size, data_size)
        rows = end - start
        X = np.zeros((rows, height, width, 3), dtype=np.uint8)
        y = [np.zeros((rows, n_class), dtype=np.uint8) for _ in range(n_len)]
        for offset, record in enumerate(date_set[start:end]):
            code = record[1]
            if len(code) != n_len:
                raise ValueError(
                    "label %r at row %d does not have %d characters"
                    % (code, start + offset, n_len))
            X[offset] = to_ndarray(decode_img(record[2]))
            for j, ch in enumerate(code):
                class_index = characters.find(ch)
                # find() returns -1 for unknown characters; used as an index
                # that would silently mark the last class as the label.
                if class_index < 0:
                    raise ValueError(
                        "label %r at row %d contains unsupported character %r"
                        % (code, start + offset, ch))
                y[j][offset, class_index] = 1

        yield X, y

def readAt(i):
    """Return the image and stored label of the dataset row at index ``i``.

    Args:
        i: Index into ``date_set``.

    Returns:
        A tuple ``(X, y)`` where ``X`` is a uint8 array of shape
        ``(1, height, width, 3)`` holding the decoded image and ``y`` is the
        label exactly as stored in column 1 of the row (not one-hot encoded).

    Raises:
        IndexError: If ``i`` is outside the range of ``date_set``.
    """
    record = date_set[i]
    X = np.zeros((1, height, width, 3), dtype=np.uint8)
    X[0] = to_ndarray(decode_img(record[2]))
    return X, record[1]


标签: 深度学习

发表评论:

本站由emlog驱动 粤ICP备15042739号