# traffic_sign 开发文档

# 读取训练数据集

pc = "mac"  # Set this for your platform: "mac" for an Apple machine, "win" for a Windows PC

不同系统的路径表示方法不同：Windows 上的路径分隔符是反斜杠 \ ，而 macOS 上的路径分隔符是正斜杠 / ，因此代码中需要分开处理，以提高兼容性。我目前使用的是 mac 系统。代码中通过设置 pc 变量的值来区分当前运行环境。

# Load every training image together with its class label.
# Reads the module-level names pc, classes, data and labels. For each readable
# file under <cwd>/data/Train/<class index>/ it appends a 30x30 image array to
# data and the class index to labels.
# Raises ValueError when pc is neither "mac" nor "win".
cur_path = os.getcwd()

# pc only decides which path separator is used; everything else is shared by
# both platforms, so the loading loop below is written once.
if pc == "mac":
    sep = "/"
elif pc == "win":
    sep = "\\"
else:
    raise ValueError("路径设置出错！")

log_path = cur_path + sep + "log"
print("当前平台" + pc)

# Collect the images and their labels, one sub-directory per class index.
for i in range(classes):
    path = os.path.join(cur_path, 'data/Train', str(i))
    images = os.listdir(path)
    print("正在加载第%d类训练图片" % (i + 1))
    for a in images:
        try:
            image = Image.open(path + sep + a)
            image = image.resize((30, 30))
            image = np.array(image)
            data.append(image)
            labels.append(i)
        except OSError:
            # Pillow reports both a missing file and a file it cannot decode
            # as OSError subclasses; report the file name and keep loading.
            print("加载训练集图片出错！", a)

通过 if 判断 pc 变量的值，进而确定运行环境。通过 for 循环遍历 data/Train 文件夹下各类别子目录中的图片文件，将每张图片缩放为 30x30 像素并转换成 numpy 数组，然后把该数组添加到 data 列表中，同时把图片所属的类别编号（即所在子目录的名称）添加到 labels 列表中。

# Turn the accumulated Python lists into numpy arrays.
data, labels = np.array(data), np.array(labels)

最后将 data 和 labels 列表转换为 numpy 数组。

# 清洗数据

# Split the samples into training and held-out parts.
# Result order: training images, held-out images, training labels, held-out labels.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.2,
    random_state=42,
)

print(*(part.shape for part in (X_train, X_test, y_train, y_test)))

# One-hot encode the integer class ids into vectors of width 43.
y_train, y_test = to_categorical(y_train, 43), to_categorical(y_test, 43)

将读取到的数据按 8:2 的比例（test_size=0.2）随机分割为训练集、测试集、训练标签集和测试标签集。然后将两个标签集转换为独热编码（One-Hot），共 43 个类别。

# 建立CNN卷积神经网络模型

# Build the CNN as an ordered stack of layers.
model = Sequential([
    # Input convolution: 16 filters, 3x3 kernel; input shape is that of one training sample
    Conv2D(filters=16, kernel_size=(3, 3), activation='relu', input_shape=X_train.shape[1:]),
    # Convolution followed by max pooling
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    # Dropout regularisation against overfitting: randomly drops units of the previous layer
    Dropout(rate=0.25),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(rate=0.25),
    # Flatten the feature maps
    Flatten(),
    # Densely connected layer
    Dense(512, activation='relu'),
    Dropout(rate=0.5),
    # Fully connected output layer with one softmax unit per class
    Dense(43, activation='softmax'),
])

# Compile with categorical cross-entropy loss and the Adam optimizer,
# a pairing commonly used for multi-class classification.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

首先创建 Sequential（顺序）模型，然后依次添加：卷积输入层、卷积层、最大池化层、Dropout 层、卷积层、最大池化层、Dropout 层、展平层、全连接层、Dropout 层、输出层。

然后编译模型。

# 训练模型

# Number of passes over the training data.
epochs = 11

# TensorBoard callback writing to ./log once per epoch.
tensorboard = TensorBoard(
    log_dir='./log',
    histogram_freq=1,
    write_graph=True,
    write_images=True,
    update_freq="epoch",
)

# Fit on the training split and validate on the held-out split after each epoch.
history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=epochs,
    validation_data=(X_test, y_test),
    callbacks=[tensorboard],
)

# Save the trained model to my_traffic_classifier.h5.
model.save("my_traffic_classifier.h5")

用训练数据对模型训练 11 个 epoch（轮），训练完成后将模型保存为 my_traffic_classifier.h5。

# 绘制图像

# Plot the training history: one figure per metric, each showing the training
# curve and the validation curve over the epochs.
curve_specs = (
    # (figure number, history key, figure title)
    (0, 'accuracy', 'acc'),
    (1, 'loss', 'Loss'),
)
for figure_id, metric, figure_title in curve_specs:
    plt.figure(figure_id)
    # Curve measured on the training data
    plt.plot(history.history[metric], label='training ' + metric)
    # Curve measured on the validation data
    plt.plot(history.history['val_' + metric], label='val ' + metric)
    plt.title(figure_title)
    plt.xlabel('epochs')
    plt.ylabel(metric)
    plt.legend()
    plt.show()

用 matplotlib 工具绘制准确率以及损失函数图像。

# 验证准确率

# 测试数据集的测试准确性

# Evaluate the trained model on the images listed in data/Test.csv.
# NOTE: this rebinds y_test, labels, data and X_test to the CSV-based test set.
y_test = pd.read_csv('data/Test.csv')

labels = y_test["ClassId"].values
imgs = y_test["Path"].values

data = []

# NOTE(review): each Path value is opened as-is, so it has to resolve from the
# current working directory — confirm against the CSV contents.
for img in imgs:
    image = Image.open(img)
    image = image.resize((30, 30))
    data.append(np.array(image))

X_test = np.array(data)

# Sequential.predict_classes() has been removed from current TensorFlow/Keras
# releases; the argmax over the softmax output gives the same class ids.
pred = np.argmax(model.predict(X_test), axis=-1)

# Accuracy on the test data
print(accuracy_score(labels, pred))

读取从未被神经网络学习过的新数据，以验证识别准确率。

← 运行项目 gui 开发文档 →