python nn 聲音辨識 -2 建立nn

建立神經網路


在網路上查到,聲音分類可以用 softmax 當 activation function,再搭配 cross-entropy 這個 loss 算法(來源網站沒記下來)
output_layer = add_layer(intput, input_tensors = input_len, output_tensors = output_len, n_layer = 1, activation_function = tf.nn.softmax)
loss = tf.reduce_mean(-tf.reduce_sum(y_feeds * tf.log(output_layer), reduction_indices=[1]))
訓練與預測的函數很像
#訓練 要餵入 x , y
sess.run(train, feed_dict = {x_feeds: x_data, y_feeds: y_data})
#放入loss 就能輸出現在loss值
loss_ = sess.run(loss, feed_dict={x_feeds: x_data, y_feeds: y_data})
#預測 只要餵入 x
result = sess.run(output_layer, feed_dict = {x_feeds: [x_test]})

完整程式碼:

import tensorflow as tf
import numpy as np
import wav_fft as wavf
import os
# 定義一個添加層的函數
def add_layer(inputs, input_tensors, output_tensors, n_layer, activation_function = None):
    """Build one fully connected layer and return its output tensor.

    Creates a weight matrix of shape [input_tensors, output_tensors] drawn
    from tf.random_normal and a bias row of shape [1, output_tensors]
    initialised to 0.1, computes matmul(inputs, W) + b, and applies
    activation_function when one is supplied. Histogram summaries are
    registered for the weights, biases and outputs; the layer name and the
    weight variable's name are printed.

    Args:
        inputs: tensor fed into the layer (left operand of tf.matmul).
        input_tensors: number of input units (rows of the weight matrix).
        output_tensors: number of output units (columns of the weight
            matrix, width of the bias row).
        n_layer: label used to build the summary prefix 'layer<n_layer>'.
        activation_function: optional callable applied to the linear
            result; None leaves the layer linear.

    Returns:
        The layer's output tensor.
    """
    layer_name = 'layer%s' % n_layer
    print(layer_name)

    # NOTE: the outer scope is the fixed string 'Layer', not layer_name, so
    # variable names look like 'Layer/Weights/W:0'; only the summaries carry
    # the per-layer prefix.
    with tf.name_scope('Layer'):
        with tf.name_scope('Weights'):
            initial_weights = tf.random_normal([input_tensors, output_tensors])
            weights = tf.Variable(initial_weights, dtype=tf.float32, name='W')
            tf.summary.histogram(name=layer_name + '/Weights', values=weights)
            print(weights.name)

        with tf.name_scope('Biases'):
            initial_biases = tf.zeros([1, output_tensors]) + 0.1
            biases = tf.Variable(initial_biases, dtype=tf.float32, name='b')
            tf.summary.histogram(name=layer_name + '/Biases', values=biases)

        with tf.name_scope('Formula'):
            linear = tf.add(tf.matmul(inputs, weights), biases)

        # The activation is applied inside the 'Layer' scope, outside 'Formula'.
        outputs = linear if activation_function is None else activation_function(linear)
        tf.summary.histogram(name=layer_name + '/Outputs', values=outputs)

    return outputs
def nn_road(intput, input_len, output_len = 4):
    """Assemble the network: a single softmax layer over the input.

    Args:
        intput: input tensor handed straight to add_layer (the spelling is
            part of the existing interface).
        input_len: number of input features.
        output_len: number of output units; defaults to 4.

    Returns:
        The softmax output tensor produced by add_layer.
    """
    # The whole "road" is currently one layer, always labelled layer 1.
    return add_layer(
        intput,
        input_len,
        output_len,
        n_layer=1,
        activation_function=tf.nn.softmax,
    )
    
def train_run():
    """Train the softmax classifier, save a checkpoint and classify one file.

    Steps:
      1. Load the training matrix and one-hot answers via wavf.read_to_mat().
      2. Build placeholders, the network (nn_road), a cross-entropy loss and
         a gradient-descent training op (learning rate 0.01).
      3. Run 5000 full-batch training steps, printing the loss every 200.
      4. Save the variables to ./tensorflow_model/wav_nn_model.ckpt.
      5. Run the network on ./train_data/0210.wav and print the class
         probabilities.

    Returns:
        None. Results are reported with print() and the checkpoint on disk.
    """
    # Prepare the data.
    # NOTE(review): assumes read_to_mat() returns non-empty row-per-sample
    # sequences (features, one-hot labels) - confirm against wav_fft.
    audio_value, answer = wavf.read_to_mat()
    x_data = audio_value
    y_data = answer
    # Take the layer widths from the data rather than hard-coding 525 / 4, so
    # a different FFT length or class count does not need code changes here.
    x_data_len = len(audio_value[0])
    y_data_len = len(answer[0])

    # Build the feeds.
    with tf.name_scope('Inputs'):
        x_feeds = tf.placeholder(tf.float32, shape=[None, x_data_len], name='x_inputs')
        y_feeds = tf.placeholder(tf.float32, shape=[None, y_data_len], name='y_inputs')

    output_layer = nn_road(x_feeds, x_data_len, y_data_len)

    # Define the loss and the optimizer.
    with tf.name_scope('Loss'):
        # Cross entropy over the softmax output. The small epsilon keeps
        # log() finite when the softmax saturates to exactly 0; without it
        # 0 * log(0) evaluates to NaN and poisons the whole loss.
        epsilon = 1e-10
        cross_entropy = -tf.reduce_sum(y_feeds * tf.log(output_layer + epsilon), axis=[1])
        loss = tf.reduce_mean(cross_entropy)
        tf.summary.scalar('loss', loss)
    with tf.name_scope('Train'):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
        train = optimizer.minimize(loss)

    # Initialise the graph.
    saver = tf.train.Saver(tf.global_variables())
    init = tf.global_variables_initializer()
    train_feed = {x_feeds: x_data, y_feeds: y_data}
    with tf.Session() as sess:

        # Start training (full batch on every step).
        sess.run(init)
        for step in range(5000):
            sess.run(train, feed_dict=train_feed)
            if step % 200 == 0:
                # Evaluated after the update, so this is the post-step loss.
                loss_ = sess.run(loss, feed_dict=train_feed)
                print(loss_)

        dir_name = 'tensorflow_model'
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(dir_name, exist_ok=True)
        print("保存模型: ", saver.save(sess, './' + dir_name + '/' + 'wav_nn' + '_model.ckpt'))

        # Prediction test on a single file.
        temp, fs = wavf.read_file('./train_data/0210' + '.wav')
        _, ftline = wavf.fft_domyself(temp, fs)
        # Standardise the spectrum line to zero mean / unit variance.
        # NOTE(review): presumably mirrors the preprocessing applied to the
        # training rows inside wavf.read_to_mat() - confirm.
        normalized_linefft_data = (ftline - np.mean(ftline)) / np.std(ftline)
        x_test = normalized_linefft_data.tolist()
        # Prediction only needs x; wrap the sample in a list to form a batch of 1.
        result = sess.run(output_layer, feed_dict={x_feeds: [x_test]})
        # Same "[ p0  p1  p2  p3]" layout as before, for any number of classes.
        print('[' + ' '.join(' %0.10f' % p for p in result[0]) + ']')

# Run the full train / save / predict pipeline only when this file is executed
# as a script, not when it is imported.
if __name__ == '__main__':
    train_run()

拿0210.wav檔案測試,理想結果是[0.0, 0.0, 1.0, 0.0],也就是編號02(第三種)聲音的第10個檔案
神經網路得出[ 0.0000000000 0.0000349988 0.9999548197 0.0000101149],第三個類別的機率接近1,預測正確且信心度相當高
Out[1]:
./train_data/0001.wav [1.0, 0.0, 0.0, 0.0]
./train_data/0101.wav [0.0, 1.0, 0.0, 0.0]
./train_data/0201.wav [0.0, 0.0, 1.0, 0.0]
./train_data/0301.wav [0.0, 0.0, 0.0, 1.0]
./train_data/0002.wav [1.0, 0.0, 0.0, 0.0]
./train_data/0102.wav [0.0, 1.0, 0.0, 0.0]
./train_data/0202.wav [0.0, 0.0, 1.0, 0.0]
./train_data/0302.wav [0.0, 0.0, 0.0, 1.0]
./train_data/0003.wav [1.0, 0.0, 0.0, 0.0]
./train_data/0103.wav [0.0, 1.0, 0.0, 0.0]
./train_data/0203.wav [0.0, 0.0, 1.0, 0.0]
./train_data/0303.wav [0.0, 0.0, 0.0, 1.0]
./train_data/0004.wav [1.0, 0.0, 0.0, 0.0]
./train_data/0104.wav [0.0, 1.0, 0.0, 0.0]
./train_data/0204.wav [0.0, 0.0, 1.0, 0.0]
./train_data/0304.wav [0.0, 0.0, 0.0, 1.0]
./train_data/0005.wav [1.0, 0.0, 0.0, 0.0]
./train_data/0105.wav [0.0, 1.0, 0.0, 0.0]
./train_data/0205.wav [0.0, 0.0, 1.0, 0.0]
./train_data/0305.wav [0.0, 0.0, 0.0, 1.0]
./train_data/0006.wav [1.0, 0.0, 0.0, 0.0]
./train_data/0106.wav [0.0, 1.0, 0.0, 0.0]
./train_data/0206.wav [0.0, 0.0, 1.0, 0.0]
./train_data/0306.wav [0.0, 0.0, 0.0, 1.0]
./train_data/0007.wav [1.0, 0.0, 0.0, 0.0]
./train_data/0107.wav [0.0, 1.0, 0.0, 0.0]
./train_data/0207.wav [0.0, 0.0, 1.0, 0.0]
./train_data/0307.wav [0.0, 0.0, 0.0, 1.0]
./train_data/0008.wav [1.0, 0.0, 0.0, 0.0]
./train_data/0108.wav [0.0, 1.0, 0.0, 0.0]
./train_data/0208.wav [0.0, 0.0, 1.0, 0.0]
./train_data/0308.wav [0.0, 0.0, 0.0, 1.0]
./train_data/0009.wav [1.0, 0.0, 0.0, 0.0]
./train_data/0109.wav [0.0, 1.0, 0.0, 0.0]
./train_data/0209.wav [0.0, 0.0, 1.0, 0.0]
./train_data/0309.wav [0.0, 0.0, 0.0, 1.0]
layer1
Layer/Weights/W:0
19.0726
0.00698306
0.00230496
0.00142493
0.00104555
0.000831516
0.000692958
0.000595413
0.000522728
0.000466331
0.000421281
0.000384355
0.000353519
0.000327388
0.000304925
0.000285414
0.000268292
0.000253139
0.000239637
0.00022752
0.000216596
0.000206709
0.000197676
0.000189412
0.000181816
保存模型:  ./tensorflow_model/wav_nn_model.ckpt
[ 0.0000000000  0.0000349988  0.9999548197  0.0000101149]

留言

  1. 回覆
    1. 我這個不算是教學網站 你可以去看:https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/
      我很多時間是在這邊學的

      刪除

張貼留言

這個網誌中的熱門文章

C# 模擬鍵盤滑鼠控制電腦

android 定時通知(永久長期的) 本篇只講AlarmManager使用

python nn 聲音辨識 -1 傅立葉轉換

python pyautogui 簡介

python opencv 基本讀取、轉換、顯示、儲存等