python nn 聲音辨識 -2 建立nn
建立神經網路
網路上查到聲音辨識可以用 softmax 當輸出層的 activation function,並搭配 cross-entropy 當 loss 算法(來源網站沒記下來)
訓練與預測的函數很像
- output_layer = add_layer(intput, input_tensors = input_len, output_tensors = output_len, n_layer = 1, activation_function = tf.nn.softmax)
- loss = tf.reduce_mean(-tf.reduce_sum(y_feeds * tf.log(output_layer), reduction_indices=[1]))
- #訓練 要餵入 x , y
- sess.run(train, feed_dict = {x_feeds: x_data, y_feeds: y_data})
- #放入loss 就能輸出現在loss值
- loss_ = sess.run(loss, feed_dict={x_feeds: x_data, y_feeds: y_data})
- #預測 只要餵入 x
- result = sess.run(output_layer, feed_dict = {x_feeds: [x_test]})
完整程式碼:
拿0210.wav檔案測試,理想結果是[0.0, 0.0, 1.0, 0.0];它是編號02(第三種)聲音的第10個檔案,不在訓練資料(第01~09個檔案)之中
- import tensorflow as tf
- import numpy as np
- import wav_fft as wavf
- import os
- # 定義一個添加層的函數
def add_layer(inputs, input_tensors, output_tensors, n_layer, activation_function=None):
    """Append one fully connected layer to the graph and return its output.

    Args:
        inputs: Tensor fed to the layer; it is matrix-multiplied with a
            weight of shape [input_tensors, output_tensors].
        input_tensors: Number of input units (rows of the weight matrix).
        output_tensors: Number of output units (columns of the weight matrix).
        n_layer: Layer index; only used to build the 'layer<n>' prefix of the
            histogram summary names.
        activation_function: Optional callable applied to the affine result.
            When None, the affine result is returned as-is.

    Returns:
        The output tensor of the layer.
    """
    layer_name = 'layer%s' % n_layer
    print(layer_name)

    with tf.name_scope('Layer'):
        with tf.name_scope('Weights'):
            # Weights are initialised from a random normal distribution.
            weights = tf.Variable(
                tf.random_normal([input_tensors, output_tensors]),
                dtype=tf.float32,
                name='W',
            )
            tf.summary.histogram(layer_name + '/Weights', weights)
            print(weights.name)

        with tf.name_scope('Biases'):
            # Biases start at 0.1.
            biases = tf.Variable(
                tf.zeros([1, output_tensors]) + 0.1,
                dtype=tf.float32,
                name='b',
            )
            tf.summary.histogram(layer_name + '/Biases', biases)

        with tf.name_scope('Formula'):
            pre_activation = tf.add(tf.matmul(inputs, weights), biases)

        outputs = (
            pre_activation
            if activation_function is None
            else activation_function(pre_activation)
        )
        tf.summary.histogram(layer_name + '/Outputs', outputs)
        return outputs
def nn_road(intput, input_len, output_len=4):
    """Build the network: a single softmax layer mapping features to classes.

    Args:
        intput: Input tensor passed to the layer (parameter name kept as
            spelled in the original so keyword callers keep working).
        input_len: Number of features per sample.
        output_len: Number of output classes (defaults to 4).

    Returns:
        The softmax output tensor of the layer.
    """
    return add_layer(
        intput,
        input_tensors=input_len,
        output_tensors=output_len,
        n_layer=1,
        activation_function=tf.nn.softmax,
    )
def train_run():
    """Train the softmax classifier, save a checkpoint, and classify one file.

    Steps:
      1. Load the feature rows and their one-hot answers with
         ``wavf.read_to_mat()``.
      2. Build the placeholders, the single softmax layer, the cross-entropy
         loss and a gradient-descent training op.
      3. Run 5000 full-batch training steps, printing the loss every 200 steps.
      4. Save the model under ``./tensorflow_model/``.
      5. Run ``./train_data/0210.wav`` through the network and print the four
         class probabilities.
    """
    # Prepare data
    x_data, y_data = wavf.read_to_mat()
    # Size the network from the data instead of a hard-coded feature count.
    x_data_len = len(x_data[0])

    # Build feeds
    with tf.name_scope('Inputs'):
        x_feeds = tf.placeholder(tf.float32, shape=[None, x_data_len], name='x_inputs')
        y_feeds = tf.placeholder(tf.float32, shape=[None, 4], name='y_inputs')

    output_layer = nn_road(x_feeds, x_data_len)

    # Define the loss and the optimizer
    with tf.name_scope('Loss'):
        # A softmax output can underflow to exactly 0; log(0) is -inf and
        # 0 * -inf is NaN, which would poison the loss and every gradient.
        # Clipping at the smallest normal float32 keeps the log finite while
        # leaving all normally representable probabilities untouched.
        min_prob = float(np.finfo(np.float32).tiny)
        safe_probs = tf.clip_by_value(output_layer, min_prob, 1.0)
        loss = tf.reduce_mean(
            -tf.reduce_sum(y_feeds * tf.log(safe_probs), reduction_indices=[1])
        )
        tf.summary.scalar('loss', loss)

    with tf.name_scope('Train'):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
        train = optimizer.minimize(loss)

    # Initialise the graph
    saver = tf.train.Saver(tf.global_variables())
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        # Start training
        sess.run(init)
        # The whole training set is fed on every step, so build the feed once.
        train_feed = {x_feeds: x_data, y_feeds: y_data}
        for step in range(5000):
            sess.run(train, feed_dict=train_feed)
            if step % 200 == 0:
                loss_ = sess.run(loss, feed_dict=train_feed)
                print(loss_)

        dir_name = 'tensorflow_model'
        # Create the output directory if needed, without an exists/create race.
        os.makedirs(dir_name, exist_ok=True)
        print("保存模型: ", saver.save(sess, './' + dir_name + '/' + 'wav_nn' + '_model.ckpt'))

        # Predict test: the features are standardised (zero mean, unit std)
        # before being fed to the network.
        temp, fs = wavf.read_file('./train_data/0210' + '.wav')
        ftmap, ftline = wavf.fft_domyself(temp, fs)
        normalized_linefft_data = (ftline - np.mean(ftline)) / np.std(ftline)
        x_test = normalized_linefft_data.tolist()
        # Only x is needed for prediction; wrap it as a batch of one sample.
        result = sess.run(output_layer, feed_dict={x_feeds: [x_test]})
        print('[' + ' %0.10f'%result[0][0], ' %0.10f'%result[0][1], ' %0.10f'%result[0][2], ' %0.10f'%result[0][3] + ']')
# Run the training only when executed as a script, not when imported.
if __name__ == "__main__":
    train_run()
神經網路得出[ 0.0000000000 0.0000349988 0.9999548197 0.0000101149],與理想結果[0.0, 0.0, 1.0, 0.0]非常接近;就這一個測試檔案而言預測正確
- Out[1]:
- ./train_data/0001.wav [1.0, 0.0, 0.0, 0.0]
- ./train_data/0101.wav [0.0, 1.0, 0.0, 0.0]
- ./train_data/0201.wav [0.0, 0.0, 1.0, 0.0]
- ./train_data/0301.wav [0.0, 0.0, 0.0, 1.0]
- ./train_data/0002.wav [1.0, 0.0, 0.0, 0.0]
- ./train_data/0102.wav [0.0, 1.0, 0.0, 0.0]
- ./train_data/0202.wav [0.0, 0.0, 1.0, 0.0]
- ./train_data/0302.wav [0.0, 0.0, 0.0, 1.0]
- ./train_data/0003.wav [1.0, 0.0, 0.0, 0.0]
- ./train_data/0103.wav [0.0, 1.0, 0.0, 0.0]
- ./train_data/0203.wav [0.0, 0.0, 1.0, 0.0]
- ./train_data/0303.wav [0.0, 0.0, 0.0, 1.0]
- ./train_data/0004.wav [1.0, 0.0, 0.0, 0.0]
- ./train_data/0104.wav [0.0, 1.0, 0.0, 0.0]
- ./train_data/0204.wav [0.0, 0.0, 1.0, 0.0]
- ./train_data/0304.wav [0.0, 0.0, 0.0, 1.0]
- ./train_data/0005.wav [1.0, 0.0, 0.0, 0.0]
- ./train_data/0105.wav [0.0, 1.0, 0.0, 0.0]
- ./train_data/0205.wav [0.0, 0.0, 1.0, 0.0]
- ./train_data/0305.wav [0.0, 0.0, 0.0, 1.0]
- ./train_data/0006.wav [1.0, 0.0, 0.0, 0.0]
- ./train_data/0106.wav [0.0, 1.0, 0.0, 0.0]
- ./train_data/0206.wav [0.0, 0.0, 1.0, 0.0]
- ./train_data/0306.wav [0.0, 0.0, 0.0, 1.0]
- ./train_data/0007.wav [1.0, 0.0, 0.0, 0.0]
- ./train_data/0107.wav [0.0, 1.0, 0.0, 0.0]
- ./train_data/0207.wav [0.0, 0.0, 1.0, 0.0]
- ./train_data/0307.wav [0.0, 0.0, 0.0, 1.0]
- ./train_data/0008.wav [1.0, 0.0, 0.0, 0.0]
- ./train_data/0108.wav [0.0, 1.0, 0.0, 0.0]
- ./train_data/0208.wav [0.0, 0.0, 1.0, 0.0]
- ./train_data/0308.wav [0.0, 0.0, 0.0, 1.0]
- ./train_data/0009.wav [1.0, 0.0, 0.0, 0.0]
- ./train_data/0109.wav [0.0, 1.0, 0.0, 0.0]
- ./train_data/0209.wav [0.0, 0.0, 1.0, 0.0]
- ./train_data/0309.wav [0.0, 0.0, 0.0, 1.0]
- layer1
- Layer/Weights/W:0
- 19.0726
- 0.00698306
- 0.00230496
- 0.00142493
- 0.00104555
- 0.000831516
- 0.000692958
- 0.000595413
- 0.000522728
- 0.000466331
- 0.000421281
- 0.000384355
- 0.000353519
- 0.000327388
- 0.000304925
- 0.000285414
- 0.000268292
- 0.000253139
- 0.000239637
- 0.00022752
- 0.000216596
- 0.000206709
- 0.000197676
- 0.000189412
- 0.000181816
- 保存模型: ./tensorflow_model/wav_nn_model.ckpt
- [ 0.0000000000 0.0000349988 0.9999548197 0.0000101149]
跪求高手解有點不太懂
我這個不算是教學網站,你可以去看:https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/
我很多時間是在這邊學的