python nn 聲音辨識 -2 建立nn
建立神經網路
在網路上查到,聲音辨識可以用這個 function(softmax)搭配這個 loss 算法(cross-entropy),不過來源網站沒記下來:
output_layer = add_layer(intput, input_tensors = input_len, output_tensors = output_len, n_layer = 1, activation_function = tf.nn.softmax)
loss = tf.reduce_mean(-tf.reduce_sum(y_feeds * tf.log(output_layer), reduction_indices=[1]))

訓練與預測的函數很像:
#訓練 要餵入 x , y
sess.run(train, feed_dict = {x_feeds: x_data, y_feeds: y_data})
#放入loss 就能輸出現在loss值
loss_ = sess.run(loss, feed_dict={x_feeds: x_data, y_feeds: y_data})
#預測 只要餵入 x
result = sess.run(output_layer, feed_dict = {x_feeds: [x_test]})
完整程式碼:
import os

import numpy as np
import tensorflow as tf

import wav_fft as wavf


def add_layer(inputs, input_tensors, output_tensors, n_layer,
              activation_function=None):
    """Append one fully-connected layer to the graph and return its output.

    Args:
        inputs: Tensor fed into the layer; it is matrix-multiplied with a
            weight matrix of shape ``[input_tensors, output_tensors]``.
        input_tensors: Number of input units (rows of the weight matrix).
        output_tensors: Number of output units (columns of the weight matrix).
        n_layer: Layer index, used only to build the summary/print label.
        activation_function: Optional callable applied to ``inputs @ W + b``.
            When ``None`` the linear result is returned unchanged.

    Returns:
        The layer's output tensor.
    """
    layer_name = 'layer%s' % n_layer
    print(layer_name)
    # NOTE: the scope name is the fixed string 'Layer' (not layer_name), so
    # variable names -- and therefore saved checkpoint keys -- stay the same
    # as in previously saved models.
    with tf.name_scope('Layer'):
        with tf.name_scope('Weights'):
            W = tf.Variable(
                tf.random_normal([input_tensors, output_tensors]),
                dtype=tf.float32,
                name='W',
            )
            print(W.name)
        with tf.name_scope('Biases'):
            # Biases start at 0.1 rather than 0.
            b = tf.Variable(
                tf.zeros([1, output_tensors]) + 0.1,
                dtype=tf.float32,
                name='b',
            )
        with tf.name_scope('Formula'):
            formula = tf.add(tf.matmul(inputs, W), b)
        if activation_function is None:
            outputs = formula
        else:
            outputs = activation_function(formula)
        tf.summary.histogram(name=layer_name + '/Outputs', values=outputs)
        return outputs


def nn_road(intput, input_len, output_len=4):
    """Build the network: a single softmax layer on top of the input tensor.

    Args:
        intput: Input tensor (the misspelled parameter name is kept so that
            existing keyword callers keep working).
        input_len: Number of features per sample.
        output_len: Number of output classes (defaults to 4).

    Returns:
        The softmax output tensor of the single layer.
    """
    output_layer = add_layer(
        intput,
        input_tensors=input_len,
        output_tensors=output_len,
        n_layer=1,
        activation_function=tf.nn.softmax,
    )
    return output_layer


def train_run():
    """Train the softmax classifier, save it, and predict one test file.

    Loads the training set through ``wavf.read_to_mat()``, runs 5000 steps of
    full-batch gradient descent (printing the loss every 200 steps), saves a
    checkpoint under ``./tensorflow_model/`` and finally prints the class
    probabilities predicted for ``./train_data/0210.wav``.

    Raises:
        ValueError: If ``wavf.read_to_mat()`` returns no samples.
    """
    # Prepare the data.
    audio_value, answer = wavf.read_to_mat()
    x_data = audio_value
    y_data = answer
    if len(x_data) == 0:
        raise ValueError('wavf.read_to_mat() returned no training samples')
    # Take the layer sizes from the data instead of hard-coding 525 / 4, so
    # the graph always matches what was actually loaded.
    # assumes every row has the same length as the first one -- TODO confirm
    x_data_len = len(x_data[0])
    y_data_len = len(y_data[0])

    # Build the feeds (placeholders).
    with tf.name_scope('Inputs'):
        x_feeds = tf.placeholder(
            tf.float32, shape=[None, x_data_len], name='x_inputs')
        y_feeds = tf.placeholder(
            tf.float32, shape=[None, y_data_len], name='y_inputs')

    output_layer = nn_road(x_feeds, x_data_len, output_len=y_data_len)

    # Define the loss and the optimizer to use.
    with tf.name_scope('Loss'):
        # Cross-entropy. The softmax output is clipped away from 0 first:
        # log(0) is -inf and 0 * -inf is NaN, which would poison the loss
        # as soon as any class probability underflows to exactly 0.
        safe_output = tf.clip_by_value(output_layer, 1e-10, 1.0)
        loss = tf.reduce_mean(
            -tf.reduce_sum(y_feeds * tf.log(safe_output), axis=1))
        tf.summary.scalar('loss', loss)
    with tf.name_scope('Train'):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
        train = optimizer.minimize(loss)

    # Initialise the graph.
    saver = tf.train.Saver(tf.global_variables())
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        # Start training.
        sess.run(init)
        train_feed = {x_feeds: x_data, y_feeds: y_data}
        for step in range(5000):
            sess.run(train, feed_dict=train_feed)
            if step % 200 == 0:
                loss_ = sess.run(loss, feed_dict=train_feed)
                print(loss_)

        # Save the trained model, creating the target folder when missing.
        dir_name = 'tensorflow_model'
        os.makedirs(dir_name, exist_ok=True)
        print("保存模型: ", saver.save(sess, './' + dir_name + '/' + 'wav_nn' + '_model.ckpt'))

        # Prediction test on a single file.
        temp, fs = wavf.read_file('./train_data/0210' + '.wav')
        _, ftline = wavf.fft_domyself(temp, fs)
        # Standardise the spectrum line (zero mean, unit standard deviation)
        # before feeding it to the network.
        normalized_linefft_data = (ftline - np.mean(ftline)) / np.std(ftline)
        x_test = normalized_linefft_data.tolist()
        result = sess.run(output_layer, feed_dict={x_feeds: [x_test]})
        print('[' + ' %0.10f'%result[0][0], ' %0.10f'%result[0][1], ' %0.10f'%result[0][2], ' %0.10f'%result[0][3] + ']')


if __name__ == '__main__':
    train_run()

# Tested with 0210.wav: the ideal result is [0.0, 0.0, 1.0, 0.0], because it
# is the 10th file of sound class 2 (files named 02xx are labelled
# [0.0, 0.0, 1.0, 0.0]).
神經網路得出 [ 0.0000000000 0.0000349988 0.9999548197 0.0000101149],與理想結果幾乎一致,準確率相當高。
Out[1]: ./train_data/0001.wav [1.0, 0.0, 0.0, 0.0] ./train_data/0101.wav [0.0, 1.0, 0.0, 0.0] ./train_data/0201.wav [0.0, 0.0, 1.0, 0.0] ./train_data/0301.wav [0.0, 0.0, 0.0, 1.0] ./train_data/0002.wav [1.0, 0.0, 0.0, 0.0] ./train_data/0102.wav [0.0, 1.0, 0.0, 0.0] ./train_data/0202.wav [0.0, 0.0, 1.0, 0.0] ./train_data/0302.wav [0.0, 0.0, 0.0, 1.0] ./train_data/0003.wav [1.0, 0.0, 0.0, 0.0] ./train_data/0103.wav [0.0, 1.0, 0.0, 0.0] ./train_data/0203.wav [0.0, 0.0, 1.0, 0.0] ./train_data/0303.wav [0.0, 0.0, 0.0, 1.0] ./train_data/0004.wav [1.0, 0.0, 0.0, 0.0] ./train_data/0104.wav [0.0, 1.0, 0.0, 0.0] ./train_data/0204.wav [0.0, 0.0, 1.0, 0.0] ./train_data/0304.wav [0.0, 0.0, 0.0, 1.0] ./train_data/0005.wav [1.0, 0.0, 0.0, 0.0] ./train_data/0105.wav [0.0, 1.0, 0.0, 0.0] ./train_data/0205.wav [0.0, 0.0, 1.0, 0.0] ./train_data/0305.wav [0.0, 0.0, 0.0, 1.0] ./train_data/0006.wav [1.0, 0.0, 0.0, 0.0] ./train_data/0106.wav [0.0, 1.0, 0.0, 0.0] ./train_data/0206.wav [0.0, 0.0, 1.0, 0.0] ./train_data/0306.wav [0.0, 0.0, 0.0, 1.0] ./train_data/0007.wav [1.0, 0.0, 0.0, 0.0] ./train_data/0107.wav [0.0, 1.0, 0.0, 0.0] ./train_data/0207.wav [0.0, 0.0, 1.0, 0.0] ./train_data/0307.wav [0.0, 0.0, 0.0, 1.0] ./train_data/0008.wav [1.0, 0.0, 0.0, 0.0] ./train_data/0108.wav [0.0, 1.0, 0.0, 0.0] ./train_data/0208.wav [0.0, 0.0, 1.0, 0.0] ./train_data/0308.wav [0.0, 0.0, 0.0, 1.0] ./train_data/0009.wav [1.0, 0.0, 0.0, 0.0] ./train_data/0109.wav [0.0, 1.0, 0.0, 0.0] ./train_data/0209.wav [0.0, 0.0, 1.0, 0.0] ./train_data/0309.wav [0.0, 0.0, 0.0, 1.0] layer1 Layer/Weights/W:0 19.0726 0.00698306 0.00230496 0.00142493 0.00104555 0.000831516 0.000692958 0.000595413 0.000522728 0.000466331 0.000421281 0.000384355 0.000353519 0.000327388 0.000304925 0.000285414 0.000268292 0.000253139 0.000239637 0.00022752 0.000216596 0.000206709 0.000197676 0.000189412 0.000181816 保存模型: ./tensorflow_model/wav_nn_model.ckpt [ 0.0000000000 0.0000349988 0.9999548197 0.0000101149]
跪求高手解說,我有點不太懂。
回覆刪除我這個不算是教學網站 你可以去看:https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/
刪除我很多時間是在這邊學的