python nn sound recognition - 2: building the NN

Building the neural network


From searching online, this activation function and this loss calculation are commonly used for sound classification (I didn't save the website):
  output_layer = add_layer(intput, input_tensors = input_len, output_tensors = output_len, n_layer = 1, activation_function = tf.nn.softmax)
  loss = tf.reduce_mean(-tf.reduce_sum(y_feeds * tf.log(output_layer), reduction_indices=[1]))
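For reference, this loss is the categorical cross-entropy: the one-hot label picks out -log of the probability the softmax assigned to the correct class. A tiny NumPy check with toy numbers (not from the training data):

  import numpy as np

  # toy check of the loss above: the one-hot label selects -log(p_correct)
  y = np.array([[0.0, 0.0, 1.0, 0.0]])   # true class is index 2
  p = np.array([[0.1, 0.1, 0.7, 0.1]])   # pretend softmax output
  loss = np.mean(-np.sum(y * np.log(p), axis=1))
  print(loss)                             # -log(0.7), about 0.357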
The training and prediction calls look very similar:
  # training: feed in both x and y
  sess.run(train, feed_dict = {x_feeds: x_data, y_feeds: y_data})
  # run loss to print the current loss value
  loss_ = sess.run(loss, feed_dict={x_feeds: x_data, y_feeds: y_data})
  # prediction: only x needs to be fed
  result = sess.run(output_layer, feed_dict = {x_feeds: [x_test]})
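The prediction `result` is a (1, 4) softmax vector; to turn it into a class label you can simply take the argmax. A small sketch continuing from the snippet above:

  import numpy as np

  # result[0] is the softmax vector for the single test sample
  predicted_class = int(np.argmax(result[0]))
  print(predicted_class, result[0][predicted_class])   # class index and its probability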

Complete code:

  import tensorflow as tf
  import numpy as np
  import wav_fft as wavf
  import os

  # define a function that adds one fully connected layer
  def add_layer(inputs, input_tensors, output_tensors, n_layer, activation_function = None):
      layer_name = 'layer%s' % n_layer
      print(layer_name)
      with tf.name_scope('Layer'):
          with tf.name_scope('Weights'):
              W = tf.Variable(tf.random_normal([input_tensors, output_tensors]), dtype=tf.float32, name = 'W')
              tf.summary.histogram(name = layer_name + '/Weights', values = W)
              print(W.name)
          with tf.name_scope('Biases'):
              b = tf.Variable(tf.zeros([1, output_tensors]) + 0.1, dtype=tf.float32, name = 'b')
              tf.summary.histogram(name = layer_name + '/Biases', values = b)
          with tf.name_scope('Formula'):
              formula = tf.add(tf.matmul(inputs, W), b)
          if activation_function is None:
              outputs = formula
          else:
              outputs = activation_function(formula)
          tf.summary.histogram(name = layer_name + '/Outputs', values = outputs)
          return outputs

  def nn_road(intput, input_len, output_len = 4):
      output_layer = add_layer(intput, input_tensors = input_len, output_tensors = output_len, n_layer = 1, activation_function = tf.nn.softmax)
      return output_layer

  def train_run():
      # prepare the data
      audio_value, answer = wavf.read_to_mat()
      x_data = audio_value
      y_data = answer
      x_data_len = len(audio_value[0])

      # build the feeds (placeholders)
      with tf.name_scope('Inputs'):
          x_feeds = tf.placeholder(tf.float32, shape = [None, 525], name = 'x_inputs')
          y_feeds = tf.placeholder(tf.float32, shape = [None, 4], name = 'y_inputs')
      output_layer = nn_road(x_feeds, 525)

      # define the loss and the optimizer to use
      with tf.name_scope('Loss'):
          #loss = tf.reduce_mean(tf.square(y_feeds - output_layer))
          loss = tf.reduce_mean(-tf.reduce_sum(y_feeds * tf.log(output_layer), reduction_indices=[1]))
          tf.summary.scalar('loss', loss)
      with tf.name_scope('Train'):
          optimizer = tf.train.GradientDescentOptimizer(learning_rate = 0.01)
          train = optimizer.minimize(loss)

      # initialize the graph
      saver = tf.train.Saver(tf.global_variables())
      init = tf.global_variables_initializer()
      with tf.Session() as sess:
          # start training
          sess.run(init)
          for step in range(5000):
              sess.run(train, feed_dict = {x_feeds: x_data, y_feeds: y_data})
              if step % 200 == 0:
                  loss_ = sess.run(loss, feed_dict={x_feeds: x_data, y_feeds: y_data})
                  print(loss_)

          dir_name = 'tensorflow_model'
          if not os.path.exists(dir_name): # make sure the folder exists first
              os.makedirs(dir_name)
          print("Saved model: ", saver.save(sess, './' + dir_name + '/' + 'wav_nn' + '_model.ckpt'))

          # predict test
          temp, fs = wavf.read_file('./train_data/0210' + '.wav')
          ftmap, ftline = wavf.fft_domyself(temp, fs)
          normalized_linefft_data = (ftline - np.mean(ftline)) / np.std(ftline)
          x_test = normalized_linefft_data.tolist()
          result = sess.run(output_layer, feed_dict = {x_feeds: [x_test]})
          print('[' + ' %0.10f' % result[0][0], ' %0.10f' % result[0][1], ' %0.10f' % result[0][2], ' %0.10f' % result[0][3] + ']')

  if __name__ == '__main__':
      train_run()
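The checkpoint saved above can be restored later so prediction does not require retraining. A minimal sketch, assuming the same `nn_road` definition and the `wav_fft` helpers used in this series (the function name `predict_file` is just for illustration):

  import tensorflow as tf
  import numpy as np
  import wav_fft as wavf

  def predict_file(wav_path):
      # rebuild the same graph as in train_run()
      x_feeds = tf.placeholder(tf.float32, shape = [None, 525], name = 'x_inputs')
      output_layer = nn_road(x_feeds, 525)
      saver = tf.train.Saver()
      with tf.Session() as sess:
          # load the weights saved by train_run()
          saver.restore(sess, './tensorflow_model/wav_nn_model.ckpt')
          temp, fs = wavf.read_file(wav_path)
          ftmap, ftline = wavf.fft_domyself(temp, fs)
          x_test = ((ftline - np.mean(ftline)) / np.std(ftline)).tolist()
          return sess.run(output_layer, feed_dict = {x_feeds: [x_test]})

  print(predict_file('./train_data/0210.wav'))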
Testing with the 0210.wav file, the ideal result is [0.0, 0.0, 1.0, 0.0]; it is the 10th file of sound class 2.
The neural network outputs [ 0.0000000000 0.0000349988 0.9999548197 0.0000101149], so the accuracy is quite high.
  Out[1]:
  ./train_data/0001.wav [1.0, 0.0, 0.0, 0.0]
  ./train_data/0101.wav [0.0, 1.0, 0.0, 0.0]
  ./train_data/0201.wav [0.0, 0.0, 1.0, 0.0]
  ./train_data/0301.wav [0.0, 0.0, 0.0, 1.0]
  ./train_data/0002.wav [1.0, 0.0, 0.0, 0.0]
  ./train_data/0102.wav [0.0, 1.0, 0.0, 0.0]
  ./train_data/0202.wav [0.0, 0.0, 1.0, 0.0]
  ./train_data/0302.wav [0.0, 0.0, 0.0, 1.0]
  ./train_data/0003.wav [1.0, 0.0, 0.0, 0.0]
  ./train_data/0103.wav [0.0, 1.0, 0.0, 0.0]
  ./train_data/0203.wav [0.0, 0.0, 1.0, 0.0]
  ./train_data/0303.wav [0.0, 0.0, 0.0, 1.0]
  ./train_data/0004.wav [1.0, 0.0, 0.0, 0.0]
  ./train_data/0104.wav [0.0, 1.0, 0.0, 0.0]
  ./train_data/0204.wav [0.0, 0.0, 1.0, 0.0]
  ./train_data/0304.wav [0.0, 0.0, 0.0, 1.0]
  ./train_data/0005.wav [1.0, 0.0, 0.0, 0.0]
  ./train_data/0105.wav [0.0, 1.0, 0.0, 0.0]
  ./train_data/0205.wav [0.0, 0.0, 1.0, 0.0]
  ./train_data/0305.wav [0.0, 0.0, 0.0, 1.0]
  ./train_data/0006.wav [1.0, 0.0, 0.0, 0.0]
  ./train_data/0106.wav [0.0, 1.0, 0.0, 0.0]
  ./train_data/0206.wav [0.0, 0.0, 1.0, 0.0]
  ./train_data/0306.wav [0.0, 0.0, 0.0, 1.0]
  ./train_data/0007.wav [1.0, 0.0, 0.0, 0.0]
  ./train_data/0107.wav [0.0, 1.0, 0.0, 0.0]
  ./train_data/0207.wav [0.0, 0.0, 1.0, 0.0]
  ./train_data/0307.wav [0.0, 0.0, 0.0, 1.0]
  ./train_data/0008.wav [1.0, 0.0, 0.0, 0.0]
  ./train_data/0108.wav [0.0, 1.0, 0.0, 0.0]
  ./train_data/0208.wav [0.0, 0.0, 1.0, 0.0]
  ./train_data/0308.wav [0.0, 0.0, 0.0, 1.0]
  ./train_data/0009.wav [1.0, 0.0, 0.0, 0.0]
  ./train_data/0109.wav [0.0, 1.0, 0.0, 0.0]
  ./train_data/0209.wav [0.0, 0.0, 1.0, 0.0]
  ./train_data/0309.wav [0.0, 0.0, 0.0, 1.0]
  layer1
  Layer/Weights/W:0
  19.0726
  0.00698306
  0.00230496
  0.00142493
  0.00104555
  0.000831516
  0.000692958
  0.000595413
  0.000522728
  0.000466331
  0.000421281
  0.000384355
  0.000353519
  0.000327388
  0.000304925
  0.000285414
  0.000268292
  0.000253139
  0.000239637
  0.00022752
  0.000216596
  0.000206709
  0.000197676
  0.000189412
  0.000181816
  Saved model:  ./tensorflow_model/wav_nn_model.ckpt
  [ 0.0000000000 0.0000349988 0.9999548197 0.0000101149]
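For reference, the one-hot labels printed next to each training file above follow the first two digits of the filename. A small sketch of such a mapping (a hypothetical helper, not necessarily how wav_fft.read_to_mat builds the labels):

  def filename_to_one_hot(name, n_classes = 4):
      # '0210.wav' -> class index 2 -> [0.0, 0.0, 1.0, 0.0]
      class_index = int(name[:2])
      label = [0.0] * n_classes
      label[class_index] = 1.0
      return label

  print(filename_to_one_hot('0210.wav'))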

