python nn 聲音辨識 -2 建立nn
建立神經網路
在網路上查到聲音辨識可以用這個 function(softmax)搭配這個 loss 算法(cross-entropy),出處的網站當時沒有記下來。
output_layer = add_layer(intput, input_tensors = input_len, output_tensors = output_len, n_layer = 1, activation_function = tf.nn.softmax)
loss = tf.reduce_mean(-tf.reduce_sum(y_feeds * tf.log(output_layer), reduction_indices=[1]))
訓練與預測的函數很像:
#訓練 要餵入 x , y
sess.run(train, feed_dict = {x_feeds: x_data, y_feeds: y_data})
#放入loss 就能輸出現在loss值
loss_ = sess.run(loss, feed_dict={x_feeds: x_data, y_feeds: y_data})
#預測 只要餵入 x
result = sess.run(output_layer, feed_dict = {x_feeds: [x_test]})
完整程式碼:
import tensorflow as tf
import numpy as np
import wav_fft as wavf
import os
# 定義一個添加層的函數
def add_layer(inputs, input_tensors, output_tensors, n_layer, activation_function = None):
    """Build one fully connected layer and return its output tensor.

    Args:
        inputs: Tensor that is matrix-multiplied with the layer weights.
        input_tensors: Number of rows of the weight matrix.
        output_tensors: Number of columns of the weight matrix and of the bias.
        n_layer: Layer number; only used to label the histogram summaries
            and the printed layer name.
        activation_function: Optional callable applied to ``inputs @ W + b``.
            When ``None`` the linear result is returned unchanged.

    Returns:
        The (optionally activated) layer output tensor.
    """
    layer_name = 'layer%s' % n_layer
    print(layer_name)
    with tf.name_scope('Layer'):
        with tf.name_scope('Weights'):
            # Weights start from random normal values.
            weights = tf.Variable(
                tf.random_normal([input_tensors, output_tensors]),
                dtype=tf.float32,
                name='W',
            )
            tf.summary.histogram(name='%s/Weights' % layer_name, values=weights)
            print(weights.name)
        with tf.name_scope('Biases'):
            # Biases start at 0.1.
            biases = tf.Variable(
                tf.zeros([1, output_tensors]) + 0.1,
                dtype=tf.float32,
                name='b',
            )
            tf.summary.histogram(name='%s/Biases' % layer_name, values=biases)
        with tf.name_scope('Formula'):
            pre_activation = tf.add(tf.matmul(inputs, weights), biases)
        outputs = (
            pre_activation
            if activation_function is None
            else activation_function(pre_activation)
        )
        tf.summary.histogram(name='%s/Outputs' % layer_name, values=outputs)
    return outputs
def nn_road(intput, input_len, output_len = 4):
    """Assemble the network: a single softmax layer on top of ``intput``.

    Args:
        intput: Input tensor fed to the layer.
        input_len: Width of the input, passed on as the layer's input size.
        output_len: Number of output units (defaults to 4).

    Returns:
        The softmax output tensor produced by ``add_layer``.
    """
    return add_layer(
        intput,
        input_tensors=input_len,
        output_tensors=output_len,
        n_layer=1,
        activation_function=tf.nn.softmax,
    )
def train_run():
    """Train the softmax network, save a checkpoint, and predict one test file.

    Loads the training matrix and labels through ``wavf.read_to_mat()``,
    builds the graph, runs 5000 full-batch gradient-descent steps (printing
    the loss every 200 steps), saves the model under ``tensorflow_model/``
    and finally prints the network output for ``./train_data/0210.wav``.
    """
    # Prepare the data.
    x_data, y_data = wavf.read_to_mat()
    # Size the graph from the data itself rather than hard-coding 525 / 4,
    # so a different feature width or class count does not break the feeds.
    x_data_len = len(x_data[0])
    y_data_len = len(y_data[0])

    # Build the feeds.
    with tf.name_scope('Inputs'):
        x_feeds = tf.placeholder(tf.float32, shape = [None, x_data_len], name = 'x_inputs')
        y_feeds = tf.placeholder(tf.float32, shape = [None, y_data_len], name = 'y_inputs')
    output_layer = nn_road(x_feeds, x_data_len, output_len = y_data_len)

    # Define the loss and the optimizer.
    with tf.name_scope('Loss'):
        # Cross-entropy computed on the softmax output.
        # NOTE(review): tf.log yields -inf when a softmax output underflows to
        # 0, which turns the loss into inf/NaN. Computing the loss from logits
        # would avoid that but requires nn_road to expose them.
        loss = tf.reduce_mean(-tf.reduce_sum(y_feeds * tf.log(output_layer), reduction_indices=[1]))
        tf.summary.scalar('loss', loss)
    with tf.name_scope('Train'):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate = 0.01)
        train = optimizer.minimize(loss)

    # Initialise the graph.
    saver = tf.train.Saver(tf.global_variables())
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        feed = {x_feeds: x_data, y_feeds: y_data}
        for step in range(5000):
            sess.run(train, feed_dict = feed)
            if step % 200 == 0:
                print(sess.run(loss, feed_dict = feed))

        dir_name = 'tensorflow_model'
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(dir_name, exist_ok=True)
        print("保存模型: ", saver.save(sess, './' + dir_name + '/' + 'wav_nn' + '_model.ckpt'))

        # Prediction test: only x has to be fed.
        temp, fs = wavf.read_file('./train_data/0210' + '.wav')
        ftmap, ftline = wavf.fft_domyself(temp, fs)
        # Standardise the spectrum line to zero mean and unit variance.
        normalized_linefft_data = (ftline - np.mean(ftline)) / np.std(ftline)
        x_test = normalized_linefft_data.tolist()
        result = sess.run(output_layer, feed_dict = {x_feeds: [x_test]})
        # Same spacing as the former four-value print, for any class count.
        print('[' + ' '.join(' %0.10f' % p for p in result[0]) + ']')
# Script entry point: train, save and run the sample prediction.
if __name__ == "__main__":
    train_run()
拿 0210.wav 檔案測試,理想結果是 [0.0, 0.0, 1.0, 0.0],它是編號 02 那種聲音的第 10 個檔案。神經網路得出 [ 0.0000000000 0.0000349988 0.9999548197 0.0000101149],準確率相當高。
Out[1]: ./train_data/0001.wav [1.0, 0.0, 0.0, 0.0] ./train_data/0101.wav [0.0, 1.0, 0.0, 0.0] ./train_data/0201.wav [0.0, 0.0, 1.0, 0.0] ./train_data/0301.wav [0.0, 0.0, 0.0, 1.0] ./train_data/0002.wav [1.0, 0.0, 0.0, 0.0] ./train_data/0102.wav [0.0, 1.0, 0.0, 0.0] ./train_data/0202.wav [0.0, 0.0, 1.0, 0.0] ./train_data/0302.wav [0.0, 0.0, 0.0, 1.0] ./train_data/0003.wav [1.0, 0.0, 0.0, 0.0] ./train_data/0103.wav [0.0, 1.0, 0.0, 0.0] ./train_data/0203.wav [0.0, 0.0, 1.0, 0.0] ./train_data/0303.wav [0.0, 0.0, 0.0, 1.0] ./train_data/0004.wav [1.0, 0.0, 0.0, 0.0] ./train_data/0104.wav [0.0, 1.0, 0.0, 0.0] ./train_data/0204.wav [0.0, 0.0, 1.0, 0.0] ./train_data/0304.wav [0.0, 0.0, 0.0, 1.0] ./train_data/0005.wav [1.0, 0.0, 0.0, 0.0] ./train_data/0105.wav [0.0, 1.0, 0.0, 0.0] ./train_data/0205.wav [0.0, 0.0, 1.0, 0.0] ./train_data/0305.wav [0.0, 0.0, 0.0, 1.0] ./train_data/0006.wav [1.0, 0.0, 0.0, 0.0] ./train_data/0106.wav [0.0, 1.0, 0.0, 0.0] ./train_data/0206.wav [0.0, 0.0, 1.0, 0.0] ./train_data/0306.wav [0.0, 0.0, 0.0, 1.0] ./train_data/0007.wav [1.0, 0.0, 0.0, 0.0] ./train_data/0107.wav [0.0, 1.0, 0.0, 0.0] ./train_data/0207.wav [0.0, 0.0, 1.0, 0.0] ./train_data/0307.wav [0.0, 0.0, 0.0, 1.0] ./train_data/0008.wav [1.0, 0.0, 0.0, 0.0] ./train_data/0108.wav [0.0, 1.0, 0.0, 0.0] ./train_data/0208.wav [0.0, 0.0, 1.0, 0.0] ./train_data/0308.wav [0.0, 0.0, 0.0, 1.0] ./train_data/0009.wav [1.0, 0.0, 0.0, 0.0] ./train_data/0109.wav [0.0, 1.0, 0.0, 0.0] ./train_data/0209.wav [0.0, 0.0, 1.0, 0.0] ./train_data/0309.wav [0.0, 0.0, 0.0, 1.0] layer1 Layer/Weights/W:0 19.0726 0.00698306 0.00230496 0.00142493 0.00104555 0.000831516 0.000692958 0.000595413 0.000522728 0.000466331 0.000421281 0.000384355 0.000353519 0.000327388 0.000304925 0.000285414 0.000268292 0.000253139 0.000239637 0.00022752 0.000216596 0.000206709 0.000197676 0.000189412 0.000181816 保存模型: ./tensorflow_model/wav_nn_model.ckpt [ 0.0000000000 0.0000349988 0.9999548197 0.0000101149]
跪求高手解有點不太懂
回覆刪除我這個不算是教學網站 你可以去看:https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/
刪除我很多時間是在這邊學的