
How can I apply dropout in TensorFlow to improve the accuracy of my neural network?

Dropout is a regularization technique, and I want to apply it to the notMNIST data to reduce over-fitting and finish my Udacity Deep Learning course assignment. I have read the TensorFlow docs on how to call tf.nn.dropout. Here is my code:

# Make sure these modules can be imported before proceeding further.
from __future__ import print_function
import numpy as np  
import tensorflow as tf
from six.moves import cPickle as pickle


pickle_file = 'notMNIST.pickle'

with open(pickle_file, 'rb') as f:
    save = pickle.load(f)
    train_dataset = save['train_dataset']
    train_labels = save['train_labels']
    valid_dataset = save['valid_dataset']
    valid_labels = save['valid_labels']
    test_dataset = save['test_dataset']
    test_labels = save['test_labels']
    del save  # hint to help gc free up memory
    print('Training set', train_dataset.shape, train_labels.shape)
    print('Validation set', valid_dataset.shape, valid_labels.shape)
    print('Test set', test_dataset.shape, test_labels.shape)


image_size = 28
num_labels = 10

def reformat(dataset, labels):
    dataset = dataset.reshape((-1, image_size * image_size)).astype(np.float32)
    # Map 1 to [0.0, 1.0, 0.0 ...], 2 to [0.0, 0.0, 1.0 ...]
    labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
    return dataset, labels

train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)


def accuracy(predictions, labels):
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1)) / predictions.shape[0])


# ReLU neuron
# param
training_epochs = 30
batch_size = 521
display_step = 1
n_input = 784 # img shape: 28*28
n_classes = 10 # MNIST total classes (0-9 digits)

# hyper-parameter
n_hidden_1 = 256 
learning_rate = 0.05
lambda_term = 0.01


graph = tf.Graph()
with graph.as_default():
    # init weights
    weights_hiden =  tf.Variable(tf.random_normal([n_input, n_hidden_1], stddev=np.sqrt(n_input)))
    weights_out = tf.Variable(tf.random_normal([n_hidden_1, n_classes], stddev=np.sqrt(n_hidden_1)))

    biases_hidden = tf.Variable(tf.random_normal([n_hidden_1]))
    biases_out = tf.Variable(tf.random_normal([n_classes]))

    x = tf.placeholder("float", [None, n_input])
    y = tf.placeholder("float", [None, n_classes])

    def model(x, weights_hiden, weights_out, biases_hidden, biases_out):
        # hidden layer with RELU activation
        layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights_hiden), biases_hidden))
        # apply DropOut to hidden layer
        keep_prob = tf.placeholder(tf.float32)  # DROP-OUT here
        drop_out = tf.nn.dropout(layer_1, keep_prob)  # DROP-OUT here
        # output layer with linear activation
        out_layer = tf.matmul(layer_1, weights_out) + biases_out
        return out_layer

    # Construct model
    pred = model(x, weights_hiden, weights_out, biases_hidden, biases_out)

    # Define loss and optimizer
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y) +
                          lambda_term * tf.nn.l2_loss(weights_hiden) + 
                          lambda_term * tf.nn.l2_loss(weights_out) +
                          lambda_term * tf.nn.l2_loss(biases_hidden) + 
                          lambda_term * tf.nn.l2_loss(biases_out))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)


# run the graph
with tf.Session(graph=graph) as sess:
    tf.initialize_all_variables().run()
    print('Initialized')
    # Training cycle
    for Epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(train_dataset.shape[0]/batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_x = train_dataset[(i*batch_size):((i*batch_size) + batch_size), :]
            batch_y = train_labels[(i*batch_size):((i*batch_size) + batch_size), :]
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch
        # Display logs per Epoch step
        if Epoch % display_step == 0:
            print("Epoch:", '%04d' % (Epoch+1), "cost=", "{:.9f}".format(avg_cost))
    print("Optimization Finished!")

    # Test model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Test data accuracy:", accuracy.eval({x: test_dataset, y: test_labels}))
    print("Valid data accuracy:", accuracy.eval({x: valid_dataset, y: valid_labels}))

tf.nn.dropout is called inside the model() function, but after applying dropout to the neural network the accuracy does not seem to have changed. Here is the output:

Epoch: 0001 cost= 579980.086977807
Epoch: 0002 cost= 238859.802382506
Epoch: 0003 cost= 90672.733752856
Epoch: 0004 cost= 32649.040985028
Epoch: 0005 cost= 11325.878361874
Epoch: 0006 cost= 3866.805511076
Epoch: 0007 cost= 1357.785540469
Epoch: 0008 cost= 519.381747333
Epoch: 0009 cost= 225.359804119
Epoch: 0010 cost= 110.099476707
Epoch: 0011 cost= 55.212384386
Epoch: 0012 cost= 28.469241683
Epoch: 0013 cost= 14.511494627
Epoch: 0014 cost= 6.567228943
Epoch: 0015 cost= 3.186372240
Epoch: 0016 cost= 1.701917576
Epoch: 0017 cost= 1.041632473
Epoch: 0018 cost= 0.843376874
Epoch: 0019 cost= 0.786183911
Epoch: 0020 cost= 0.775412846
Epoch: 0021 cost= 0.782965020
Epoch: 0022 cost= 0.796788171
Epoch: 0023 cost= 0.814522117
Epoch: 0024 cost= 0.832090579
Epoch: 0025 cost= 0.849197715
Epoch: 0026 cost= 0.867473578
Epoch: 0027 cost= 0.889561496
Epoch: 0028 cost= 0.921837020
Epoch: 0029 cost= 16.655304543
Epoch: 0030 cost= 1.421570476
Optimization Finished!
Test data accuracy: 0.8775
Valid data accuracy: 0.8069

How can I apply dropout with TensorFlow to improve the accuracy of the network? Thank you!

38
GoingMyWay

In the graph, I would suggest moving keep_prob = tf.placeholder(tf.float32) outside the model function so that it is global:

with graph.as_default():
    ...
    x = tf.placeholder("float", [None, n_input])
    y = tf.placeholder("float", [None, n_classes])
    keep_prob = tf.placeholder(tf.float32)

    def model(x, weights_hiden, weights_out, biases_hidden, biases_out, keep_prob):
        # hidden layer with RELU activation
        layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights_hiden), biases_hidden))
        # apply DropOut to hidden layer
        drop_out = tf.nn.dropout(layer_1, keep_prob)  # DROP-OUT here
        # output layer with linear activation
        out_layer = tf.matmul(drop_out, weights_out) + biases_out
        return out_layer
    ...

When you run the session, feed a keep_prob value smaller than 1.0 during training, and feed 1.0 to keep_prob during evaluation (validation and/or test); see the note after the snippet below for why keep_prob = 1.0 makes dropout a no-op.

# run the graph
with tf.Session(graph=graph) as sess:
    tf.initialize_all_variables().run()
    ...
    for Epoch in range(training_epochs):
        ...
        for i in range(total_batch):
            batch_x = ...
            batch_y = ...
            # Run optimization op (backprop) and cost op (to get loss value)
            # Feed a value < 1.0 for keep prob during training
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y, keep_prob : 0.5})
    ...
    # Feed 1.0 for keep prob during testing
    print("Test data accuracy:", accuracy.eval({x: test_dataset, y: test_labels, keep_prob : 1.0}))
    print("Valid data accuracy:", accuracy.eval({x: valid_dataset, y: valid_labels, keep_prob : 1.0}))
53
Zhongyu Kuang

The key point here is that:

    layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights_hiden), biases_hidden))
    # apply DropOut to hidden layer
    keep_prob = tf.placeholder(tf.float32)  # DROP-OUT here
    drop_out = tf.nn.dropout(layer_1, keep_prob)  # DROP-OUT here
    # output layer with linear activation
    out_layer = tf.matmul(layer_1, weights_out) + biases_out

becomes:

    layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights_hiden), biases_hidden))
    # apply DropOut to hidden layer
    drop_out = tf.nn.dropout(layer_1, keep_prob)  # DROP-OUT here
    # output layer with linear activation
    out_layer = tf.matmul(drop_out, weights_out) + biases_out

where drop_out is used in the last line instead of layer_1. Otherwise, the dropout op is simply ignored.
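
To see why, note that tf.nn.dropout returns a new tensor and leaves layer_1 untouched; if nothing downstream consumes that returned tensor, the dropout op never takes part in the computation. A minimal sketch illustrating this (TF 1.x API; the toy tensors and names such as out_ignored are made up for the example):

    import tensorflow as tf

    x = tf.constant([[1.0, 2.0, 3.0, 4.0]])
    keep_prob = tf.placeholder(tf.float32)

    layer_1 = tf.nn.relu(x)
    drop_out = tf.nn.dropout(layer_1, keep_prob)   # a new tensor; layer_1 itself is unchanged

    out_ignored = layer_1 * 2.0    # dropout is not on this path, so keep_prob has no effect
    out_applied = drop_out * 2.0   # dropout actually influences this output

    with tf.Session() as sess:
        print(sess.run(out_ignored, feed_dict={keep_prob: 0.5}))  # always [[2. 4. 6. 8.]]
        print(sess.run(out_applied, feed_dict={keep_prob: 0.5}))  # some entries zeroed, survivors scaled by 1/keep_prob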

12
James Shiztar