Erreur de fusion estivale Tensorflow: la forme [-1,784] a des dimensions négatives

Question

J'essaie d'obtenir un résumé du processus de formation du réseau neuronal ci-dessous.

import tensorflow as tf import numpy as np from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets(".\MNIST",one_hot=True) # Create the model def train_and_test(hidden1,hidden2, learning_rate, epochs, batch_size): with tf.name_scope("first_layer"): input_data = tf.placeholder(tf.float32, [batch_size, 784], name = "input") weights1 = tf.Variable( tf.random_normal(shape =[784, hidden1],stddev=0.1),name = "weights") bias = tf.Variable(tf.constant(0.0,shape =[hidden1]), name = "bias") activation = tf.nn.relu( tf.matmul(input_data, weights1) + bias, name = "relu_act") tf.summary.histogram("first_activation", activation) with tf.name_scope("second_layer"): weights2 = tf.Variable( tf.random_normal(shape =[hidden1, hidden2],stddev=0.1), name = "weights") bias2 = tf.Variable(tf.constant(0.0,shape =[hidden2]), name = "bias") activation2 = tf.nn.relu( tf.matmul(activation, weights2) + bias2, name = "relu_act") tf.summary.histogram("second_activation", activation2) with tf.name_scope("output_layer"): weights3 = tf.Variable( tf.random_normal(shape=[hidden2, 10],stddev=0.5), name = "weights") bias3 = tf.Variable(tf.constant(1.0, shape =[10]), name = "bias") output = tf.add( tf.matmul(activation2, weights3, name = "mul"), bias3, name = "output") tf.summary.histogram("output_activation", output) y_ = tf.placeholder(tf.float32, [batch_size, 10]) with tf.name_scope("loss"): cross_entropy = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=output)) tf.summary.scalar("cross_entropy", cross_entropy) with tf.name_scope("train"): train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy) with tf.name_scope("tests"): correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y_, 1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) summary_op = tf.summary.merge_all() sess = tf.InteractiveSession() writer = tf.summary.FileWriter("./data", sess.graph) tf.global_variables_initializer().run() # Train for i in range(epochs): batch_xs, batch_ys = mnist.train.next_batch(batch_size) _, summary = sess.run([train_step,summary_op], feed_dict={input_data: batch_xs, y_: batch_ys}) writer.add_summary(summary) if i % 10 ==0: test_xs, test_ys = mnist.train.next_batch(batch_size) test_accuracy = sess.run(accuracy, feed_dict = {input_data : test_xs, y_ : test_ys}) writer.close() return test_accuracy if __=="__main__": print(train_and_test(500, 200, 0.001, 10000, 100))

Je teste le modèle toutes les 10 étapes avec un lot aléatoire de données de test ..___ Le problème réside dans le sommateur. Le sess.run () à l'intérieur de la boucle for lève l'erreur suivante.

 Traceback (most recent call last): File "<ipython-input-18-78c88c8e6471>", line 1, in <module> runfile('C:/Users/Suman Nepal/Documents/Projects/MNISTtensorflow/mnist.py', wdir='C:/Users/Suman Nepal/Documents/Projects/MNISTtensorflow') File "C:\Users\Suman Nepal\Anaconda3\lib\site- packages\spyder\utils\site\sitecustomize.py", line 880, in runfile execfile(filename, namespace) File "C:\Users\Suman Nepal\Anaconda3\lib\site- packages\spyder\utils\site\sitecustomize.py", line 102, in execfile exec(compile(f.read(), filename, 'exec'), namespace) File "C:/Users/Suman Nepal/Documents/Projects/MNISTtensorflow/mnist.py", line 68, in <module> print(train_and_test(500, 200, 0.001, 100, 100)) File "C:/Users/Suman Nepal/Documents/Projects/MNISTtensorflow/mnist.py", line 58, in train_and_test _, summary = sess.run([train_step,summary_op], feed_dict={input_data: batch_xs, y_: batch_ys}) File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages	ensorflow\python\client\session.py", line 789, in run run_metadata_ptr) File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages	ensorflow\python\client\session.py", line 997, in _run feed_dict_string, options, run_metadata) File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages	ensorflow\python\client\session.py", line 1132, in _do_run target_list, options, run_metadata) File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages	ensorflow\python\client\session.py", line 1152, in _do_call raise type(e)(node_def, op, message) InvalidArgumentError: Shape [-1,784] has negative dimensions [[Node: first_layer_5/input = Placeholder[dtype=DT_FLOAT, shape=[?,784], _device="/job:localhost/replica:0/task:0/cpu:0"]()]] Caused by op 'first_layer_5/input', defined at: File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages\spyder\utils\ipython\start_kernel.py", line 231, in <module> main() File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages\spyder\utils\ipython\start_kernel.py", line 227, in main kernel.start() File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 477, in start ioloop.IOLoop.instance().start() File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start super(ZMQIOLoop, self).start() File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages	ornado\ioloop.py", line 888, in start handler_func(fd_obj, events) File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages	ornado\stack_context.py", line 277, in null_wrapper return fn(*args, **kwargs) File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events self._handle_recv() File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv self._run_callback(callback, msg) File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback callback(*args, **kwargs) File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages	ornado\stack_context.py", line 277, in null_wrapper return fn(*args, **kwargs) File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher return self.dispatch_Shell(stream, msg) File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 235, in dispatch_Shell handler(stream, idents, msg) File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request user_expressions, allow_stdin) File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 196, in do_execute res = Shell.run_cell(code, store_history=store_history, silent=silent) File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs) File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2717, in run_cell interactivity=interactivity, compiler=compiler, result=result) File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2827, in run_ast_nodes if self.run_code(code, result): File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2881, in run_code exec(code_obj, self.user_global_ns, self.user_ns) File "<ipython-input-8-78c88c8e6471>", line 1, in <module> runfile('C:/Users/Suman Nepal/Documents/Projects/MNISTtensorflow/mnist.py', wdir='C:/Users/Suman Nepal/Documents/Projects/MNISTtensorflow') File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 880, in runfile execfile(filename, namespace) File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 102, in execfile exec(compile(f.read(), filename, 'exec'), namespace) File "C:/Users/Suman Nepal/Documents/Projects/MNISTtensorflow/mnist.py", line 86, in <module> File "C:/Users/Suman Nepal/Documents/Projects/MNISTtensorflow/mnist.py", line 12, in train_and_test input_data = tf.placeholder(tf.float32, [None, 784], name = "input") File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages	ensorflow\python\ops\array_ops.py", line 1530, in placeholder return gen_array_ops._placeholder(dtype=dtype, shape=shape, name=name) File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages	ensorflow\python\ops\gen_array_ops.py", line 1954, in _placeholder name=name) File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages	ensorflow\python\framework\op_def_library.py", line 767, in apply_op op_def=op_def) File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages	ensorflow\python\framework\ops.py", line 2506, in create_op original_op=self._default_original_op, op_def=op_def) File "C:\Users\Suman Nepal\Anaconda3\lib\site-packages	ensorflow\python\framework\ops.py", line 1269, in __init__ self._traceback = _extract_stack() InvalidArgumentError (see above for traceback): Shape [-1,784] has negative dimensions [[Node: first_layer_5/input = Placeholder[dtype=DT_FLOAT, shape=[?,784], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

Si je supprime tous les rédacteurs de résumé et résumé, le modèle fonctionne correctement . Pouvez-vous m'aider à localiser le problème ici? J'ai essayé de manipuler les formes de tenseurs mais je n'ai abouti à rien.

Albert · Answer

D'un commentaire de la réponse supprimée, de l'affiche originale:

En fait, je construis un réseau neuronal sous with tf.Graph() as g. J'ai supprimé la session interactive et commencé la session en tant que with tf.Session(g) as sess. Cela a résolu le problème.

Le graphe g n'était pas marqué comme le graphe par défaut de cette façon, ainsi la session (tf.InteractiveSession dans le code d'origine) utiliserait un autre graphe à la place.

Notez que je suis tombé sur ici à cause du même message d'erreur. Dans mon cas, j'avais accidentellement quelque chose comme ceci:

input_data = tf.placeholder(tf.float32, shape=(None, 50)) input_data = tf.tanh(input_data) session.run(..., feed_dict={input_data: ...})

C'est à dire. Je n'ai pas nourri l'espace réservé. Il semble que certaines autres opérations de tenseurs puissent alors entraîner cette erreur déroutante, car une dimension indéfinie est représentée en interne par -1.

Fergal · Answer

J'avais aussi ce problème. Rechercher dans le consensus de base consiste à rechercher des problèmes ailleurs dans votre code.

Ce qui a réglé le problème pour moi, c'est que je faisais une sess.run(summary_op) sans alimenter les données de mes espaces réservés.

Tensorflow semble être un peu étrange avec les espaces réservés. Souvent, ils ne voudront pas que vous ne les alimentiez pas si vous essayez d'évaluer une partie du graphique qui en est indépendante. Ici cependant, il l'a fait.

RK_ · Answer

Cela a peut-être à voir avec l'initialisation InteractiveSession.

Je l'ai initialisé au début, puis cela a fonctionné, puis les variables globales ont été initialisées dans la session.

Je ne peux pas reproduire l'erreur avec l'ancien code, ce qui le rend imprévisible ou met en cache les paramètres quelque part.

import tensorflow as tf sess = tf.InteractiveSession() from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) x = tf.placeholder(tf.float32, [None, 784]) W = tf.Variable(tf.zeros([784,10])) b = tf.Variable(tf.zeros([10])) y = tf.nn.softmax(tf.matmul(x, W)+b) y_ = tf.placeholder(tf.float32, [None,10]) cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1])) train_step = tf.train.GradientDescentOptimizer(0.05).minimize(cross_entropy) sess.run(tf.global_variables_initializer()) for _ in range(1000): batch_xs, batch_ys = mnist.train.next_batch(100) #print batch_xs.shape, batch_ys.shape sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})