Skip to content

All nan when using TFReformerLM #1

@usmansarwar23

Description

@usmansarwar23

I am using TensorFlow 2.0 on Linux, but when I try to do a forward pass, the output tensor contains only NaN values.

Also, if I try to use the predict function, it throws the following error:

(None, 3200)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-3-3e4eafdb6073> in <module>()
      1 code_vec=np.zeros((1,3200),dtype=np.int8)
----> 2 model_tf.predict(code_vec)

8 frames
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/autograph/impl/api.py in wrapper(*args, **kwargs)
    235       except Exception as e:  # pylint:disable=broad-except
    236         if hasattr(e, 'ag_error_metadata'):
--> 237           raise e.ag_error_metadata.to_exception(e)
    238         else:
    239           raise

TypeError: in converted code:

    /content/master/reformers/TFreformers.py:78 call  *
        inputs = self.reformer(inputs)
    /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/base_layer.py:773 __call__
        outputs = call_fn(cast_inputs, *args, **kwargs)
    /content/master/reformers/TFreformers.py:64 call  *
        x = self.model_layers(x)
    /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/base_layer.py:773 __call__
        outputs = call_fn(cast_inputs, *args, **kwargs)
    /content/master/reformers/blocks.py:139 call  *
        h = block(h, training=training)
    /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/base_layer.py:773 __call__
        outputs = call_fn(cast_inputs, *args, **kwargs)
    /content/master/reformers/blocks.py:304 call  *
        f_x2 = self.f(x2, training=training)
    /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/base_layer.py:773 __call__
        outputs = call_fn(cast_inputs, *args, **kwargs)
    /content/master/reformers/TFutils.py:85 call  *
        return self.fn(inputs)
    /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/base_layer.py:773 __call__
        outputs = call_fn(cast_inputs, *args, **kwargs)
    /content/master/reformers/TFefficient_attention.py:271 merge_heads  *
        return tf.reshape(tf.transpose(tf.reshape(v, (b, t, h, -1)), perm=[0, 2, 1, 3]), (b * h, t, -1))
    /tmp/tmpa6sn2593.py:21 merge_heads
        retval__1 = fscope_1.mark_return_value(ag__.converted_call(tf.reshape, (ag__.converted_call(tf.transpose, (ag__.converted_call(tf.reshape, (v, (b, t, h, -1)), None, fscope_1),), dict(perm=[0, 2, 1, 3]), fscope_1), (b * h, t, -1)), None, fscope_1))
    /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/array_ops.py:193 reshape
        result = gen_array_ops.reshape(tensor, shape, name)
    /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/gen_array_ops.py:7443 reshape
        "Reshape", tensor=tensor, shape=shape, name=name)
    /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/op_def_library.py:471 _apply_op_helper
        raise err
    /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/op_def_library.py:468 _apply_op_helper
        preferred_dtype=default_dtype)
    /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/ops.py:1314 convert_to_tensor
        ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
    /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/constant_op.py:317 _constant_tensor_conversion_function
        return constant(v, dtype=dtype, name=name)
    /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/constant_op.py:258 constant
        allow_broadcast=True)
    /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/constant_op.py:296 _constant_impl
        allow_broadcast=allow_broadcast))
    /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/tensor_util.py:547 make_tensor_proto
        "supported type." % (type(values), values))

    TypeError: Failed to convert object of type <class 'tuple'> to Tensor. Contents: (None, 3200, 8, -1). Consider casting elements to a supported type.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions