fix weight #26986

Merged: 6 commits, Sep 8, 2020
Changes from all commits
@@ -16,20 +16,49 @@

import unittest

import paddle
import paddle.nn as nn
import numpy as np

paddle.disable_static()


class EmbeddingDygraph(unittest.TestCase):
    def test_1(self):
        x_data = np.arange(3, 6).reshape((3, 1)).astype(np.int64)
        y_data = np.arange(6, 12).reshape((3, 2)).astype(np.float32)
        paddle.disable_static(paddle.CPUPlace())
        x = paddle.to_tensor(x_data, stop_gradient=False)
        y = paddle.to_tensor(y_data, stop_gradient=False)

        embedding = paddle.nn.Embedding(10, 3, sparse=True)

        w0 = np.full(shape=(10, 3), fill_value=2).astype(np.float32)
        embedding.weight.set_value(w0)

        adam = paddle.optimizer.Adam(
            parameters=[embedding.weight], learning_rate=0.01)
        adam.clear_grad()

        out = embedding(x)
        out.backward()
        adam.step()

    def test_2(self):
        x_data = np.arange(3, 6).reshape((3, 1)).astype(np.int64)
        y_data = np.arange(6, 12).reshape((3, 2)).astype(np.float32)
        paddle.disable_static(paddle.CPUPlace())
        x = paddle.to_tensor(x_data, stop_gradient=False)
        y = paddle.to_tensor(y_data, stop_gradient=False)

        with self.assertRaises(ValueError):
            embedding = paddle.nn.Embedding(10, 3, padding_idx=11, sparse=True)

        with self.assertRaises(ValueError):
            embedding = paddle.nn.Embedding(-1, 3, sparse=True)

        with self.assertRaises(ValueError):
            embedding = paddle.nn.Embedding(10, -3, sparse=True)


if __name__ == '__main__':
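For reviewers who want to poke at the new argument checks outside the unittest harness, the following is a minimal dygraph sketch of what test_2 asserts. It assumes a Paddle 2.x build that already includes this PR; the loop and variable names are mine, not part of the change.

.. code-block:: python

    import paddle

    paddle.disable_static(paddle.CPUPlace())

    # Each construction below violates one of the new checks and is expected
    # to raise ValueError: padding_idx outside [-10, 10), num_embeddings <= 0,
    # embedding_dim <= 0.
    bad_cases = [
        ((10, 3), dict(padding_idx=11, sparse=True)),
        ((-1, 3), dict(sparse=True)),
        ((10, -3), dict(sparse=True)),
    ]
    for args, kwargs in bad_cases:
        try:
            paddle.nn.Embedding(*args, **kwargs)
        except ValueError as err:
            print("rejected", args, kwargs, "->", err)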
@@ -73,8 +73,13 @@ def test_bad_x():
dtype="int32")

emb = functional.embedding(
x=label, weight=weight, sparse=True, name="embedding")
x=label,
weight=weight,
padding_idx=129,
sparse=True,
name="embedding")

with self.assertRaises(ValueError):
test_bad_x()


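As background for the padding_idx=129 used above: the weight created in this test has 128 rows, so any padding_idx outside [-128, 128) should now be rejected. The sketch below shows the corresponding well-formed call through the dygraph API; the tensors and sizes are made up for illustration and are not taken from the test.

.. code-block:: python

    import numpy as np
    import paddle
    import paddle.nn.functional as F

    paddle.disable_static(paddle.CPUPlace())

    # A 128-row embedding table and a small batch of ids.
    weight = paddle.to_tensor(np.random.rand(128, 16).astype(np.float32))
    ids = paddle.to_tensor(np.array([[1, 3], [2, 127]]).astype(np.int64))

    # padding_idx=-1 normalizes to 127, so lookups of id 127 should return zeros.
    out = F.embedding(x=ids, weight=weight, padding_idx=-1, sparse=False)
    print(out.shape)  # [2, 2, 16]

    # A padding_idx of 129 (or anything else outside [-128, 128)) is what the
    # modified static-graph test above expects to fail with ValueError.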
21 changes: 13 additions & 8 deletions python/paddle/nn/functional/input.py
@@ -113,17 +113,18 @@ def one_hot(x, num_classes, name=None):

def embedding(x, weight, padding_idx=None, sparse=False, name=None):
"""
The operator is used to look up the embedding vectors of the ids provided by :attr:`x` .

The shape of the output Tensor is generated by appending the last dimension of the input Tensor shape
with the embedding size.

**Note:** The id in :attr:`x` must satisfy :math:`0 <= id < weight.shape[0]` ,
otherwise the program will throw an exception and exit.

.. code-block:: text

Case 1:
x is a Tensor.
padding_idx = -1
x.data = [[1, 3], [2, 4], [4, 127]]
x.shape = [3, 2]
@@ -138,7 +139,7 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None):
[0.0, 0.0, ..., 0.0 ]]] # padding data

Since the input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127.
It will pad all-zero data when the id is 127.

Args:
x(Tensor): A Tensor with type int32/int64, which contains the id information. The value of the input id should
@@ -151,18 +152,18 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None):
such as :ref:`api_optimizer_AdadeltaOptimizer` , :ref:`api_optimizer_AdamaxOptimizer` ,
:ref:`api_optimizer_DecayedAdagradOptimizer` , :ref:`api_optimizer_FtrlOptimizer` ,
:ref:`api_optimizer_LambOptimizer` and :ref:`api_optimizer_LarsMomentumOptimizer` .
In these cases, sparse must be False. Default: False.
padding_idx(int|long|None): padding_idx needs to be in the interval [-weight.shape[0], weight.shape[0]).
If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
to :math:`weight.shape[0] + padding\_idx` . It will output all-zero padding data whenever lookup
encounters :math:`padding\_idx` in id. And the padding data will not be updated while training.
If set to None, it has no effect on the output. Default: None.
name(str|None): For detailed information, please refer
to :ref:`api_guide_Name` . Usually, name does not need to be set and is None by default.

Returns:
Tensor: Embedding Tensor mapped by x. The data type is the same as :attr:`weight`.

Examples:

@@ -209,6 +210,10 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None):
    padding_idx = -1 if padding_idx is None else padding_idx if padding_idx >= 0 else (
        weight.shape[0] + padding_idx)

    if padding_idx >= weight.shape[0] or padding_idx < -weight.shape[0]:
        raise ValueError("padding_idx must be within [-{}, {})".format(
            weight.shape[0], weight.shape[0]))

    helper.append_op(
        type='lookup_table_v2',
        inputs={'Ids': x,
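The new check sits right after the normalization above it. The pure-Python sketch below (no Paddle needed; the helper name is mine) traces that arithmetic for a 128-row weight, which is also where the 129 in the updated test comes from.

.. code-block:: python

    def normalize_padding_idx(padding_idx, vocab_rows):
        # Same rule as in functional.embedding: None becomes -1, negative values
        # wrap around by vocab_rows, everything else passes through, and the
        # normalized value must stay inside [-vocab_rows, vocab_rows).
        normalized = (-1 if padding_idx is None
                      else padding_idx if padding_idx >= 0
                      else vocab_rows + padding_idx)
        if normalized >= vocab_rows or normalized < -vocab_rows:
            raise ValueError("padding_idx must be within [-{}, {})".format(
                vocab_rows, vocab_rows))
        return normalized

    print(normalize_padding_idx(None, 128))  # -1, i.e. no padding row
    print(normalize_padding_idx(-1, 128))    # 127
    print(normalize_padding_idx(127, 128))   # 127
    normalize_padding_idx(129, 128)          # raises ValueError, as in the new test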
73 changes: 47 additions & 26 deletions python/paddle/nn/layer/common.py
@@ -1551,22 +1551,18 @@ def forward(self, x1, x2):

class Embedding(layers.Layer):
"""
**Embedding Layer**

This interface is used to construct a callable object of the ``Embedding`` class.
For specific usage, refer to code examples. It implements the function of the Embedding Layer.
This layer is used to look up the embedding vectors of the ids provided by :attr:`x` .
It automatically constructs a 2D embedding matrix based on the
input :attr:`num_embeddings` and :attr:`embedding_dim` .

The shape of output Tensor is generated by appending an emb_size dimension to the
last dimension of the input Tensor shape.

**Note:** The id in :attr:`x` must satisfy :math:`0 <= id < num_embeddings` ,
otherwise the program will throw an exception and exit.

.. code-block:: text
@@ -1594,7 +1590,7 @@ class Embedding(layers.Layer):
num_embeddings (int): Just one element which indicates the size
of the dictionary of embeddings.
embedding_dim (int): Just one element which indicates the size of each embedding vector.
padding_idx(int|long|None): padding_idx needs to be in the interval [-num_embeddings, num_embeddings).
If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
to :math:`num\_embeddings + padding\_idx` . It will output all-zero padding data whenever lookup
encounters :math:`padding\_idx` in id. And the padding data will not be updated while training.
@@ -1605,13 +1601,13 @@ class Embedding(layers.Layer):
such as :ref:`api_optimizer_AdadeltaOptimizer` , :ref:`api_optimizer_AdamaxOptimizer` ,
:ref:`api_optimizer_DecayedAdagradOptimizer` , :ref:`api_optimizer_FtrlOptimizer` ,
:ref:`api_optimizer_LambOptimizer` and :ref:`api_optimizer_LarsMomentumOptimizer` .
In these cases, sparse must be False. Default: False.
weight_attr(ParamAttr): To specify the weight parameter property. Default: None, which means the
default weight parameter property is used. See usage for details in :ref:`api_ParamAttr` . In addition,
user-defined or pre-trained word vectors can be loaded with the :attr:`weight_attr` parameter.
The local word vector needs to be transformed into numpy format, and the shape of local word
vector should be consistent with :attr:`num_embeddings` . Then :ref:`api_initializer_NumpyArrayInitializer`
is used to load custom or pre-trained word vectors. See code example for details.
name(str|None): For detailed information, please refer
to :ref:`api_guide_Name` . Usually, name does not need to be set and is None by default.
@@ -1626,20 +1622,34 @@ class Embedding(layers.Layer):

.. code-block:: python

import paddle
import numpy as np

x_data = np.arange(3, 6).reshape((3, 1)).astype(np.int64)
y_data = np.arange(6, 12).reshape((3, 2)).astype(np.float32)
paddle.disable_static(paddle.CPUPlace())
x = paddle.to_tensor(x_data, stop_gradient=False)
y = paddle.to_tensor(y_data, stop_gradient=False)

embedding = paddle.nn.Embedding(10, 3, sparse=True)

w0 = np.full(shape=(10, 3), fill_value=2).astype(np.float32)
embedding.weight.set_value(w0)

adam = paddle.optimizer.Adam(parameters=[embedding.weight], learning_rate=0.01)
adam.clear_grad()

# weight.shape = [10, 3]

# x.data = [[3], [4], [5]]
# x.shape = [3, 1]

# out.data = [[2, 2, 2], [2, 2, 2], [2, 2, 2]]
# out.shape = [3, 1, 3]
out = embedding(x)
out.backward()
adam.step()
"""

def __init__(self,
@@ -1656,13 +1666,24 @@ def __init__(self,
    self._is_distributed = False
    self._padding_idx = -1 if padding_idx is None else padding_idx if padding_idx >= 0 else (
        num_embeddings + padding_idx)

    if self._num_embeddings <= 0:
        raise ValueError("num_embeddings must be greater than 0")

    if self._embedding_dim <= 0:
        raise ValueError("embedding_dim must be greater than 0")

    if self._padding_idx >= num_embeddings or self._padding_idx < -num_embeddings:
        raise ValueError("padding_idx must be within [-{}, {})".format(
            num_embeddings, num_embeddings))

    self._dtype = self._helper.get_default_dtype()
    self._size = [self._num_embeddings, self._embedding_dim]

    self._weight_attr = weight_attr
    self._remote_prefetch = False
    self._name = name
    self.weight = self.create_parameter(
        attr=self._weight_attr,
        shape=self._size,
        dtype=self._dtype,
@@ -1671,7 +1692,7 @@ def forward(self, x):
def forward(self, x):
    return F.embedding(
        x,
        weight=self.weight,
        padding_idx=self._padding_idx,
        sparse=self._sparse,
        name=self._name)
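One practical consequence of renaming the private _weight to the public weight attribute: user code can now inspect, overwrite, and optimize the parameter directly, which is exactly what the updated docstring example and test_1 rely on. A short hedged sketch, assuming a Paddle 2.x dygraph environment with this PR applied:

.. code-block:: python

    import numpy as np
    import paddle

    paddle.disable_static(paddle.CPUPlace())

    embedding = paddle.nn.Embedding(10, 3, sparse=True)

    # The exposed parameter behaves like any other Parameter: read its shape,
    # load pre-trained values, and pass it to an optimizer.
    print(embedding.weight.shape)  # [10, 3]
    embedding.weight.set_value(np.full((10, 3), 2).astype(np.float32))
    adam = paddle.optimizer.Adam(parameters=[embedding.weight], learning_rate=0.01)

    ids = paddle.to_tensor(np.array([[3], [4], [5]]).astype(np.int64))
    out = embedding(ids)  # expected shape [3, 1, 3], every vector filled with 2.0
    out.backward()
    adam.step()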