From 2d91a9bd80fa4b58a1d16f1d60f1323c542b49f4 Mon Sep 17 00:00:00 2001 From: Difer <707065510@qq.com> Date: Fri, 4 Aug 2023 20:41:56 +0800 Subject: [PATCH] repacle embedding in fluid with 2.0 version (#55757) * replace embedding * replace sparse_embedding * fix some bugs * del embedding * repalce layers.embedding * fix type error --- python/paddle/fluid/layers/nn.py | 166 ------------------ .../distribute_transpiler/__init__.py | 2 +- .../fleet/parameter_server/pslib/__init__.py | 5 +- test/auto_parallel/test_dist_embedding.py | 25 ++- test/book/notest_understand_sentiment.py | 2 +- test/book/test_recommender_system.py | 20 +-- test/book/test_word2vec_book.py | 8 +- .../distributed/test_dist_pod128_sample.py | 2 +- test/ipu/distributed/test_dist_sample.py | 2 +- test/ipu/test_lookuptable_op_ipu.py | 2 +- test/ipu/test_weight_sharing_ipu.py | 2 +- ...r_embedding_eltwise_layernorm_fuse_pass.py | 54 +++--- test/legacy_test/dist_ctr.py | 4 +- test/legacy_test/dist_fleet_ctr.py | 8 +- .../dist_fleet_heter_pipeline_ctr.py | 4 +- test/legacy_test/dist_text_classification.py | 2 +- test/legacy_test/dist_word2vec.py | 8 +- test/legacy_test/fleet_heter_ps_training.py | 4 +- test/legacy_test/nets.py | 2 +- test/legacy_test/simple_nets.py | 2 +- test/legacy_test/test_communicator_geo.py | 4 +- ..._dist_fleet_a_sync_optimizer_auto_async.py | 2 +- ...st_dist_fleet_a_sync_optimizer_auto_geo.py | 2 +- .../test_dist_fleet_heter_program.py | 3 +- test/legacy_test/test_dist_fleet_ps.py | 6 +- test/legacy_test/test_dist_fleet_ps3.py | 6 +- test/legacy_test/test_dist_fleet_ps5.py | 6 +- test/legacy_test/test_dist_sparse_load_ps0.py | 2 +- .../test_dist_sparse_tensor_load_sgd.py | 2 +- test/legacy_test/test_dist_transpiler.py | 14 +- test/legacy_test/test_downpoursgd.py | 6 +- .../test_eager_deletion_padding_rnn.py | 3 +- test/legacy_test/test_entry_attr2.py | 2 +- test/legacy_test/test_fleet.py | 2 +- test/legacy_test/test_fleet_base_2.py | 4 +- test/legacy_test/test_fleet_nocvm_1.py | 2 +- test/legacy_test/test_fleet_unitaccessor.py | 2 +- test/legacy_test/test_gradient_clip.py | 2 +- test/legacy_test/test_hsigmoid_op.py | 2 +- test/legacy_test/test_layers.py | 16 +- test/legacy_test/test_lookup_table_bf16_op.py | 4 +- test/legacy_test/test_lookup_table_op.py | 9 +- test/legacy_test/test_monitor.py | 4 +- test/legacy_test/test_regularizer.py | 2 +- test/legacy_test/test_regularizer_api.py | 2 +- test/legacy_test/test_sgd_op_bf16.py | 2 +- test/legacy_test/test_weight_decay.py | 2 +- test/legacy_test/transformer_model.py | 5 +- 48 files changed, 141 insertions(+), 301 deletions(-) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 067e5a55c9c755..a4a770a97829a0 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -57,176 +57,10 @@ __all__ = [ - 'embedding', 'autoincreased_step_counter', ] -@deprecated(since="2.0.0", update_to="paddle.nn.functional.embedding") -def embedding( - input, - size, - is_sparse=False, - is_distributed=False, - padding_idx=None, - param_attr=None, - dtype='float32', -): - r""" - :api_attr: Static Graph - - **WARNING:** This OP will be deprecated in a future release. This OP requires the - last dimension of Tensor shape must be equal to 1. It is recommended to use - fluid. :ref:`api_fluid_embedding` . - - The operator is used to lookup embeddings vector of ids provided by :attr:`input` . - It automatically constructs a 2D embedding matrix based on the - input :attr:`size` (vocab_size, emb_size) and :attr:`dtype` . - - This OP requires the last dimension of Tensor shape must be equal to 1. The shape - of output Tensor is generated by replacing the last dimension of the input Tensor shape - with emb_size. - - **Note:** The id in :attr:`input` must satisfy :math:`0 =< id < size[0]` , - otherwise the program will throw an exception and exit. - - .. code-block:: text - - Case 1: - - input is a Tensor. padding_idx = -1 - input.data = [[[1], [3]], [[2], [4]], [[4], [127]]] - input.shape = [3, 2, 1] - Given size = [128, 16] - output is a Tensor: - out.shape = [3, 2, 16] - out.data = [[[0.129435295, 0.244512452, ..., 0.436322452], - [0.345421456, 0.524563927, ..., 0.144534654]], - - [[0.345249859, 0.124939536, ..., 0.194353745], - [0.945345345, 0.435394634, ..., 0.435345365]], - - [[0.945345345, 0.435394634, ..., 0.435345365], - [0.0, 0.0, ..., 0.0 ]]] # padding data - The input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127 - It will pad all-zero data when ids is 127. - - Case 2: - - input is a LoDTensor with 1-level LoD. padding_idx = 0 - input.lod = [[2, 3]] - input.data = [[1], [3], [2], [4], [0]] - input.shape = [5, 1] - Given size = [128, 16] - output is a LoDTensor: - out.lod = [[2, 3]] - out.shape = [5, 16] - out.data = [[0.129435295, 0.244512452, ..., 0.436322452], - [0.345421456, 0.524563927, ..., 0.144534654], - [0.345249859, 0.124939536, ..., 0.194353745], - [0.945345345, 0.435394634, ..., 0.435345365], - [0.0, 0.0, ..., 0.0 ]] # padding data - It will pad all-zero data when ids is 0. - - Args: - input(Variable): A Tensor or LoDTensor with type int64, which contains the id information. - The last dimension of Tensor shape must be equal to 1. The value of the input id should - satisfy :math:`0<= id < size[0]` . - size(tuple|list): The shape of lookup table parameter. It should have two elements which - indicates the size of the dictionary of embeddings and the size of each embedding vector respectively. - is_sparse(bool): The flag indicating whether to use sparse update. This parameter only - affects the performance of the backwards gradient update. It is recommended to set - True because sparse update is faster. But some optimizer does not support sparse update, - such as :ref:`api_fluid_optimizer_AdadeltaOptimizer` , :ref:`api_fluid_optimizer_AdamaxOptimizer` , - :ref:`api_fluid_optimizer_DecayedAdagradOptimizer` , :ref:`api_fluid_optimizer_FtrlOptimizer` , - :ref:`api_fluid_optimizer_LambOptimizer` and :ref:`api_fluid_optimizer_LarsMomentumOptimizer` . - In these case, is_sparse must be False. Default: False. - is_distributed(bool): Whether to store the embedding matrix in a distributed manner. Only used - in multi-machine distributed CPU training. Default: False. - padding_idx(int|long|None): padding_idx needs to be in the interval [-vocab_size, vocab_size). - If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted - to :math:`vocab\_size + padding\_idx` . It will output all-zero padding data whenever lookup - encounters :math:`padding\_idx` in id. And the padding data will not be updated while training. - If set None, it makes no effect to output. Default: None. - param_attr(ParamAttr): To specify the weight parameter property. Default: None, which means the - default weight parameter property is used. See usage for details in :ref:`api_fluid_ParamAttr` . In addition, - user-defined or pre-trained word vectors can be loaded with the :attr:`param_attr` parameter. - The local word vector needs to be transformed into numpy format, and the shape of local word - vector should be consistent with :attr:`size` . Then :ref:`api_fluid_initializer_NumpyArrayInitializer` - is used to load custom or pre-trained word vectors. See code example 2 for details. - dtype(str|core.VarDesc.VarType): It refers to the data type of output Tensor. - It must be float32 or float64. Default: float32. - - Returns: - Variable: Embedding Tensor or LoDTensor mapped by input. The data type is the same as :attr:`dtype` . - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - import numpy as np - import paddle - paddle.enable_static() - - data = paddle.static.data(name='x', shape=[None, 1], dtype='int64') - - # example 1 - emb_1 = paddle.static.nn.embedding(input=data, size=[128, 64]) - - # example 2: load custom or pre-trained word vectors - weight_data = np.random.random(size=(128, 100)) # word vectors with numpy format - w_param_attrs = fluid.ParamAttr( - name="emb_weight", - learning_rate=0.5, - initializer=paddle.nn.initializer.Assign(weight_data), - trainable=True) - emb_2 = fluid.layers.embedding(input=data, size=(128, 100), param_attr=w_param_attrs, dtype='float32') - """ - - helper = LayerHelper('embedding', **locals()) - check_variable_and_dtype( - input, 'input', ['int64'], 'fluid.layers.embedding' - ) - check_dtype( - dtype, - 'dtype', - ['uint16', 'float16', 'float32', 'float64'], - 'fluid.layers.embedding', - ) - - if is_distributed: - is_distributed = False - warnings.warn( - "is_distributed is go out of use, `paddle.static.nn.sparse_embedding` is your needed" - ) - - remote_prefetch = True if is_sparse else False - - w = helper.create_parameter( - attr=helper.param_attr, shape=size, dtype=dtype, is_bias=False - ) - tmp = helper.create_variable_for_type_inference(dtype) - padding_idx = ( - -1 - if padding_idx is None - else padding_idx - if padding_idx >= 0 - else (size[0] + padding_idx) - ) - helper.append_op( - type='lookup_table', - inputs={'Ids': input, 'W': w}, - outputs={'Out': tmp}, - attrs={ - 'is_sparse': is_sparse, - 'is_distributed': is_distributed, - 'remote_prefetch': remote_prefetch, - 'padding_idx': padding_idx, - }, - ) - return tmp - - def autoincreased_step_counter(counter_name=None, begin=1, step=1): """ :api_attr: Static Graph diff --git a/python/paddle/incubate/distributed/fleet/parameter_server/distribute_transpiler/__init__.py b/python/paddle/incubate/distributed/fleet/parameter_server/distribute_transpiler/__init__.py index 8d3fb51cc794a7..800950e78f2199 100644 --- a/python/paddle/incubate/distributed/fleet/parameter_server/distribute_transpiler/__init__.py +++ b/python/paddle/incubate/distributed/fleet/parameter_server/distribute_transpiler/__init__.py @@ -157,7 +157,7 @@ def get_sparse_attrs(): if len(dist_varnames) != 0: raise ValueError( - "GeoStrategy can not support large scale embeding now, please use fluid.layers.embedding" + "GeoStrategy can not support large scale embeding now, please use paddle.static.nn.embedding" ) init_attrs = [] diff --git a/python/paddle/incubate/distributed/fleet/parameter_server/pslib/__init__.py b/python/paddle/incubate/distributed/fleet/parameter_server/pslib/__init__.py index d4e1c77ae96f43..d8b61aadb5c0c6 100644 --- a/python/paddle/incubate/distributed/fleet/parameter_server/pslib/__init__.py +++ b/python/paddle/incubate/distributed/fleet/parameter_server/pslib/__init__.py @@ -1124,7 +1124,7 @@ class fleet_embedding: Example: .. code-block:: python with fleet_embedding(click_name=label.name): - emb = fluid.layers.embedding( + emb = paddle.static.nn.embedding( input=var, size=[-1, 11], is_sparse=True, @@ -1134,7 +1134,6 @@ class fleet_embedding: def __init__(self, click_name, scale_sparse_grad=True): """Init.""" - # self.origin_emb = fluid.layers.embedding self.origin_emb_v2 = paddle.static.nn.embedding # if user uses cvm layer after embedding, click_name can be None self.click_name = "" if click_name is None else click_name @@ -1144,7 +1143,6 @@ def __init__(self, click_name, scale_sparse_grad=True): def __enter__(self): """Enter.""" - # fluid.layers.embedding = _fleet_embedding paddle.static.nn.embedding = _fleet_embedding_v2 FLEET_GLOBAL_DICT["cur_accessor"] = self.accessor FLEET_GLOBAL_DICT["click_name"] = self.click_name @@ -1152,7 +1150,6 @@ def __enter__(self): def __exit__(self, exc_type, exc_val, exc_tb): """Exit.""" - # fluid.layers.embedding = self.origin_emb paddle.static.nn.embedding = self.origin_emb_v2 FLEET_GLOBAL_DICT["cur_accessor"] = "" FLEET_GLOBAL_DICT["click_name"] = "" diff --git a/test/auto_parallel/test_dist_embedding.py b/test/auto_parallel/test_dist_embedding.py index 925b5c4bdee5a6..bdfdc0ef32a780 100644 --- a/test/auto_parallel/test_dist_embedding.py +++ b/test/auto_parallel/test_dist_embedding.py @@ -31,13 +31,26 @@ def make_program_lookup_table_v1_mp_dp(): name='src_ids', shape=[12, 512, 1], dtype='int64' ) src_ids.stop_gradient = True - emb_out = paddle.fluid.layers.embedding( - input=src_ids, - size=[64, 128], - param_attr=paddle.fluid.ParamAttr(name="emb_weight"), - dtype="float32", - is_sparse=False, + + emb_out = block.create_var(name='emb_out', dtype='float32') + w = paddle.create_parameter( + attr=paddle.fluid.ParamAttr(name="emb_weight"), + shape=[64, 128], + dtype='float32', + is_bias=False, + ) + block.append_op( + type='lookup_table', + outputs={'Out': emb_out}, + inputs={'Ids': src_ids, 'W': w}, + attrs={ + 'is_sparse': False, + 'is_distributed': False, + 'remote_prefetch': False, + 'padding_idx': None, + }, ) + loss = paddle.mean(emb_out) auto.shard_tensor( diff --git a/test/book/notest_understand_sentiment.py b/test/book/notest_understand_sentiment.py index eb4e01ae2949a0..8d7cde66bce56e 100644 --- a/test/book/notest_understand_sentiment.py +++ b/test/book/notest_understand_sentiment.py @@ -31,7 +31,7 @@ def convolution_net( data, label, input_dim, class_dim=2, emb_dim=32, hid_dim=32 ): - emb = fluid.layers.embedding( + emb = paddle.static.nn.embedding( input=data, size=[input_dim, emb_dim], is_sparse=True ) conv_3 = nets.sequence_conv_pool( diff --git a/test/book/test_recommender_system.py b/test/book/test_recommender_system.py index 47cfb52c738a91..f6605a13149d7d 100644 --- a/test/book/test_recommender_system.py +++ b/test/book/test_recommender_system.py @@ -25,7 +25,7 @@ import paddle from paddle import fluid -from paddle.fluid import framework, layers +from paddle.fluid import framework from paddle.fluid.executor import Executor from paddle.fluid.optimizer import SGDOptimizer @@ -44,7 +44,7 @@ def get_usr_combined_features(): uid = paddle.static.data(name='user_id', shape=[-1, 1], dtype='int64') - usr_emb = layers.embedding( + usr_emb = paddle.static.nn.embedding( input=uid, dtype='float32', size=[USR_DICT_SIZE, 32], @@ -60,7 +60,7 @@ def get_usr_combined_features(): name='gender_id', shape=[-1, 1], dtype='int64' ) - usr_gender_emb = layers.embedding( + usr_gender_emb = paddle.static.nn.embedding( input=usr_gender_id, size=[USR_GENDER_DICT_SIZE, 16], param_attr='gender_table', @@ -72,7 +72,7 @@ def get_usr_combined_features(): USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table) usr_age_id = paddle.static.data(name='age_id', shape=[-1, 1], dtype="int64") - usr_age_emb = layers.embedding( + usr_age_emb = paddle.static.nn.embedding( input=usr_age_id, size=[USR_AGE_DICT_SIZE, 16], is_sparse=IS_SPARSE, @@ -84,7 +84,7 @@ def get_usr_combined_features(): USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1 usr_job_id = paddle.static.data(name='job_id', shape=[-1, 1], dtype="int64") - usr_job_emb = layers.embedding( + usr_job_emb = paddle.static.nn.embedding( input=usr_job_id, size=[USR_JOB_DICT_SIZE, 16], param_attr='job_table', @@ -109,7 +109,7 @@ def get_mov_combined_features(): mov_id = paddle.static.data(name='movie_id', shape=[-1, 1], dtype='int64') - mov_emb = layers.embedding( + mov_emb = paddle.static.nn.embedding( input=mov_id, dtype='float32', size=[MOV_DICT_SIZE, 32], @@ -125,12 +125,12 @@ def get_mov_combined_features(): name='category_id', shape=[-1, 1], dtype='int64', lod_level=1 ) - mov_categories_emb = layers.embedding( + mov_categories_emb = paddle.static.nn.embedding( input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE ) mov_categories_hidden = paddle.static.nn.sequence_lod.sequence_pool( - input=mov_categories_emb, pool_type="sum" + input=mov_categories_emb.squeeze(-2), pool_type="sum" ) MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict()) @@ -139,12 +139,12 @@ def get_mov_combined_features(): name='movie_title', shape=[-1, 1], dtype='int64', lod_level=1 ) - mov_title_emb = layers.embedding( + mov_title_emb = paddle.static.nn.embedding( input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE ) mov_title_conv = nets.sequence_conv_pool( - input=mov_title_emb, + input=mov_title_emb.squeeze(-2), num_filters=32, filter_size=3, act="tanh", diff --git a/test/book/test_word2vec_book.py b/test/book/test_word2vec_book.py index cdebfc58cfa9cd..0c59f005a22873 100644 --- a/test/book/test_word2vec_book.py +++ b/test/book/test_word2vec_book.py @@ -58,28 +58,28 @@ def train( IS_SPARSE = is_sparse def __network__(words): - embed_first = fluid.layers.embedding( + embed_first = paddle.static.nn.embedding( input=words[0], size=[dict_size, EMBED_SIZE], dtype='float32', is_sparse=IS_SPARSE, param_attr='shared_w', ) - embed_second = fluid.layers.embedding( + embed_second = paddle.static.nn.embedding( input=words[1], size=[dict_size, EMBED_SIZE], dtype='float32', is_sparse=IS_SPARSE, param_attr='shared_w', ) - embed_third = fluid.layers.embedding( + embed_third = paddle.static.nn.embedding( input=words[2], size=[dict_size, EMBED_SIZE], dtype='float32', is_sparse=IS_SPARSE, param_attr='shared_w', ) - embed_forth = fluid.layers.embedding( + embed_forth = paddle.static.nn.embedding( input=words[3], size=[dict_size, EMBED_SIZE], dtype='float32', diff --git a/test/ipu/distributed/test_dist_pod128_sample.py b/test/ipu/distributed/test_dist_pod128_sample.py index 40a081a356ce05..9b0a33dfd87fba 100644 --- a/test/ipu/distributed/test_dist_pod128_sample.py +++ b/test/ipu/distributed/test_dist_pod128_sample.py @@ -59,7 +59,7 @@ def TestDistTraining(): with paddle.static.program_guard(main_prog, startup_prog): x = paddle.static.data(name="x", shape=[3, 2, 1], dtype='int64') with paddle.static.ipu_shard_guard(index=0, stage=0): - out = paddle.fluid.layers.embedding(x, **attrs) + out = paddle.static.nn.embedding(x, **attrs) with paddle.static.ipu_shard_guard(index=1, stage=1): loss = paddle.mean(out) opt = paddle.optimizer.Adam(learning_rate=1e-1) diff --git a/test/ipu/distributed/test_dist_sample.py b/test/ipu/distributed/test_dist_sample.py index 1300b2807eabe0..a5506db7e349fb 100644 --- a/test/ipu/distributed/test_dist_sample.py +++ b/test/ipu/distributed/test_dist_sample.py @@ -77,7 +77,7 @@ def Test(use_dist, file_name): with paddle.static.program_guard(main_prog, startup_prog): x = paddle.static.data(name="x", shape=[3, 2, 1], dtype='int64') - out = paddle.fluid.layers.embedding(x, **attrs) + out = paddle.static.nn.embedding(x, **attrs) loss = paddle.mean(out) opt = paddle.optimizer.Adam(learning_rate=1e-1) opt.minimize(loss) diff --git a/test/ipu/test_lookuptable_op_ipu.py b/test/ipu/test_lookuptable_op_ipu.py index e0e2b7ae598770..cf93159fbb1ec6 100644 --- a/test/ipu/test_lookuptable_op_ipu.py +++ b/test/ipu/test_lookuptable_op_ipu.py @@ -53,7 +53,7 @@ def build_model(self): x = paddle.static.data( name=self.feed_list[0], shape=self.feed_shape[0], dtype='int64' ) - out = paddle.fluid.layers.embedding(x, **self.attrs) + out = paddle.static.nn.embedding(x, **self.attrs) if self.is_training: loss = paddle.mean(out) adam = paddle.optimizer.Adam(learning_rate=1e-2) diff --git a/test/ipu/test_weight_sharing_ipu.py b/test/ipu/test_weight_sharing_ipu.py index 75ac2f5783199a..9f114fec99ab6d 100644 --- a/test/ipu/test_weight_sharing_ipu.py +++ b/test/ipu/test_weight_sharing_ipu.py @@ -55,7 +55,7 @@ def build_model(self): name=self.feed_list[0], shape=self.feed_shape[0], dtype='int64' ) with paddle.static.ipu_shard_guard(index=0, stage=0): - y = paddle.fluid.layers.embedding( + y = paddle.static.nn.embedding( input=x, size=[768, 768], dtype='float32', diff --git a/test/ir/test_ir_embedding_eltwise_layernorm_fuse_pass.py b/test/ir/test_ir_embedding_eltwise_layernorm_fuse_pass.py index 260f0a8913e9d6..dbdcdffdf5be1c 100644 --- a/test/ir/test_ir_embedding_eltwise_layernorm_fuse_pass.py +++ b/test/ir/test_ir_embedding_eltwise_layernorm_fuse_pass.py @@ -27,26 +27,26 @@ def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): word_id = paddle.static.data( name="word_id", - shape=[1, 128, 1], + shape=[1, 128], dtype="int64", ) pos_id = paddle.static.data( name="pos_id", - shape=[1, 128, 1], + shape=[1, 128], dtype="int64", ) sent_id = paddle.static.data( name="sent_id", - shape=[1, 128, 1], + shape=[1, 128], dtype="int64", ) - word_emb = fluid.layers.embedding( + word_emb = paddle.static.nn.embedding( input=word_id, size=(128, 768), dtype='float32' ) - pos_emb = fluid.layers.embedding( + pos_emb = paddle.static.nn.embedding( input=pos_id, size=(128, 768), dtype='float32' ) - sent_emb = fluid.layers.embedding( + sent_emb = paddle.static.nn.embedding( input=sent_id, size=(128, 768), dtype='float32' ) add1 = paddle.add(word_emb, pos_emb) @@ -55,34 +55,34 @@ def setUp(self): id1 = paddle.static.data( name="id1", - shape=[1, 128, 1], + shape=[1, 128], dtype="int64", ) id2 = paddle.static.data( name="id2", - shape=[1, 128, 1], + shape=[1, 128], dtype="int64", ) id3 = paddle.static.data( name="id3", - shape=[1, 128, 1], + shape=[1, 128], dtype="int64", ) id4 = paddle.static.data( name="id4", - shape=[1, 128, 1], + shape=[1, 128], dtype="int64", ) - emb1 = fluid.layers.embedding( + emb1 = paddle.static.nn.embedding( input=id1, size=(128, 768), dtype='float32' ) - emb2 = fluid.layers.embedding( + emb2 = paddle.static.nn.embedding( input=id2, size=(128, 768), dtype='float32' ) - emb3 = fluid.layers.embedding( + emb3 = paddle.static.nn.embedding( input=id3, size=(128, 768), dtype='float32' ) - emb4 = fluid.layers.embedding( + emb4 = paddle.static.nn.embedding( input=id4, size=(128, 768), dtype='float32' ) add_1 = paddle.add(emb1, emb2) @@ -93,25 +93,25 @@ def setUp(self): ) self.feeds = { - "word_id": np.random.randint( - low=0, high=128, size=(1, 128, 1) - ).astype("int64"), - "pos_id": np.random.randint( - low=0, high=128, size=(1, 128, 1) - ).astype("int64"), - "sent_id": np.random.randint( - low=0, high=128, size=(1, 128, 1) - ).astype("int64"), - "id1": np.random.randint(low=0, high=128, size=(1, 128, 1)).astype( + "word_id": np.random.randint(low=0, high=128, size=(1, 128)).astype( "int64" ), - "id2": np.random.randint(low=0, high=128, size=(1, 128, 1)).astype( + "pos_id": np.random.randint(low=0, high=128, size=(1, 128)).astype( "int64" ), - "id3": np.random.randint(low=0, high=128, size=(1, 128, 1)).astype( + "sent_id": np.random.randint(low=0, high=128, size=(1, 128)).astype( "int64" ), - "id4": np.random.randint(low=0, high=128, size=(1, 128, 1)).astype( + "id1": np.random.randint(low=0, high=128, size=(1, 128)).astype( + "int64" + ), + "id2": np.random.randint(low=0, high=128, size=(1, 128)).astype( + "int64" + ), + "id3": np.random.randint(low=0, high=128, size=(1, 128)).astype( + "int64" + ), + "id4": np.random.randint(low=0, high=128, size=(1, 128)).astype( "int64" ), } diff --git a/test/legacy_test/dist_ctr.py b/test/legacy_test/dist_ctr.py index 4056e5bc2285e0..148203d61ec68b 100644 --- a/test/legacy_test/dist_ctr.py +++ b/test/legacy_test/dist_ctr.py @@ -53,7 +53,7 @@ def get_model(self, batch_size=2): # build dnn model dnn_layer_dims = [128, 64, 32, 1] - dnn_embedding = fluid.layers.embedding( + dnn_embedding = paddle.static.nn.embedding( is_distributed=False, input=dnn_data, size=[dnn_input_dim, dnn_layer_dims[0]], @@ -80,7 +80,7 @@ def get_model(self, batch_size=2): dnn_out = fc # build lr model - lr_embedding = fluid.layers.embedding( + lr_embedding = paddle.static.nn.embedding( is_distributed=False, input=lr_data, size=[lr_input_dim, 1], diff --git a/test/legacy_test/dist_fleet_ctr.py b/test/legacy_test/dist_fleet_ctr.py index 7ac56dfff58867..a5634a0cfba28f 100644 --- a/test/legacy_test/dist_fleet_ctr.py +++ b/test/legacy_test/dist_fleet_ctr.py @@ -101,7 +101,7 @@ def net(self, args, is_train=True, batch_size=4, lr=0.01): # build dnn model dnn_layer_dims = [128, 128, 64, 32, 1] - dnn_embedding = fluid.layers.embedding( + dnn_embedding = paddle.static.nn.embedding( is_distributed=False, input=dnn_data, size=[dnn_input_dim, dnn_layer_dims[0]], @@ -113,7 +113,7 @@ def net(self, args, is_train=True, batch_size=4, lr=0.01): padding_idx=0, ) dnn_pool = paddle.static.nn.sequence_lod.sequence_pool( - input=dnn_embedding, pool_type="sum" + input=dnn_embedding.squeeze(-2), pool_type="sum" ) dnn_out = dnn_pool for i, dim in enumerate(dnn_layer_dims[1:]): @@ -129,7 +129,7 @@ def net(self, args, is_train=True, batch_size=4, lr=0.01): dnn_out = fc # build lr model - lr_embedding = fluid.layers.embedding( + lr_embbding = paddle.static.nn.embedding( is_distributed=False, input=lr_data, size=[lr_input_dim, 1], @@ -141,7 +141,7 @@ def net(self, args, is_train=True, batch_size=4, lr=0.01): padding_idx=0, ) lr_pool = paddle.static.nn.sequence_lod.sequence_pool( - input=lr_embedding, pool_type="sum" + input=lr_embbding.squeeze(-2), pool_type="sum" ) merge_layer = paddle.concat([dnn_out, lr_pool], axis=1) diff --git a/test/legacy_test/dist_fleet_heter_pipeline_ctr.py b/test/legacy_test/dist_fleet_heter_pipeline_ctr.py index 37a37ea2f7c355..db7bcf8fac1a44 100644 --- a/test/legacy_test/dist_fleet_heter_pipeline_ctr.py +++ b/test/legacy_test/dist_fleet_heter_pipeline_ctr.py @@ -72,7 +72,7 @@ def net(self, args, batch_size=4, lr=0.01): # build dnn model dnn_layer_dims = [128, 64, 32, 1] - dnn_embedding = fluid.layers.embedding( + dnn_embedding = paddle.static.nn.embedding( is_distributed=False, input=dnn_data, size=[dnn_input_dim, dnn_layer_dims[0]], @@ -88,7 +88,7 @@ def net(self, args, batch_size=4, lr=0.01): dnn_out = dnn_pool # build lr model - lr_embedding = fluid.layers.embedding( + lr_embedding = paddle.static.nn.embedding( is_distributed=False, input=lr_data, size=[lr_input_dim, 1], diff --git a/test/legacy_test/dist_text_classification.py b/test/legacy_test/dist_text_classification.py index bad17a3b6abdec..0736fb6a385505 100644 --- a/test/legacy_test/dist_text_classification.py +++ b/test/legacy_test/dist_text_classification.py @@ -55,7 +55,7 @@ def conv_net( fc0_dim=96, class_dim=2, ): - emb = fluid.layers.embedding( + emb = paddle.static.nn.embedding( input=input, size=[dict_dim, emb_dim], is_sparse=False, diff --git a/test/legacy_test/dist_word2vec.py b/test/legacy_test/dist_word2vec.py index bbda8ac3558be5..3764fd5c5dcbad 100644 --- a/test/legacy_test/dist_word2vec.py +++ b/test/legacy_test/dist_word2vec.py @@ -34,7 +34,7 @@ def get_model(self, batch_size=2): BATCH_SIZE = batch_size def __network__(words): - embed_first = fluid.layers.embedding( + embed_first = paddle.static.nn.embedding( input=words[0], size=[dict_size, EMBED_SIZE], dtype='float32', @@ -44,7 +44,7 @@ def __network__(words): initializer=paddle.nn.initializer.Constant(value=0.1), ), ) - embed_second = fluid.layers.embedding( + embed_second = paddle.static.nn.embedding( input=words[1], size=[dict_size, EMBED_SIZE], dtype='float32', @@ -54,7 +54,7 @@ def __network__(words): initializer=paddle.nn.initializer.Constant(value=0.1), ), ) - embed_third = fluid.layers.embedding( + embed_third = paddle.static.nn.embedding( input=words[2], size=[dict_size, EMBED_SIZE], dtype='float32', @@ -64,7 +64,7 @@ def __network__(words): initializer=paddle.nn.initializer.Constant(value=0.1), ), ) - embed_forth = fluid.layers.embedding( + embed_forth = paddle.static.nn.embedding( input=words[3], size=[dict_size, EMBED_SIZE], dtype='float32', diff --git a/test/legacy_test/fleet_heter_ps_training.py b/test/legacy_test/fleet_heter_ps_training.py index aec4634fbed161..4871506e58aaae 100644 --- a/test/legacy_test/fleet_heter_ps_training.py +++ b/test/legacy_test/fleet_heter_ps_training.py @@ -64,7 +64,7 @@ def net(batch_size=4, lr=0.01): # build dnn model dnn_layer_dims = [2, 1] - dnn_embedding = fluid.layers.embedding( + dnn_embedding = paddle.static.nn.embedding( is_distributed=False, input=dnn_data, size=[dnn_input_dim, dnn_layer_dims[0]], @@ -80,7 +80,7 @@ def net(batch_size=4, lr=0.01): dnn_out = dnn_pool # build lr model - lr_embedding = fluid.layers.embedding( + lr_embedding = paddle.static.nn.embedding( is_distributed=False, input=lr_data, size=[lr_input_dim, 1], diff --git a/test/legacy_test/nets.py b/test/legacy_test/nets.py index 0727bf7ead038d..16a947b221e8ba 100644 --- a/test/legacy_test/nets.py +++ b/test/legacy_test/nets.py @@ -330,7 +330,7 @@ def sequence_conv_pool( emb_dim = 128 hid_dim = 512 data = paddle.static.data(name="words", shape=[None, 1], dtype="int64", lod_level=1) - emb = fluid.layers.embedding(input=data, size=[input_dim, emb_dim], is_sparse=True) + emb = paddle.static.nn.embedding(input=data, size=[input_dim, emb_dim], is_sparse=True) seq_conv = fluid.nets.sequence_conv_pool(input=emb, num_filters=hid_dim, filter_size=3, diff --git a/test/legacy_test/simple_nets.py b/test/legacy_test/simple_nets.py index 8d19bbe08da552..8ff57cdce22db9 100644 --- a/test/legacy_test/simple_nets.py +++ b/test/legacy_test/simple_nets.py @@ -93,7 +93,7 @@ def bow_net( name="words", shape=[-1, 1], dtype="int64", lod_level=1 ) label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64") - emb = fluid.layers.embedding( + emb = paddle.static.nn.embedding( input=data, is_sparse=is_sparse, size=[dict_dim, emb_dim] ) bow = paddle.static.nn.sequence_lod.sequence_pool( diff --git a/test/legacy_test/test_communicator_geo.py b/test/legacy_test/test_communicator_geo.py index 1c93b92b92b50f..64a207160243d0 100644 --- a/test/legacy_test/test_communicator_geo.py +++ b/test/legacy_test/test_communicator_geo.py @@ -36,7 +36,7 @@ def net(self): name='x1', shape=[-1, 1], dtype='int64', lod_level=1 ) - emb = fluid.layers.embedding( + emb = paddle.static.nn.embedding( input=x1, size=[10000, 10], param_attr=fluid.ParamAttr( @@ -47,7 +47,7 @@ def net(self): ) pool = paddle.static.nn.sequence_lod.sequence_pool( - input=emb, pool_type="sum" + input=emb.squeeze(-2), pool_type="sum" ) z = paddle.concat([x, pool], axis=1) diff --git a/test/legacy_test/test_dist_fleet_a_sync_optimizer_auto_async.py b/test/legacy_test/test_dist_fleet_a_sync_optimizer_auto_async.py index 8784e22c7b786b..b7386500c43133 100644 --- a/test/legacy_test/test_dist_fleet_a_sync_optimizer_auto_async.py +++ b/test/legacy_test/test_dist_fleet_a_sync_optimizer_auto_async.py @@ -52,7 +52,7 @@ def test_a_sync_optimizer3(self): dtype="int64", lod_level=1, ) - x_embedding = paddle.fluid.layers.embedding( + x_embedding = paddle.static.nn.embedding( is_distributed=False, input=input_x, size=[1000000000, 100000], diff --git a/test/legacy_test/test_dist_fleet_a_sync_optimizer_auto_geo.py b/test/legacy_test/test_dist_fleet_a_sync_optimizer_auto_geo.py index d43a4397ac3a73..bde7a3d1820be2 100644 --- a/test/legacy_test/test_dist_fleet_a_sync_optimizer_auto_geo.py +++ b/test/legacy_test/test_dist_fleet_a_sync_optimizer_auto_geo.py @@ -49,7 +49,7 @@ def test_a_sync_optimizer2(self): input_x = paddle.static.data(name="x", shape=[-1, 1], dtype='int64') input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') - emb = paddle.fluid.layers.embedding( + emb = paddle.static.nn.embedding( input=input_x, size=[100, 10], is_sparse=True ) diff --git a/test/legacy_test/test_dist_fleet_heter_program.py b/test/legacy_test/test_dist_fleet_heter_program.py index 9ee46aadc51da1..aad71627a9929b 100644 --- a/test/legacy_test/test_dist_fleet_heter_program.py +++ b/test/legacy_test/test_dist_fleet_heter_program.py @@ -83,9 +83,8 @@ def build_input(self): def build_net(self, inputs): def embedding_layer(input): - return fluid.layers.embedding( + return paddle.static.nn.sparse_embedding( input=input, - is_sparse=True, size=[100001, 10], param_attr=fluid.ParamAttr( name="SparseFeatFactors", diff --git a/test/legacy_test/test_dist_fleet_ps.py b/test/legacy_test/test_dist_fleet_ps.py index eb423b3c341fc8..03ee5bb67fcc5f 100644 --- a/test/legacy_test/test_dist_fleet_ps.py +++ b/test/legacy_test/test_dist_fleet_ps.py @@ -75,7 +75,7 @@ def get_loss(cos_q_pt, cos_q_nt): name="query_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding - q_emb = fluid.layers.embedding( + q_emb = paddle.static.nn.embedding( input=q, is_distributed=is_distributed, size=[dict_dim, emb_dim], @@ -109,7 +109,7 @@ def get_loss(cos_q_pt, cos_q_nt): name="pos_title_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding - pt_emb = fluid.layers.embedding( + pt_emb = paddle.static.nn.embedding( input=pt, is_distributed=is_distributed, size=[dict_dim, emb_dim], @@ -142,7 +142,7 @@ def get_loss(cos_q_pt, cos_q_nt): name="neg_title_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding - nt_emb = fluid.layers.embedding( + nt_emb = paddle.static.nn.embedding( input=nt, is_distributed=is_distributed, size=[dict_dim, emb_dim], diff --git a/test/legacy_test/test_dist_fleet_ps3.py b/test/legacy_test/test_dist_fleet_ps3.py index 9f1ff73b830187..59ca7c7dc61884 100644 --- a/test/legacy_test/test_dist_fleet_ps3.py +++ b/test/legacy_test/test_dist_fleet_ps3.py @@ -75,7 +75,7 @@ def get_loss(cos_q_pt, cos_q_nt): name="query_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding - q_emb = fluid.layers.embedding( + q_emb = paddle.static.nn.embedding( input=q, is_distributed=is_distributed, size=[dict_dim, emb_dim], @@ -109,7 +109,7 @@ def get_loss(cos_q_pt, cos_q_nt): name="pos_title_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding - pt_emb = fluid.layers.embedding( + pt_emb = paddle.static.nn.embedding( input=pt, is_distributed=is_distributed, size=[dict_dim, emb_dim], @@ -142,7 +142,7 @@ def get_loss(cos_q_pt, cos_q_nt): name="neg_title_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding - nt_emb = fluid.layers.embedding( + nt_emb = paddle.static.nn.embedding( input=nt, is_distributed=is_distributed, size=[dict_dim, emb_dim], diff --git a/test/legacy_test/test_dist_fleet_ps5.py b/test/legacy_test/test_dist_fleet_ps5.py index efc70346ab159b..a7c363bd8287aa 100644 --- a/test/legacy_test/test_dist_fleet_ps5.py +++ b/test/legacy_test/test_dist_fleet_ps5.py @@ -75,7 +75,7 @@ def get_loss(cos_q_pt, cos_q_nt): name="query_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding - q_emb = fluid.layers.embedding( + q_emb = paddle.static.nn.embedding( input=q, is_distributed=is_distributed, size=[dict_dim, emb_dim], @@ -109,7 +109,7 @@ def get_loss(cos_q_pt, cos_q_nt): name="pos_title_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding - pt_emb = fluid.layers.embedding( + pt_emb = paddle.static.nn.embedding( input=pt, is_distributed=is_distributed, size=[dict_dim, emb_dim], @@ -142,7 +142,7 @@ def get_loss(cos_q_pt, cos_q_nt): name="neg_title_ids", shape=[-1, 1], dtype="int64", lod_level=1 ) # embedding - nt_emb = fluid.layers.embedding( + nt_emb = paddle.static.nn.embedding( input=nt, is_distributed=is_distributed, size=[dict_dim, emb_dim], diff --git a/test/legacy_test/test_dist_sparse_load_ps0.py b/test/legacy_test/test_dist_sparse_load_ps0.py index 7eded27da1f589..bd1ebef36f25ed 100644 --- a/test/legacy_test/test_dist_sparse_load_ps0.py +++ b/test/legacy_test/test_dist_sparse_load_ps0.py @@ -34,7 +34,7 @@ def net(self, emb_array, fc_array): 'input', shape=[None, 1], dtype="int64" ) - emb = fluid.layers.embedding( + emb = paddle.static.nn.embedding( input=dense_input, is_sparse=True, size=[10, 10], diff --git a/test/legacy_test/test_dist_sparse_tensor_load_sgd.py b/test/legacy_test/test_dist_sparse_tensor_load_sgd.py index 4c08ca52beaa10..63f39626488fec 100644 --- a/test/legacy_test/test_dist_sparse_tensor_load_sgd.py +++ b/test/legacy_test/test_dist_sparse_tensor_load_sgd.py @@ -49,7 +49,7 @@ def net(self): inputs = paddle.static.data( 'input', shape=[None, 1], dtype="int64" ) - emb = fluid.layers.embedding( + emb = paddle.static.nn.embedding( inputs, is_sparse=True, size=[10000, 128] ) fc1 = paddle.static.nn.fc( diff --git a/test/legacy_test/test_dist_transpiler.py b/test/legacy_test/test_dist_transpiler.py index 73ca10308eb87e..14a3baf95d7a53 100644 --- a/test/legacy_test/test_dist_transpiler.py +++ b/test/legacy_test/test_dist_transpiler.py @@ -352,7 +352,7 @@ def net_conf(self): inputs = [input_word, true_word, neg_word] init_width = 0.5 / embedding_size - input_emb = fluid.layers.embedding( + input_emb = paddle.static.nn.embedding( input=inputs[0], is_sparse=True, size=[dict_size, embedding_size], @@ -364,7 +364,7 @@ def net_conf(self): ), ) - true_emb_w = fluid.layers.embedding( + true_emb_w = paddle.static.nn.embedding( input=inputs[1], is_sparse=True, size=[dict_size, embedding_size], @@ -374,7 +374,7 @@ def net_conf(self): ), ) - true_emb_b = fluid.layers.embedding( + true_emb_b = paddle.static.nn.embedding( input=inputs[1], is_sparse=True, size=[dict_size, 1], @@ -387,7 +387,7 @@ def net_conf(self): neg_word_reshape = paddle.reshape(inputs[2], shape=[-1, 1]) neg_word_reshape.stop_gradient = True - neg_emb_w = fluid.layers.embedding( + neg_emb_w = paddle.static.nn.embedding( input=neg_word_reshape, is_sparse=True, size=[dict_size, embedding_size], @@ -398,7 +398,7 @@ def net_conf(self): neg_emb_w, shape=[-1, neg_num, embedding_size] ) - neg_emb_b = fluid.layers.embedding( + neg_emb_b = paddle.static.nn.embedding( input=neg_word_reshape, is_sparse=True, size=[dict_size, 1], @@ -712,7 +712,7 @@ def network_with_table(self, is_sparse, is_distributed): self.lookup_table_name = 'shared_w' def emb_pool(ids, table_name, is_distributed): - emb = fluid.layers.embedding( + emb = paddle.static.nn.embedding( input=ids, size=[self.table_size, self.emb_size], dtype='float32', @@ -1427,7 +1427,7 @@ def network_with_table(self, is_sparse, is_distributed): ) ) - emb = fluid.layers.embedding( + emb = paddle.static.nn.embedding( input=input, is_sparse=is_sparse, size=[3, 3], diff --git a/test/legacy_test/test_downpoursgd.py b/test/legacy_test/test_downpoursgd.py index e5294926e9e6b2..5bb65133b98a18 100644 --- a/test/legacy_test/test_downpoursgd.py +++ b/test/legacy_test/test_downpoursgd.py @@ -53,7 +53,7 @@ def test_device_work_use_cvm(self): ) os.system(cmd) x = paddle.static.data(name='x', shape=[-1, 1], dtype='int64') - x_emb = fluid.layers.embedding( + x_emb = paddle.static.nn.embedding( input=x, size=[1, 2], is_distributed=True ) y_predict = paddle.static.nn.fc(x=x_emb, size=1) @@ -117,7 +117,7 @@ def test_device_work(self): ) os.system(cmd) x = paddle.static.data(name='x', shape=[-1, 1], dtype='int64') - x_emb = fluid.layers.embedding( + x_emb = paddle.static.nn.embedding( input=x, size=[1, 2], is_distributed=True ) y_predict = paddle.static.nn.fc(x=x_emb, size=1) @@ -179,7 +179,7 @@ def test_downpour_opt_work(self): ) os.system(cmd) x = paddle.static.data(name='x', shape=[-1, 1], dtype='int64') - x_emb = fluid.layers.embedding( + x_emb = paddle.static.nn.embedding( input=x, size=[1, 2], is_distributed=True ) y_predict = paddle.static.nn.fc(x=x_emb, size=1) diff --git a/test/legacy_test/test_eager_deletion_padding_rnn.py b/test/legacy_test/test_eager_deletion_padding_rnn.py index 29195c3a2fc12b..bb00a8a4e20ff6 100644 --- a/test/legacy_test/test_eager_deletion_padding_rnn.py +++ b/test/legacy_test/test_eager_deletion_padding_rnn.py @@ -19,7 +19,6 @@ import paddle from paddle import fluid -from paddle.fluid import layers from paddle.fluid.executor import Executor os.environ["CPU_NUM"] = "1" @@ -241,7 +240,7 @@ def encoder_static( init_cell, shape=[num_layers, -1, hidden_size] ) - x_emb = layers.embedding( + x_emb = paddle.static.nn.embedding( input=x, size=[vocab_size, hidden_size], dtype='float32', diff --git a/test/legacy_test/test_entry_attr2.py b/test/legacy_test/test_entry_attr2.py index c8b4af3b2d853e..358e43c088cd23 100644 --- a/test/legacy_test/test_entry_attr2.py +++ b/test/legacy_test/test_entry_attr2.py @@ -31,7 +31,7 @@ def embedding_layer(self): input = paddle.static.data( name="dnn_data", shape=[-1, 1], dtype="int64", lod_level=1 ) - emb = fluid.layers.embedding( + emb = paddle.static.nn.embedding( input=input, size=[100, 10], is_sparse=True, diff --git a/test/legacy_test/test_fleet.py b/test/legacy_test/test_fleet.py index c861304d9a2af9..245fa15ec83bfc 100644 --- a/test/legacy_test/test_fleet.py +++ b/test/legacy_test/test_fleet.py @@ -62,7 +62,7 @@ def test_pslib_1(self): dtype="int64", lod_level=1, ) - emb = fluid.layers.embedding( + emb = paddle.static.nn.embedding( input=show, size=[1, 1], is_sparse=True, diff --git a/test/legacy_test/test_fleet_base_2.py b/test/legacy_test/test_fleet_base_2.py index c1d7d49326585f..667de8759f6b69 100644 --- a/test/legacy_test/test_fleet_base_2.py +++ b/test/legacy_test/test_fleet_base_2.py @@ -45,9 +45,7 @@ def test_ps_minimize(self): ) input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') - emb = paddle.fluid.layers.embedding( - input=input_slot, size=[10, 9], is_sparse=True - ) + emb = paddle.static.nn.sparse_embedding(input=input_slot, size=[10, 9]) input_x = paddle.concat(x=[input_x, emb], axis=1) fc_1 = paddle.static.nn.fc(x=input_x, size=64, activation='tanh') fc_2 = paddle.static.nn.fc(x=fc_1, size=64, activation='tanh') diff --git a/test/legacy_test/test_fleet_nocvm_1.py b/test/legacy_test/test_fleet_nocvm_1.py index 8fbe4984e2f3f6..26c94cbb542281 100644 --- a/test/legacy_test/test_fleet_nocvm_1.py +++ b/test/legacy_test/test_fleet_nocvm_1.py @@ -62,7 +62,7 @@ def test_pslib_1(self): dtype="int64", lod_level=1, ) - emb = fluid.layers.embedding( + emb = paddle.static.nn.embedding( input=show, size=[1, 1], is_sparse=True, diff --git a/test/legacy_test/test_fleet_unitaccessor.py b/test/legacy_test/test_fleet_unitaccessor.py index 4145f9f1ce9b20..2228a8f6863f84 100644 --- a/test/legacy_test/test_fleet_unitaccessor.py +++ b/test/legacy_test/test_fleet_unitaccessor.py @@ -59,7 +59,7 @@ def test_pslib_1(self): show = paddle.static.data( name="show", shape=[-1, 1], dtype="int64", lod_level=1 ) - emb = fluid.layers.embedding( + emb = paddle.static.nn.embedding( input=show, size=[1, 1], is_sparse=True, diff --git a/test/legacy_test/test_gradient_clip.py b/test/legacy_test/test_gradient_clip.py index 5b72f03339e31d..cc91c85bee0d1e 100644 --- a/test/legacy_test/test_gradient_clip.py +++ b/test/legacy_test/test_gradient_clip.py @@ -33,7 +33,7 @@ def bow_net( This model is from https://github.com/PaddlePaddle/models: fluid/PaddleNLP/text_classification/nets.py """ - emb = fluid.layers.embedding( + emb = paddle.static.nn.embedding( input=data, is_sparse=True, size=[dict_dim, emb_dim] ) bow = paddle.static.nn.sequence_lod.sequence_pool( diff --git a/test/legacy_test/test_hsigmoid_op.py b/test/legacy_test/test_hsigmoid_op.py index 752fbab31d57a8..5e566a75d04bc3 100644 --- a/test/legacy_test/test_hsigmoid_op.py +++ b/test/legacy_test/test_hsigmoid_op.py @@ -294,7 +294,7 @@ def hs_net_conf(self, is_sparse): data_list = [input_word, path_table, path_code, label] - emb = fluid.layers.embedding( + emb = paddle.static.nn.embedding( input=input_word, is_sparse=is_sparse, size=[3, 3], diff --git a/test/legacy_test/test_layers.py b/test/legacy_test/test_layers.py index e345be328b505e..25d5ad42bea489 100644 --- a/test/legacy_test/test_layers.py +++ b/test/legacy_test/test_layers.py @@ -24,7 +24,7 @@ import paddle import paddle.nn.functional as F from paddle import fluid -from paddle.fluid import core, layers +from paddle.fluid import core from paddle.fluid.dygraph import base, to_variable from paddle.fluid.framework import Program, default_main_program, program_guard from paddle.incubate.layers.nn import ( @@ -609,8 +609,8 @@ def test_embeding(self): name='word', shape=[-1, 1], dtype='int64' ) data_t.desc.set_need_check_feed(False) - emb = layers.embedding( - input=data_t, + emb = paddle.static.nn.embedding( + input=data_t.squeeze(-2), size=[dict_size, 32], param_attr='emb.w', is_sparse=False, @@ -1662,26 +1662,26 @@ def make_word_embedding(self): forth_word = self._get_data(name='forthw', shape=[1], dtype='int64') next_word = self._get_data(name='nextw', shape=[1], dtype='int64') - embed_first = layers.embedding( + embed_first = paddle.static.nn.embedding( input=first_word, size=[dict_size, embed_size], dtype='float32', param_attr='shared_w', ) - embed_second = layers.embedding( + embed_second = paddle.static.nn.embedding( input=second_word, size=[dict_size, embed_size], dtype='float32', param_attr='shared_w', ) - embed_third = layers.embedding( + embed_third = paddle.static.nn.embedding( input=third_word, size=[dict_size, embed_size], dtype='float32', param_attr='shared_w', ) - embed_forth = layers.embedding( + embed_forth = paddle.static.nn.embedding( input=forth_word, size=[dict_size, embed_size], dtype='float32', @@ -1754,7 +1754,7 @@ def make_nce(self): if i == label_word: continue - emb = layers.embedding( + emb = paddle.static.nn.embedding( input=words[i], size=[dict_size, 32], param_attr='emb.w', diff --git a/test/legacy_test/test_lookup_table_bf16_op.py b/test/legacy_test/test_lookup_table_bf16_op.py index 6f932f780c9771..48cb484f0d81d2 100644 --- a/test/legacy_test/test_lookup_table_bf16_op.py +++ b/test/legacy_test/test_lookup_table_bf16_op.py @@ -236,7 +236,7 @@ def setUp(self): x = paddle.static.data( name='x', shape=self.ids_shape, dtype='int64' ) - self.emb = fluid.layers.embedding( + self.emb = paddle.static.nn.embedding( input=x, size=self.w_shape, param_attr=fluid.ParamAttr( @@ -256,7 +256,7 @@ def test_embedding_weights(self): np.testing.assert_array_equal(self.w_fp32, result) def test_lookup_results(self): - lookup_result = convert_uint16_to_float(self.result[1]) + lookup_result = convert_uint16_to_float(self.result[1].squeeze(-2)) lookup_ref = _lookup(self.w_fp32, self.ids, self.flat_ids) np.testing.assert_array_equal(lookup_result, lookup_ref) diff --git a/test/legacy_test/test_lookup_table_op.py b/test/legacy_test/test_lookup_table_op.py index cd26f390747ee4..04ac09bdce996b 100644 --- a/test/legacy_test/test_lookup_table_op.py +++ b/test/legacy_test/test_lookup_table_op.py @@ -25,7 +25,6 @@ import paddle import paddle.nn.functional as F -from paddle import fluid from paddle.fluid import Program, core, program_guard @@ -168,7 +167,7 @@ def test_errors(self): def test_Variable(): # the input type must be Variable - fluid.layers.embedding(input=input_data, size=(10, 64)) + paddle.static.nn.embedding(input=input_data, size=(10, 64)) self.assertRaises(TypeError, test_Variable) @@ -177,7 +176,7 @@ def test_input_dtype(): input = paddle.static.data( name='x', shape=[4, 1], dtype='float32' ) - fluid.layers.embedding(input=input, size=(10, 64)) + paddle.static.nn.embedding(input=input, size=(10, 64)) self.assertRaises(TypeError, test_input_dtype) @@ -186,7 +185,7 @@ def test_param_dtype(): input2 = paddle.static.data( name='x2', shape=[4, 1], dtype='int64' ) - fluid.layers.embedding( + paddle.static.nn.embedding( input=input2, size=(10, 64), dtype='int64' ) @@ -195,7 +194,7 @@ def test_param_dtype(): input3 = paddle.static.data( name='x3', shape=[4, 1], dtype='int64' ) - fluid.layers.embedding( + paddle.static.nn.embedding( input=input3, size=(10, 64), dtype='float16' ) diff --git a/test/legacy_test/test_monitor.py b/test/legacy_test/test_monitor.py index 128f9bb1dcbda9..37fb9cb898a63b 100644 --- a/test/legacy_test/test_monitor.py +++ b/test/legacy_test/test_monitor.py @@ -61,7 +61,9 @@ def test_dataset_run_with_stat(self): embs = [] for x in slots_vars: - emb = fluid.layers.embedding(x, is_sparse=True, size=[100001, 4]) + emb = paddle.static.nn.embedding( + x, is_sparse=True, size=[100001, 4] + ) embs.append(emb) dataset = paddle.distributed.InMemoryDataset() diff --git a/test/legacy_test/test_regularizer.py b/test/legacy_test/test_regularizer.py index af7103d704f481..d8add3c3760d1d 100644 --- a/test/legacy_test/test_regularizer.py +++ b/test/legacy_test/test_regularizer.py @@ -127,7 +127,7 @@ def bow_net( This model is from https://github.com/PaddlePaddle/models: fluid/PaddleNLP/text_classification/nets.py """ - emb = fluid.layers.embedding( + emb = paddle.static.nn.embedding( input=data, is_sparse=is_sparse, size=[dict_dim, emb_dim] ) bow = paddle.static.nn.sequence_lod.sequence_pool( diff --git a/test/legacy_test/test_regularizer_api.py b/test/legacy_test/test_regularizer_api.py index 415a5d963b1ad6..a00dc07022c497 100644 --- a/test/legacy_test/test_regularizer_api.py +++ b/test/legacy_test/test_regularizer_api.py @@ -39,7 +39,7 @@ def bow_net( This model is from https://github.com/PaddlePaddle/models: fluid/PaddleNLP/text_classification/nets.py """ - emb = fluid.layers.embedding( + emb = paddle.static.nn.embedding( input=data, is_sparse=is_sparse, size=[dict_dim, emb_dim] ) bow = paddle.static.nn.sequence_lod.sequence_pool( diff --git a/test/legacy_test/test_sgd_op_bf16.py b/test/legacy_test/test_sgd_op_bf16.py index 76a9819b073c24..9b58c7b00d2cec 100644 --- a/test/legacy_test/test_sgd_op_bf16.py +++ b/test/legacy_test/test_sgd_op_bf16.py @@ -342,7 +342,7 @@ def test_sgd(self): label = paddle.static.data( name='Y', shape=[-1] + y_shape, dtype='uint16' ) - emb = fluid.layers.embedding( + emb = paddle.static.nn.embedding( input=x, size=self.w_shape, param_attr=fluid.ParamAttr( diff --git a/test/legacy_test/test_weight_decay.py b/test/legacy_test/test_weight_decay.py index ae85324e9d64a7..41bea82c4cd030 100644 --- a/test/legacy_test/test_weight_decay.py +++ b/test/legacy_test/test_weight_decay.py @@ -54,7 +54,7 @@ def bow_net( This model is from https://github.com/PaddlePaddle/models: fluid/PaddleNLP/text_classification/nets.py """ - emb = fluid.layers.embedding( + emb = paddle.static.nn.embedding( input=data, is_sparse=is_sparse, size=[dict_dim, emb_dim] ) bow = paddle.static.nn.sequence_lod.sequence_pool( diff --git a/test/legacy_test/transformer_model.py b/test/legacy_test/transformer_model.py index 14010c06f0b30c..03f926c1fb4c04 100644 --- a/test/legacy_test/transformer_model.py +++ b/test/legacy_test/transformer_model.py @@ -18,7 +18,6 @@ import paddle from paddle import fluid -from paddle.fluid import layers pos_enc_param_names = ( "src_pos_enc_table", @@ -264,13 +263,13 @@ def prepare_encoder( This module is used at the bottom of the encoder stacks. """ - src_word_emb = layers.embedding( + src_word_emb = paddle.static.nn.embedding( src_word, size=[src_vocab_size, src_emb_dim], padding_idx=src_pad_idx, param_attr=paddle.nn.initializer.Normal(0.0, 1.0), ) - src_pos_enc = layers.embedding( + src_pos_enc = paddle.static.nn.embedding( src_pos, size=[src_max_len, src_emb_dim], padding_idx=pos_pad_idx,