merge from master #2

Merged 5 commits on Oct 29, 2019
1 change: 0 additions & 1 deletion ci/batch/docker/Dockerfile
@@ -24,5 +24,4 @@ FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
ENV PATH /opt/conda/bin:$PATH
RUN git clone https://github.com/dmlc/gluon-nlp
WORKDIR gluon-nlp
RUN /bin/bash -c 'CONDA_ENVS_PATH=$PWD/conda CONDA_PKGS_DIRS=$PWD/conda/pkgs conda init bash && source /root/.bashrc && conda env update --prune -p conda/gpu/py3 -f env/gpu/py3.yml && source activate ./conda/gpu/py3 && pip install -v -e . && pip install awscli && python -m spacy download en && python -m spacy download de && python -m nltk.downloader all'
ADD gluon_nlp_job.sh .
2 changes: 1 addition & 1 deletion docs/api/notes/data_api.rst
@@ -158,7 +158,7 @@ lengths in the minibatch, which allows the fast tensor manipulation in GPU.

.. code:: python

->>> batchify_fn = nlp.data.batchify.Tuple(nlp.data.batchify.Pad(axis=0),
+>>> batchify_fn = nlp.data.batchify.Tuple(nlp.data.batchify.Pad(axis=0, pad_val=0),
>>> nlp.data.batchify.Stack())

:class:`~gluonnlp.data.batchify.Tuple` wraps multiple batchify functions and applies each input function on each input field,
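The recurring change in this PR is spelling out the padding value: in this release a bare `Pad()` pads with 0 but emits a warning, and the `filterwarnings = error` setting added to pytest.ini below turns any warning into a test failure. A minimal sketch of the explicit form (assuming only `gluonnlp` and `mxnet` are installed; the sample data is made up):

```python
import gluonnlp as nlp

# Two variable-length token-id sequences, each paired with a label.
samples = [([1, 2, 3, 4], 0), ([5, 7], 1)]

# Explicit pad_val=0 avoids the automatic-padding warning from a bare Pad().
batchify_fn = nlp.data.batchify.Tuple(
    nlp.data.batchify.Pad(axis=0, pad_val=0),  # pad the token ids
    nlp.data.batchify.Stack())                 # stack the labels

data, labels = batchify_fn(samples)
print(data)    # shape (2, 4); the short row becomes [5. 7. 0. 0.]
print(labels)  # [0. 1.]
```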
8 changes: 4 additions & 4 deletions docs/examples/machine_translation/gnmt.md
@@ -277,12 +277,12 @@ is to construct the sampler and `DataLoader`. The first step is to use the `batchify`
function, which pads and stacks sequences to form mini-batches.

```{.python .input}
-train_batchify_fn = nlp.data.batchify.Tuple(nlp.data.batchify.Pad(),
-nlp.data.batchify.Pad(),
+train_batchify_fn = nlp.data.batchify.Tuple(nlp.data.batchify.Pad(pad_val=0),
+nlp.data.batchify.Pad(pad_val=0),
nlp.data.batchify.Stack(dtype='float32'),
nlp.data.batchify.Stack(dtype='float32'))
-test_batchify_fn = nlp.data.batchify.Tuple(nlp.data.batchify.Pad(),
-nlp.data.batchify.Pad(),
+test_batchify_fn = nlp.data.batchify.Tuple(nlp.data.batchify.Pad(pad_val=0),
+nlp.data.batchify.Pad(pad_val=0),
nlp.data.batchify.Stack(dtype='float32'),
nlp.data.batchify.Stack(dtype='float32'),
nlp.data.batchify.Stack())
4 changes: 2 additions & 2 deletions docs/examples/machine_translation/transformer.md
@@ -121,8 +121,8 @@ Now, we have obtained the transformed datasets. The next step is to construct th

```{.python .input}
wmt_test_batchify_fn = nlp.data.batchify.Tuple(
-nlp.data.batchify.Pad(),
-nlp.data.batchify.Pad(),
+nlp.data.batchify.Pad(pad_val=0),
+nlp.data.batchify.Pad(pad_val=0),
nlp.data.batchify.Stack(dtype='float32'),
nlp.data.batchify.Stack(dtype='float32'),
nlp.data.batchify.Stack())
@@ -109,7 +109,7 @@ For more advanced usage examples of the DataLoader object, check out the

```{.python .input}
batch_size = 2
-dataset_batchify_fn = nlp.data.batchify.Tuple(nlp.data.batchify.Pad(),
+dataset_batchify_fn = nlp.data.batchify.Tuple(nlp.data.batchify.Pad(pad_val=0),
nlp.data.batchify.Stack())
data_loader = gluon.data.DataLoader(dataset,
batch_size=batch_size,
@@ -172,7 +172,7 @@ def get_dataloader():

# Construct the DataLoader Pad data, stack label and lengths
batchify_fn = nlp.data.batchify.Tuple(
-nlp.data.batchify.Pad(axis=0),
+nlp.data.batchify.Pad(axis=0, pad_val=0),
nlp.data.batchify.Stack())

# In this example, we use a FixedBucketSampler,
2 changes: 1 addition & 1 deletion docs/examples/sentiment_analysis/sentiment_analysis.md
@@ -195,7 +195,7 @@ def get_dataloader():

# Pad data, stack label and lengths
batchify_fn = nlp.data.batchify.Tuple(
-nlp.data.batchify.Pad(axis=0, ret_length=True),
+nlp.data.batchify.Pad(axis=0, pad_val=0, ret_length=True),
nlp.data.batchify.Stack(dtype='float32'))
batch_sampler = nlp.data.sampler.FixedBucketSampler(
train_data_lengths,
2 changes: 1 addition & 1 deletion env/cpu/py3-master.yml
@@ -19,7 +19,7 @@ dependencies:
- scipy=1.3.1
- pip:
- pylint-quotes==0.2.1
-- mxnet-mkl>=1.6.0b20191006
+- mxnet-cu100>=1.6.0b20191027
- sacremoses
- sentencepiece<0.2
- sphinx-autodoc-typehints==1.7.0
2 changes: 1 addition & 1 deletion env/docker/py3.yml
@@ -30,7 +30,7 @@ dependencies:
- scikit-learn=0.21.3
- pip:
- pylint-quotes<0.2
-- mxnet-cu101mkl>=1.6.0b20191006
+- mxnet-cu100>=1.6.0b20191027
- sacremoses
- sentencepiece<0.2
- https://github.com/szha/mx-theme/tarball/master
2 changes: 1 addition & 1 deletion env/gpu/py3-master.yml
@@ -29,7 +29,7 @@ dependencies:
- scipy=1.3.1
- pip:
- pylint-quotes<0.2
-- mxnet-cu101mkl>=1.6.0b20191006
+- mxnet-cu100>=1.6.0b20191027
- sacremoses
- sentencepiece<0.2
- https://github.com/szha/mx-theme/tarball/master
3 changes: 3 additions & 0 deletions pytest.ini
@@ -10,3 +10,6 @@ markers =

env =
MXNET_HOME=tests/data

+filterwarnings =
+    error
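With `filterwarnings = error`, every warning raised during the test run becomes a test failure; this is what forces the explicit `pad_val=0` arguments above and the `warnings.catch_warnings()` guards in the test changes below. For reference, a hedged sketch of the same setting with a targeted exemption (the `ignore` line is illustrative, not part of this PR):

```ini
# pytest.ini -- escalate all warnings to errors, with one example exemption.
[pytest]
filterwarnings =
    error
    ignore::DeprecationWarning:thirdparty.*
```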
8 changes: 4 additions & 4 deletions scripts/bert/finetune_icsl.py
@@ -328,10 +328,10 @@ def train(args):
dev_data_bert = dev_data.transform(idsl_transform, lazy=False)
test_data_bert = test_data.transform(idsl_transform, lazy=False)
# Construct the DataLoader
-batchify_fn = nlp.data.batchify.Tuple(nlp.data.batchify.Pad(), # Subword ID
-nlp.data.batchify.Pad(), # Subword Mask
-nlp.data.batchify.Pad(), # Beginning of subword
-nlp.data.batchify.Pad(), # Tag IDs
+batchify_fn = nlp.data.batchify.Tuple(nlp.data.batchify.Pad(pad_val=0), # Subword ID
+nlp.data.batchify.Pad(pad_val=0), # Subword Mask
+nlp.data.batchify.Pad(pad_val=0), # Beginning of subword
+nlp.data.batchify.Pad(pad_val=0), # Tag IDs
nlp.data.batchify.Stack(), # Intent Label
nlp.data.batchify.Stack()) # Valid Length
train_batch_sampler = nlp.data.sampler.SortedBucketSampler(
4 changes: 2 additions & 2 deletions scripts/machine_translation/dataprocessor.py
@@ -217,12 +217,12 @@ def get_dataloader(data_set, args, dataset_type,
data_lengths = get_data_lengths(data_set)

if dataset_type == 'train':
-train_batchify_fn = btf.Tuple(btf.Pad(), btf.Pad(),
+train_batchify_fn = btf.Tuple(btf.Pad(pad_val=0), btf.Pad(pad_val=0),
btf.Stack(dtype='float32'), btf.Stack(dtype='float32'))

else:
data_lengths = list(map(lambda x: x[-1], data_lengths))
-test_batchify_fn = btf.Tuple(btf.Pad(), btf.Pad(),
+test_batchify_fn = btf.Tuple(btf.Pad(pad_val=0), btf.Pad(pad_val=0),
btf.Stack(dtype='float32'), btf.Stack(dtype='float32'),
btf.Stack())

2 changes: 0 additions & 2 deletions scripts/machine_translation/dataset.py
@@ -23,10 +23,8 @@
import os
from gluonnlp.base import get_home_dir
from gluonnlp.data.translation import _TranslationDataset, _get_pair_key
-from gluonnlp.data.registry import register


-@register(segment=['train', 'val', 'test'])
class TOY(_TranslationDataset):
"""A Small Translation Dataset for Testing Scripts.

2 changes: 1 addition & 1 deletion scripts/natural_language_inference/dataset.py
@@ -66,7 +66,7 @@ def prepare_data_loader(args, dataset, vocab, test=False):
lazy=False)

# Batching
-batchify_fn = btf.Tuple(btf.Pad(), btf.Pad(), btf.Stack(dtype='int32'))
+batchify_fn = btf.Tuple(btf.Pad(pad_val=0), btf.Pad(pad_val=0), btf.Stack(dtype='int32'))
data_lengths = [max(len(d[0]), len(d[1])) for d in dataset]
batch_sampler = nlp.data.FixedBucketSampler(lengths=data_lengths,
batch_size=args.batch_size,
2 changes: 1 addition & 1 deletion scripts/parsing/parser/biaffine_parser.py
@@ -322,7 +322,7 @@ def flatten_numpy(arr):
return arc_accuracy, rel_accuracy, overall_accuracy, outputs
return outputs

-def save_parameters(self, filename):
+def save_parameters(self, filename): # pylint: disable=arguments-differ
"""Save model

Parameters
2 changes: 1 addition & 1 deletion scripts/sentiment_analysis/finetune_lm.py
@@ -182,7 +182,7 @@ def preprocess_dataset(dataset):
test_dataset, test_data_lengths = preprocess_dataset(test_dataset)

# Construct the DataLoader. Pad data and stack label
-batchify_fn = nlp.data.batchify.Tuple(nlp.data.batchify.Pad(axis=0, ret_length=True),
+batchify_fn = nlp.data.batchify.Tuple(nlp.data.batchify.Pad(axis=0, pad_val=0, ret_length=True),
nlp.data.batchify.Stack(dtype='float32'))
if args.bucket_type is None:
print('Bucketing strategy is not used!')
19 changes: 14 additions & 5 deletions scripts/tests/test_dataprocessor.py
@@ -17,12 +17,13 @@

"""Test DataProcessor."""


import sys
import os
-import pytest
+import warnings
import time

+import pytest

sys.path.append(os.path.join(os.path.dirname(os.path.dirname(__file__)), 'machine_translation'))

from ..machine_translation.dataprocessor import process_dataset
@@ -38,12 +39,16 @@ def test_toy():
    assert len(train_en_de) == 30
    assert len(val_en_de) == 30
    assert len(test_en_de) == 30
-    en_vocab, de_vocab = train_en_de.src_vocab, train_en_de.tgt_vocab
+    with warnings.catch_warnings():  # TODO https://github.com/dmlc/gluon-nlp/issues/978
+        warnings.simplefilter("ignore")
+        en_vocab, de_vocab = train_en_de.src_vocab, train_en_de.tgt_vocab
    assert len(en_vocab) == 358
    assert len(de_vocab) == 381
    train_de_en = TOY(segment='train', src_lang='de', tgt_lang='en',
                      root='tests/data/translation_test')
-    de_vocab, en_vocab = train_de_en.src_vocab, train_de_en.tgt_vocab
+    with warnings.catch_warnings():  # TODO https://github.com/dmlc/gluon-nlp/issues/978
+        warnings.simplefilter("ignore")
+        de_vocab, en_vocab = train_de_en.src_vocab, train_de_en.tgt_vocab
    assert len(en_vocab) == 358
    assert len(de_vocab) == 381
    for i in range(10):
@@ -62,7 +67,11 @@ def test_translation_preprocess():
    for (src_max_len, tgt_max_len) in max_lens:
        data_train = TOY('train', src_lang=src_lang, tgt_lang=tgt_lang)
        data_val = TOY('val', src_lang=src_lang, tgt_lang=tgt_lang)
-        src_vocab, tgt_vocab = data_train.src_vocab, data_train.tgt_vocab
+
+        # TODO https://github.com/dmlc/gluon-nlp/issues/978
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            src_vocab, tgt_vocab = data_train.src_vocab, data_train.tgt_vocab
        data_val_processed = process_dataset(data_val, src_vocab, tgt_vocab,
                                             src_max_len, tgt_max_len)
        for (src, tgt), (preprocessed_src, preprocessed_tgt) in zip(data_val, data_val_processed):
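Accessing the TOY vocab properties currently raises a warning (tracked in gluon-nlp issue #978), so the tests silence it per call site instead of globally. A self-contained illustration of the pattern, using a hypothetical noisy function in place of the vocab accessor:

```python
import warnings

def noisy_accessor():
    # Stand-in for a property that warns, like TOY.src_vocab above.
    warnings.warn('deprecated accessor', UserWarning)
    return 42

# Under pytest's `filterwarnings = error` a bare call would fail the test;
# the scoped guard ignores warnings only inside this block.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    assert noisy_accessor() == 42
```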
8 changes: 4 additions & 4 deletions scripts/tests/test_encoder_decoder.py
@@ -24,7 +24,7 @@


def test_gnmt_encoder():
-ctx = mx.Context.default_ctx
+ctx = mx.current_context()
for cell_type in ["lstm", "gru", "relu_rnn", "tanh_rnn"]:
for num_layers, num_bi_layers in [(2, 1), (3, 0)]:
for use_residual in [False, True]:
@@ -51,7 +51,7 @@ def test_gnmt_encoder():


def test_gnmt_encoder_decoder():
-ctx = mx.Context.default_ctx
+ctx = mx.current_context()
num_hidden = 8
encoder = GNMTEncoder(cell_type="lstm", num_layers=3, num_bi_layers=1, hidden_size=num_hidden,
dropout=0.0, use_residual=True, prefix='gnmt_encoder_')
@@ -99,7 +99,7 @@ def test_gnmt_encoder_decoder():
assert(len(additional_outputs) == 0)

def test_transformer_encoder():
-ctx = mx.Context.default_ctx
+ctx = mx.current_context()
for num_layers in range(1, 3):
for output_attention in [True, False]:
for use_residual in [False, True]:
@@ -137,7 +137,7 @@ def test_transformer_encoder():
assert(len(additional_outputs) == 0)

def test_transformer_encoder_decoder():
-ctx = mx.Context.default_ctx
+ctx = mx.current_context()
units = 16
encoder = TransformerEncoder(num_layers=3, units=units, hidden_size=32, num_heads=8, max_length=10,
dropout=0.0, use_residual=True, prefix='transformer_encoder_')
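`mx.Context.default_ctx` is an internal attribute, while `mx.current_context()` is the public accessor and follows `with`-scoped context managers, so the tests pick up whatever device the harness activates. A quick sketch (assuming only `mxnet`):

```python
import mxnet as mx

print(mx.current_context())  # cpu(0) unless a context scope is active

# current_context() tracks the innermost `with` scope.
with mx.Context('cpu', 0):
    x = mx.nd.zeros((2, 2), ctx=mx.current_context())
    print(x.context)  # cpu(0)
```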
2 changes: 1 addition & 1 deletion scripts/text_classification/fasttext_word_ngram.py
@@ -285,7 +285,7 @@ def get_dataloader(train_dataset, train_data_lengths,
""" Construct the DataLoader. Pad data, stack label and lengths"""
bucket_num, bucket_ratio = 20, 0.2
batchify_fn = gluonnlp.data.batchify.Tuple(
-gluonnlp.data.batchify.Pad(axis=0, ret_length=True),
+gluonnlp.data.batchify.Pad(axis=0, pad_val=0, ret_length=True),
gluonnlp.data.batchify.Stack(dtype='float32'))
batch_sampler = gluonnlp.data.sampler.FixedBucketSampler(
train_data_lengths,
3 changes: 1 addition & 2 deletions scripts/word_embeddings/model.py
@@ -85,8 +85,7 @@ def __init__(self, token_to_idx, output_dim, batch_size, negatives_weights,
dtype=dtype)

self.negatives_sampler = nlp.data.UnigramCandidateSampler(
-weights=negatives_weights**smoothing, shape=(batch_size, ),
-dtype='int64')
+weights=negatives_weights**smoothing, dtype='int64')

def __getitem__(self, tokens):
return self.embedding[tokens]
2 changes: 1 addition & 1 deletion src/gluonnlp/__init__.py
@@ -31,7 +31,7 @@
from . import initializer
from .vocab import Vocab

-__version__ = '0.8.1.dev'
+__version__ = '0.9.0.dev'

__all__ = ['data',
'model',
12 changes: 6 additions & 6 deletions src/gluonnlp/data/batchify/batchify.py
@@ -187,7 +187,7 @@ class Pad:
>>> a = [1, 2, 3, 4]
>>> b = [4, 5, 6]
>>> c = [8, 2]
->>> bf.Pad()([a, b, c])
+>>> bf.Pad(pad_val=0)([a, b, c])
<BLANKLINE>
[[1. 2. 3. 4.]
[4. 5. 6. 0.]
@@ -197,7 +197,7 @@ class Pad:
>>> a = [1, 2, 3, 4]
>>> b = [4, 5, 6]
>>> c = [8, 2]
->>> batch, length = bf.Pad(ret_length=True)([a, b, c])
+>>> batch, length = bf.Pad(pad_val=0, ret_length=True)([a, b, c])
>>> batch
<BLANKLINE>
[[1. 2. 3. 4.]
@@ -306,7 +306,7 @@ class Tuple:
>>> a = ([1, 2, 3, 4], 0)
>>> b = ([5, 7], 1)
>>> c = ([1, 2, 3, 4, 5, 6, 7], 0)
->>> f1, f2 = bf.Tuple(bf.Pad(), bf.Stack())([a, b])
+>>> f1, f2 = bf.Tuple(bf.Pad(pad_val=0), bf.Stack())([a, b])
>>> f1
<BLANKLINE>
[[1. 2. 3. 4.]
@@ -404,7 +404,7 @@ class Dict:
>>> a = {'data': [1, 2, 3, 4], 'label': 0}
>>> b = {'data': [5, 7], 'label': 1}
>>> c = {'data': [1, 2, 3, 4, 5, 6, 7], 'label': 0}
->>> batchify_fn = Dict({'data': Pad(), 'label': Stack()})
+>>> batchify_fn = Dict({'data': Pad(pad_val=0), 'label': Stack()})
>>> sample = batchify_fn([a, b, c])
>>> sample['data']
<BLANKLINE>
@@ -474,7 +474,7 @@ class NamedTuple:
>>> a = SampleData([1, 2, 3, 4], 0)
>>> b = SampleData([5, 7], 1)
>>> c = SampleData([1, 2, 3, 4, 5, 6, 7], 0)
->>> batchify_fn = NamedTuple(SampleData, {'data': Pad(), 'label': Stack()})
+>>> batchify_fn = NamedTuple(SampleData, {'data': Pad(pad_val=0), 'label': Stack()})
>>> sample = batchify_fn([a, b, c])
>>> sample
SampleData(data=
@@ -491,7 +491,7 @@ class NamedTuple:
[1. 2. 3. 4. 5. 6. 7.]]
<NDArray 3x7 @cpu_shared(0)>
>>> # Let's consider to use a list
->>> batchify_fn = NamedTuple(SampleData, [Pad(), Stack()])
+>>> batchify_fn = NamedTuple(SampleData, [Pad(pad_val=0), Stack()])
>>> batchify_fn([a, b, c])
SampleData(data=
[[1. 2. 3. 4. 0. 0. 0.]
22 changes: 5 additions & 17 deletions src/gluonnlp/data/candidate_sampler.py
@@ -18,9 +18,6 @@

__all__ = ['UnigramCandidateSampler']

-import functools
-import operator
-
import mxnet as mx
import numpy as np

@@ -36,20 +33,15 @@ class UnigramCandidateSampler(mx.gluon.HybridBlock):
weights : mx.nd.NDArray
Unnormalized class probabilities. Samples are drawn and returned on the
same context as weights.context.
-shape : int or tuple of int
-Shape of data to be sampled.
-TODO: Specifying the shape is only a workaround until random_like
-operators are available in mxnet
dtype : str or np.dtype, default 'float32'
Data type of the candidates. Make sure that the dtype precision is
large enough to represent the size of your weights array precisely. For
example, float32 can not distinguish 2**24 from 2**24 + 1.

"""

-def __init__(self, weights, shape, dtype='float32'):
+def __init__(self, weights, dtype='float32'):
super(UnigramCandidateSampler, self).__init__()
-self._shape = shape
self._dtype = dtype
self.N = weights.size

@@ -108,8 +100,6 @@ def hybrid_forward(self, F, candidates_like, prob, alias):
----------
candidates_like: mxnet.nd.NDArray or mxnet.sym.Symbol
This input specifies the shape of the to be sampled candidates. #
-TODO shape selection is not yet supported. Shape must be specified
-in the constructor.

Returns
-------
@@ -118,15 +108,13 @@
are sampled based on the weights specified on creation of the
UnigramCandidateSampler.
"""
-flat_shape = functools.reduce(operator.mul, self._shape)
-idx = F.random.uniform(low=0, high=self.N, shape=flat_shape,
-dtype='float64').floor()
+candidates_flat = candidates_like.reshape((-1, )).astype('float64')
+idx = F.random.uniform_like(candidates_flat, low=0, high=self.N).floor()
prob = F.gather_nd(prob, idx.reshape((1, -1)))
alias = F.gather_nd(alias, idx.reshape((1, -1)))
-where = F.random.uniform(shape=flat_shape,
-dtype='float64') < prob
+where = F.random.uniform_like(candidates_flat) < prob
hit = idx * where
alt = alias * (1 - where)
-candidates = (hit + alt).reshape(self._shape)
+candidates = (hit + alt).reshape_like(candidates_like)

return candidates.astype(self._dtype)
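With the `shape` constructor argument removed, the sampler derives its output shape from `candidates_like` at call time (`uniform_like`/`reshape_like` replace the fixed-shape workaround), so a single instance serves any batch size. A hedged usage sketch against the patched class (the weights are made up):

```python
import mxnet as mx
import gluonnlp as nlp

# Unnormalized unigram counts for a 4-token vocabulary.
weights = mx.nd.array([10., 5., 3., 1.])
sampler = nlp.data.UnigramCandidateSampler(weights=weights, dtype='int64')
sampler.initialize()  # HybridBlock constants need initializing before use

# The output shape follows candidates_like rather than a fixed shape.
candidates = sampler(mx.nd.zeros((2, 3)))
print(candidates.shape)  # (2, 3); entries drawn from {0, 1, 2, 3}
```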