diff --git a/src/operator/nn/fully_connected.cc b/src/operator/nn/fully_connected.cc index d1d84e975290..f720a10c0022 100644 --- a/src/operator/nn/fully_connected.cc +++ b/src/operator/nn/fully_connected.cc @@ -250,9 +250,15 @@ The learnable parameters include both ``weight`` and ``bias``. If ``no_bias`` is set to be true, then the ``bias`` term is ignored. -Note that the operator also supports forward computation with `row_sparse` weight and bias, -where the length of `weight.indices` and `bias.indices` must be equal to `num_hidden`. -This could be used for model inference with `row_sparse` weights trained with `SparseEmbedding`. +.. Note:: + + The sparse support for FullyConnected is limited to forward evaluation with `row_sparse` + weight and bias, where the length of `weight.indices` and `bias.indices` must be equal + to `num_hidden`. This could be useful for model inference with `row_sparse` weights + trained with importance sampling or noise contrastive estimation. + + To compute a linear transformation with `csr` sparse data, `sparse.dot` is recommended instead + of `sparse.FullyConnected`. )code" ADD_FILELINE) .set_num_inputs([](const NodeAttrs& attrs) {