From f4ccf0ea79b8f1f78e63e5028688d8d562f6bcab Mon Sep 17 00:00:00 2001 From: Itay Bittan Date: Fri, 14 Jun 2019 16:34:05 +0300 Subject: [PATCH 1/3] Fix smart_open deprecation warning globally --- gensim/corpora/_mmreader.c | 196 +++++++++++-------- gensim/corpora/_mmreader.pyx | 2 +- gensim/corpora/bleicorpus.py | 10 +- gensim/corpora/csvcorpus.py | 32 ++-- gensim/corpora/dictionary.py | 4 +- gensim/corpora/hashdictionary.py | 2 +- gensim/corpora/lowcorpus.py | 8 +- gensim/corpora/malletcorpus.py | 8 +- gensim/corpora/svmlightcorpus.py | 6 +- gensim/corpora/ucicorpus.py | 6 +- gensim/matutils.py | 4 +- gensim/models/deprecated/doc2vec.py | 25 +-- gensim/models/deprecated/keyedvectors.py | 138 +++++++------- gensim/models/deprecated/old_saveload.py | 7 +- gensim/models/deprecated/word2vec.py | 29 +-- gensim/models/doc2vec.py | 28 +-- gensim/models/fasttext.py | 8 +- gensim/models/hdpmodel.py | 2 +- gensim/models/keyedvectors.py | 221 +++++++++++----------- gensim/models/poincare.py | 9 +- gensim/models/utils_any2vec.py | 8 +- gensim/models/word2vec.py | 29 +-- gensim/models/wrappers/dtmmodel.py | 2 +- gensim/models/wrappers/ldamallet.py | 7 +- gensim/models/wrappers/ldavowpalwabbit.py | 14 +- gensim/models/wrappers/wordrank.py | 15 +- gensim/parsing/preprocessing.py | 2 +- gensim/scripts/glove2word2vec.py | 10 +- gensim/scripts/segment_wiki.py | 28 +-- gensim/scripts/word2vec2tensor.py | 4 +- gensim/similarities/index.py | 6 +- gensim/test/test_doc2vec.py | 6 +- gensim/test/test_fasttext.py | 6 +- gensim/test/test_keywords.py | 14 +- gensim/test/test_lee.py | 8 +- gensim/test/test_scripts.py | 11 +- gensim/test/test_similarities.py | 4 +- gensim/test/test_summarization.py | 4 +- gensim/test/test_translation_matrix.py | 2 +- gensim/test/test_utils.py | 8 +- gensim/test/test_word2vec.py | 28 +-- gensim/utils.py | 10 +- 42 files changed, 505 insertions(+), 466 deletions(-) diff --git a/gensim/corpora/_mmreader.c b/gensim/corpora/_mmreader.c index 03734e0f32..d50cae7242 100644 --- a/gensim/corpora/_mmreader.c +++ b/gensim/corpora/_mmreader.c @@ -1444,6 +1444,7 @@ static PyObject *__pyx_builtin_ValueError; static PyObject *__pyx_builtin_StopIteration; static PyObject *__pyx_builtin_enumerate; static const char __pyx_k_[] = "%"; +static const char __pyx_k_rb[] = "rb"; static const char __pyx_k_new[] = "__new__"; static const char __pyx_k_six[] = "six"; static const char __pyx_k_args[] = "args"; @@ -1453,6 +1454,7 @@ static const char __pyx_k_info[] = "info"; static const char __pyx_k_iter[] = "__iter__"; static const char __pyx_k_main[] = "__main__"; static const char __pyx_k_name[] = "__name__"; +static const char __pyx_k_open[] = "open"; static const char __pyx_k_seek[] = "seek"; static const char __pyx_k_send[] = "send"; static const char __pyx_k_test[] = "__test__"; @@ -1486,7 +1488,6 @@ static const char __pyx_k_reduce_ex[] = "__reduce_ex__"; static const char __pyx_k_six_moves[] = "six.moves"; static const char __pyx_k_ValueError[] = "ValueError"; static const char __pyx_k_pyx_result[] = "__pyx_result"; -static const char __pyx_k_smart_open[] = "smart_open"; static const char __pyx_k_startswith[] = "startswith"; static const char __pyx_k_to_unicode[] = "to_unicode"; static const char __pyx_k_transposed[] = "transposed"; @@ -1553,6 +1554,7 @@ static PyObject *__pyx_kp_s_matrix_columns_must_come_in_asce; static PyObject *__pyx_kp_s_matrixmarket_matrix_coordinate; static PyObject *__pyx_n_s_name; static PyObject *__pyx_n_s_new; +static PyObject *__pyx_n_s_open; static PyObject 
*__pyx_n_s_open_file; static PyObject *__pyx_n_s_pickle; static PyObject *__pyx_n_s_pyx_PickleError; @@ -1562,6 +1564,7 @@ static PyObject *__pyx_n_s_pyx_state; static PyObject *__pyx_n_s_pyx_type; static PyObject *__pyx_n_s_pyx_unpickle_MmReader; static PyObject *__pyx_n_s_range; +static PyObject *__pyx_n_s_rb; static PyObject *__pyx_n_s_reduce; static PyObject *__pyx_n_s_reduce_cython; static PyObject *__pyx_n_s_reduce_ex; @@ -1572,7 +1575,6 @@ static PyObject *__pyx_n_s_setstate_cython; static PyObject *__pyx_n_s_six; static PyObject *__pyx_n_s_six_moves; static PyObject *__pyx_n_s_skip_headers; -static PyObject *__pyx_n_s_smart_open; static PyObject *__pyx_n_s_split; static PyObject *__pyx_n_s_startswith; static PyObject *__pyx_n_s_string_types; @@ -4128,13 +4130,14 @@ static PyObject *__pyx_pf_6gensim_7corpora_9_mmreader_8MmReader_11docbyoffset(st PyObject *__pyx_t_3 = NULL; int __pyx_t_4; PyObject *__pyx_t_5 = NULL; - PY_LONG_LONG __pyx_t_6; - Py_ssize_t __pyx_t_7; - PyObject *(*__pyx_t_8)(PyObject *); - char const *__pyx_t_9; - PyObject *__pyx_t_10 = NULL; - PY_LONG_LONG __pyx_t_11; - int __pyx_t_12; + int __pyx_t_6; + PyObject *__pyx_t_7 = NULL; + PY_LONG_LONG __pyx_t_8; + Py_ssize_t __pyx_t_9; + PyObject *(*__pyx_t_10)(PyObject *); + char const *__pyx_t_11; + PY_LONG_LONG __pyx_t_12; + int __pyx_t_13; __Pyx_RefNannySetupContext("docbyoffset", 0); /* "gensim/corpora/_mmreader.pyx":188 @@ -4155,7 +4158,7 @@ static PyObject *__pyx_pf_6gensim_7corpora_9_mmreader_8MmReader_11docbyoffset(st * if offset == -1: * return [] # <<<<<<<<<<<<<< * if isinstance(self.input, string_types): - * fin, close_fin = utils.smart_open(self.input), True + * fin, close_fin = utils.open(self.input, 'rb'), True */ __Pyx_XDECREF(__pyx_r); __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 189, __pyx_L1_error) @@ -4177,7 +4180,7 @@ static PyObject *__pyx_pf_6gensim_7corpora_9_mmreader_8MmReader_11docbyoffset(st * if offset == -1: * return [] * if isinstance(self.input, string_types): # <<<<<<<<<<<<<< - * fin, close_fin = utils.smart_open(self.input), True + * fin, close_fin = utils.open(self.input, 'rb'), True * else: */ __pyx_t_1 = __pyx_v_self->input; @@ -4193,16 +4196,17 @@ static PyObject *__pyx_pf_6gensim_7corpora_9_mmreader_8MmReader_11docbyoffset(st /* "gensim/corpora/_mmreader.pyx":191 * return [] * if isinstance(self.input, string_types): - * fin, close_fin = utils.smart_open(self.input), True # <<<<<<<<<<<<<< + * fin, close_fin = utils.open(self.input, 'rb'), True # <<<<<<<<<<<<<< * else: * fin, close_fin = self.input, False */ __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_utils); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 191, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_smart_open); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 191, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_open); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 191, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_1 = NULL; + __pyx_t_6 = 0; if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_5))) { __pyx_t_1 = PyMethod_GET_SELF(__pyx_t_5); if (likely(__pyx_t_1)) { @@ -4210,12 +4214,41 @@ static PyObject *__pyx_pf_6gensim_7corpora_9_mmreader_8MmReader_11docbyoffset(st __Pyx_INCREF(__pyx_t_1); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_5, function); + __pyx_t_6 = 1; } } - __pyx_t_3 = (__pyx_t_1) ? 
__Pyx_PyObject_Call2Args(__pyx_t_5, __pyx_t_1, __pyx_v_self->input) : __Pyx_PyObject_CallOneArg(__pyx_t_5, __pyx_v_self->input); - __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; - if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 191, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_3); + #if CYTHON_FAST_PYCALL + if (PyFunction_Check(__pyx_t_5)) { + PyObject *__pyx_temp[3] = {__pyx_t_1, __pyx_v_self->input, __pyx_n_s_rb}; + __pyx_t_3 = __Pyx_PyFunction_FastCall(__pyx_t_5, __pyx_temp+1-__pyx_t_6, 2+__pyx_t_6); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 191, __pyx_L1_error) + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_GOTREF(__pyx_t_3); + } else + #endif + #if CYTHON_FAST_PYCCALL + if (__Pyx_PyFastCFunction_Check(__pyx_t_5)) { + PyObject *__pyx_temp[3] = {__pyx_t_1, __pyx_v_self->input, __pyx_n_s_rb}; + __pyx_t_3 = __Pyx_PyCFunction_FastCall(__pyx_t_5, __pyx_temp+1-__pyx_t_6, 2+__pyx_t_6); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 191, __pyx_L1_error) + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_GOTREF(__pyx_t_3); + } else + #endif + { + __pyx_t_7 = PyTuple_New(2+__pyx_t_6); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 191, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_7); + if (__pyx_t_1) { + __Pyx_GIVEREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_1); __pyx_t_1 = NULL; + } + __Pyx_INCREF(__pyx_v_self->input); + __Pyx_GIVEREF(__pyx_v_self->input); + PyTuple_SET_ITEM(__pyx_t_7, 0+__pyx_t_6, __pyx_v_self->input); + __Pyx_INCREF(__pyx_n_s_rb); + __Pyx_GIVEREF(__pyx_n_s_rb); + PyTuple_SET_ITEM(__pyx_t_7, 1+__pyx_t_6, __pyx_n_s_rb); + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_5, __pyx_t_7, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 191, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + } __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_4 = 1; __pyx_v_fin = __pyx_t_3; @@ -4226,14 +4259,14 @@ static PyObject *__pyx_pf_6gensim_7corpora_9_mmreader_8MmReader_11docbyoffset(st * if offset == -1: * return [] * if isinstance(self.input, string_types): # <<<<<<<<<<<<<< - * fin, close_fin = utils.smart_open(self.input), True + * fin, close_fin = utils.open(self.input, 'rb'), True * else: */ goto __pyx_L4; } /* "gensim/corpora/_mmreader.pyx":193 - * fin, close_fin = utils.smart_open(self.input), True + * fin, close_fin = utils.open(self.input, 'rb'), True * else: * fin, close_fin = self.input, False # <<<<<<<<<<<<<< * @@ -4258,18 +4291,18 @@ static PyObject *__pyx_pf_6gensim_7corpora_9_mmreader_8MmReader_11docbyoffset(st */ __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_fin, __pyx_n_s_seek); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 195, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_1 = NULL; + __pyx_t_7 = NULL; if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_5))) { - __pyx_t_1 = PyMethod_GET_SELF(__pyx_t_5); - if (likely(__pyx_t_1)) { + __pyx_t_7 = PyMethod_GET_SELF(__pyx_t_5); + if (likely(__pyx_t_7)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5); - __Pyx_INCREF(__pyx_t_1); + __Pyx_INCREF(__pyx_t_7); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_5, function); } } - __pyx_t_3 = (__pyx_t_1) ? __Pyx_PyObject_Call2Args(__pyx_t_5, __pyx_t_1, __pyx_v_offset) : __Pyx_PyObject_CallOneArg(__pyx_t_5, __pyx_v_offset); - __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = (__pyx_t_7) ? 
__Pyx_PyObject_Call2Args(__pyx_t_5, __pyx_t_7, __pyx_v_offset) : __Pyx_PyObject_CallOneArg(__pyx_t_5, __pyx_v_offset); + __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 195, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; @@ -4282,10 +4315,10 @@ static PyObject *__pyx_pf_6gensim_7corpora_9_mmreader_8MmReader_11docbyoffset(st * for line in fin: * if (sscanf(line, "%lld %lld %lg", &docid, &termid, &val) != 3): */ - __pyx_t_6 = -1LL; + __pyx_t_8 = -1LL; __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 196, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_v_previd = __pyx_t_6; + __pyx_v_previd = __pyx_t_8; __pyx_v_document = ((PyObject*)__pyx_t_3); __pyx_t_3 = 0; @@ -4297,34 +4330,34 @@ static PyObject *__pyx_pf_6gensim_7corpora_9_mmreader_8MmReader_11docbyoffset(st * raise ValueError("unable to parse line: {}".format(line)) */ if (likely(PyList_CheckExact(__pyx_v_fin)) || PyTuple_CheckExact(__pyx_v_fin)) { - __pyx_t_3 = __pyx_v_fin; __Pyx_INCREF(__pyx_t_3); __pyx_t_7 = 0; - __pyx_t_8 = NULL; + __pyx_t_3 = __pyx_v_fin; __Pyx_INCREF(__pyx_t_3); __pyx_t_9 = 0; + __pyx_t_10 = NULL; } else { - __pyx_t_7 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_fin); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 197, __pyx_L1_error) + __pyx_t_9 = -1; __pyx_t_3 = PyObject_GetIter(__pyx_v_fin); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 197, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_8 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 197, __pyx_L1_error) + __pyx_t_10 = Py_TYPE(__pyx_t_3)->tp_iternext; if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 197, __pyx_L1_error) } for (;;) { - if (likely(!__pyx_t_8)) { + if (likely(!__pyx_t_10)) { if (likely(PyList_CheckExact(__pyx_t_3))) { - if (__pyx_t_7 >= PyList_GET_SIZE(__pyx_t_3)) break; + if (__pyx_t_9 >= PyList_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_5 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_7); __Pyx_INCREF(__pyx_t_5); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(0, 197, __pyx_L1_error) + __pyx_t_5 = PyList_GET_ITEM(__pyx_t_3, __pyx_t_9); __Pyx_INCREF(__pyx_t_5); __pyx_t_9++; if (unlikely(0 < 0)) __PYX_ERR(0, 197, __pyx_L1_error) #else - __pyx_t_5 = PySequence_ITEM(__pyx_t_3, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 197, __pyx_L1_error) + __pyx_t_5 = PySequence_ITEM(__pyx_t_3, __pyx_t_9); __pyx_t_9++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 197, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); #endif } else { - if (__pyx_t_7 >= PyTuple_GET_SIZE(__pyx_t_3)) break; + if (__pyx_t_9 >= PyTuple_GET_SIZE(__pyx_t_3)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_7); __Pyx_INCREF(__pyx_t_5); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(0, 197, __pyx_L1_error) + __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_3, __pyx_t_9); __Pyx_INCREF(__pyx_t_5); __pyx_t_9++; if (unlikely(0 < 0)) __PYX_ERR(0, 197, __pyx_L1_error) #else - __pyx_t_5 = PySequence_ITEM(__pyx_t_3, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 197, __pyx_L1_error) + __pyx_t_5 = PySequence_ITEM(__pyx_t_3, __pyx_t_9); __pyx_t_9++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 197, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); #endif } } else { - __pyx_t_5 = __pyx_t_8(__pyx_t_3); + __pyx_t_5 = __pyx_t_10(__pyx_t_3); if (unlikely(!__pyx_t_5)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { @@ -4345,8 +4378,8 @@ static PyObject 
*__pyx_pf_6gensim_7corpora_9_mmreader_8MmReader_11docbyoffset(st * raise ValueError("unable to parse line: {}".format(line)) * */ - __pyx_t_9 = __Pyx_PyObject_AsString(__pyx_v_line); if (unlikely((!__pyx_t_9) && PyErr_Occurred())) __PYX_ERR(0, 198, __pyx_L1_error) - __pyx_t_4 = ((sscanf(__pyx_t_9, ((char const *)"%lld %lld %lg"), (&__pyx_v_docid), (&__pyx_v_termid), (&__pyx_v_val)) != 3) != 0); + __pyx_t_11 = __Pyx_PyObject_AsString(__pyx_v_line); if (unlikely((!__pyx_t_11) && PyErr_Occurred())) __PYX_ERR(0, 198, __pyx_L1_error) + __pyx_t_4 = ((sscanf(__pyx_t_11, ((char const *)"%lld %lld %lg"), (&__pyx_v_docid), (&__pyx_v_termid), (&__pyx_v_val)) != 3) != 0); if (unlikely(__pyx_t_4)) { /* "gensim/corpora/_mmreader.pyx":199 @@ -4356,28 +4389,28 @@ static PyObject *__pyx_pf_6gensim_7corpora_9_mmreader_8MmReader_11docbyoffset(st * * if not self.transposed: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_kp_s_unable_to_parse_line, __pyx_n_s_format); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 199, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_10 = NULL; - if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_1))) { - __pyx_t_10 = PyMethod_GET_SELF(__pyx_t_1); - if (likely(__pyx_t_10)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); - __Pyx_INCREF(__pyx_t_10); + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_kp_s_unable_to_parse_line, __pyx_n_s_format); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 199, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_7); + __pyx_t_1 = NULL; + if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_7))) { + __pyx_t_1 = PyMethod_GET_SELF(__pyx_t_7); + if (likely(__pyx_t_1)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_7); + __Pyx_INCREF(__pyx_t_1); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_1, function); + __Pyx_DECREF_SET(__pyx_t_7, function); } } - __pyx_t_5 = (__pyx_t_10) ? __Pyx_PyObject_Call2Args(__pyx_t_1, __pyx_t_10, __pyx_v_line) : __Pyx_PyObject_CallOneArg(__pyx_t_1, __pyx_v_line); - __Pyx_XDECREF(__pyx_t_10); __pyx_t_10 = 0; + __pyx_t_5 = (__pyx_t_1) ? 
__Pyx_PyObject_Call2Args(__pyx_t_7, __pyx_t_1, __pyx_v_line) : __Pyx_PyObject_CallOneArg(__pyx_t_7, __pyx_v_line); + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 199, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_builtin_ValueError, __pyx_t_5); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 199, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __pyx_t_7 = __Pyx_PyObject_CallOneArg(__pyx_builtin_ValueError, __pyx_t_5); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 199, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __Pyx_Raise(__pyx_t_1, 0, 0, 0); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_Raise(__pyx_t_7, 0, 0, 0); + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __PYX_ERR(0, 199, __pyx_L1_error) /* "gensim/corpora/_mmreader.pyx":198 @@ -4406,10 +4439,10 @@ static PyObject *__pyx_pf_6gensim_7corpora_9_mmreader_8MmReader_11docbyoffset(st * * # -1 because matrix market indexes are 1-based => convert to 0-based */ - __pyx_t_6 = __pyx_v_docid; - __pyx_t_11 = __pyx_v_termid; - __pyx_v_termid = __pyx_t_6; - __pyx_v_docid = __pyx_t_11; + __pyx_t_8 = __pyx_v_docid; + __pyx_t_12 = __pyx_v_termid; + __pyx_v_termid = __pyx_t_8; + __pyx_v_docid = __pyx_t_12; /* "gensim/corpora/_mmreader.pyx":201 * raise ValueError("unable to parse line: {}".format(line)) @@ -4517,20 +4550,20 @@ static PyObject *__pyx_pf_6gensim_7corpora_9_mmreader_8MmReader_11docbyoffset(st * * if close_fin: */ - __pyx_t_1 = __Pyx_PyInt_From_PY_LONG_LONG(__pyx_v_termid); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 214, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_1); + __pyx_t_7 = __Pyx_PyInt_From_PY_LONG_LONG(__pyx_v_termid); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 214, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_7); __pyx_t_5 = PyFloat_FromDouble(__pyx_v_val); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 214, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_10 = PyTuple_New(2); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 214, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_10); - __Pyx_GIVEREF(__pyx_t_1); - PyTuple_SET_ITEM(__pyx_t_10, 0, __pyx_t_1); + __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 214, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_GIVEREF(__pyx_t_7); + PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_7); __Pyx_GIVEREF(__pyx_t_5); - PyTuple_SET_ITEM(__pyx_t_10, 1, __pyx_t_5); - __pyx_t_1 = 0; + PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_t_5); + __pyx_t_7 = 0; __pyx_t_5 = 0; - __pyx_t_12 = __Pyx_PyList_Append(__pyx_v_document, __pyx_t_10); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 214, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; + __pyx_t_13 = __Pyx_PyList_Append(__pyx_v_document, __pyx_t_1); if (unlikely(__pyx_t_13 == ((int)-1))) __PYX_ERR(0, 214, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* "gensim/corpora/_mmreader.pyx":197 * fin.seek(offset) # works for gzip/bz2 input, too @@ -4559,23 +4592,23 @@ static PyObject *__pyx_pf_6gensim_7corpora_9_mmreader_8MmReader_11docbyoffset(st * fin.close() # <<<<<<<<<<<<<< * return document */ - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_fin, __pyx_n_s_close); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 217, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_10); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_fin, __pyx_n_s_close); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 217, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); __pyx_t_5 = NULL; - if (CYTHON_UNPACK_METHODS && 
likely(PyMethod_Check(__pyx_t_10))) { - __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_10); + if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_1))) { + __pyx_t_5 = PyMethod_GET_SELF(__pyx_t_1); if (likely(__pyx_t_5)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_10); + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); __Pyx_INCREF(__pyx_t_5); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_10, function); + __Pyx_DECREF_SET(__pyx_t_1, function); } } - __pyx_t_3 = (__pyx_t_5) ? __Pyx_PyObject_CallOneArg(__pyx_t_10, __pyx_t_5) : __Pyx_PyObject_CallNoArg(__pyx_t_10); + __pyx_t_3 = (__pyx_t_5) ? __Pyx_PyObject_CallOneArg(__pyx_t_1, __pyx_t_5) : __Pyx_PyObject_CallNoArg(__pyx_t_1); __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 217, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; /* "gensim/corpora/_mmreader.pyx":216 @@ -4610,7 +4643,7 @@ static PyObject *__pyx_pf_6gensim_7corpora_9_mmreader_8MmReader_11docbyoffset(st __Pyx_XDECREF(__pyx_t_1); __Pyx_XDECREF(__pyx_t_3); __Pyx_XDECREF(__pyx_t_5); - __Pyx_XDECREF(__pyx_t_10); + __Pyx_XDECREF(__pyx_t_7); __Pyx_AddTraceback("gensim.corpora._mmreader.MmReader.docbyoffset", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; @@ -6374,6 +6407,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_kp_s_matrixmarket_matrix_coordinate, __pyx_k_matrixmarket_matrix_coordinate, sizeof(__pyx_k_matrixmarket_matrix_coordinate), 0, 0, 1, 0}, {&__pyx_n_s_name, __pyx_k_name, sizeof(__pyx_k_name), 0, 0, 1, 1}, {&__pyx_n_s_new, __pyx_k_new, sizeof(__pyx_k_new), 0, 0, 1, 1}, + {&__pyx_n_s_open, __pyx_k_open, sizeof(__pyx_k_open), 0, 0, 1, 1}, {&__pyx_n_s_open_file, __pyx_k_open_file, sizeof(__pyx_k_open_file), 0, 0, 1, 1}, {&__pyx_n_s_pickle, __pyx_k_pickle, sizeof(__pyx_k_pickle), 0, 0, 1, 1}, {&__pyx_n_s_pyx_PickleError, __pyx_k_pyx_PickleError, sizeof(__pyx_k_pyx_PickleError), 0, 0, 1, 1}, @@ -6383,6 +6417,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_pyx_type, __pyx_k_pyx_type, sizeof(__pyx_k_pyx_type), 0, 0, 1, 1}, {&__pyx_n_s_pyx_unpickle_MmReader, __pyx_k_pyx_unpickle_MmReader, sizeof(__pyx_k_pyx_unpickle_MmReader), 0, 0, 1, 1}, {&__pyx_n_s_range, __pyx_k_range, sizeof(__pyx_k_range), 0, 0, 1, 1}, + {&__pyx_n_s_rb, __pyx_k_rb, sizeof(__pyx_k_rb), 0, 0, 1, 1}, {&__pyx_n_s_reduce, __pyx_k_reduce, sizeof(__pyx_k_reduce), 0, 0, 1, 1}, {&__pyx_n_s_reduce_cython, __pyx_k_reduce_cython, sizeof(__pyx_k_reduce_cython), 0, 0, 1, 1}, {&__pyx_n_s_reduce_ex, __pyx_k_reduce_ex, sizeof(__pyx_k_reduce_ex), 0, 0, 1, 1}, @@ -6393,7 +6428,6 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_six, __pyx_k_six, sizeof(__pyx_k_six), 0, 0, 1, 1}, {&__pyx_n_s_six_moves, __pyx_k_six_moves, sizeof(__pyx_k_six_moves), 0, 0, 1, 1}, {&__pyx_n_s_skip_headers, __pyx_k_skip_headers, sizeof(__pyx_k_skip_headers), 0, 0, 1, 1}, - {&__pyx_n_s_smart_open, __pyx_k_smart_open, sizeof(__pyx_k_smart_open), 0, 0, 1, 1}, {&__pyx_n_s_split, __pyx_k_split, sizeof(__pyx_k_split), 0, 0, 1, 1}, {&__pyx_n_s_startswith, __pyx_k_startswith, sizeof(__pyx_k_startswith), 0, 0, 1, 1}, {&__pyx_n_s_string_types, __pyx_k_string_types, sizeof(__pyx_k_string_types), 0, 0, 1, 1}, diff --git a/gensim/corpora/_mmreader.pyx b/gensim/corpora/_mmreader.pyx index 36cf11a1b9..1fff966760 100644 --- a/gensim/corpora/_mmreader.pyx +++ b/gensim/corpora/_mmreader.pyx @@ -188,7 +188,7 @@ cdef class 
MmReader(object): if offset == -1: return [] if isinstance(self.input, string_types): - fin, close_fin = utils.smart_open(self.input), True + fin, close_fin = utils.open(self.input, 'rb'), True else: fin, close_fin = self.input, False diff --git a/gensim/corpora/bleicorpus.py b/gensim/corpora/bleicorpus.py index 701831b1b1..1afde870d2 100644 --- a/gensim/corpora/bleicorpus.py +++ b/gensim/corpora/bleicorpus.py @@ -74,7 +74,7 @@ def __init__(self, fname, fname_vocab=None): raise IOError('BleiCorpus: could not find vocabulary file') self.fname = fname - with utils.smart_open(fname_vocab) as fin: + with utils.open(fname_vocab, 'rb') as fin: words = [utils.to_unicode(word).rstrip() for word in fin] self.id2word = dict(enumerate(words)) @@ -88,7 +88,7 @@ def __iter__(self): """ lineno = -1 - with utils.smart_open(self.fname) as fin: + with utils.open(self.fname, 'rb') as fin: for lineno, line in enumerate(fin): yield self.line2doc(line) self.length = lineno + 1 @@ -149,7 +149,7 @@ def save_corpus(fname, corpus, id2word=None, metadata=False): num_terms = 0 logger.info("storing corpus in Blei's LDA-C format into %s", fname) - with utils.smart_open(fname, 'wb') as fout: + with utils.open(fname, 'wb') as fout: offsets = [] for doc in corpus: doc = list(doc) @@ -160,7 +160,7 @@ def save_corpus(fname, corpus, id2word=None, metadata=False): # write out vocabulary, in a format compatible with Blei's topics.py script fname_vocab = utils.smart_extension(fname, '.vocab') logger.info("saving vocabulary of %i words to %s", num_terms, fname_vocab) - with utils.smart_open(fname_vocab, 'wb') as fout: + with utils.open(fname_vocab, 'wb') as fout: for featureid in range(num_terms): fout.write(utils.to_utf8("%s\n" % id2word.get(featureid, '---'))) @@ -181,6 +181,6 @@ def docbyoffset(self, offset): Document in BoW format. """ - with utils.smart_open(self.fname) as f: + with utils.open(self.fname, 'rb') as f: f.seek(offset) return self.line2doc(f.readline()) diff --git a/gensim/corpora/csvcorpus.py b/gensim/corpora/csvcorpus.py index 16a88a93e9..59fbbe16f2 100644 --- a/gensim/corpora/csvcorpus.py +++ b/gensim/corpora/csvcorpus.py @@ -45,10 +45,11 @@ def __init__(self, fname, labels): self.labels = labels # load the first few lines, to guess the CSV dialect - head = ''.join(itertools.islice(utils.smart_open(self.fname), 5)) - self.headers = csv.Sniffer().has_header(head) - self.dialect = csv.Sniffer().sniff(head) - logger.info("sniffed CSV delimiter=%r, headers=%s", self.dialect.delimiter, self.headers) + with utils.open(self.fname, 'rb') as f: + head = ''.join(itertools.islice(f, 5)) + self.headers = csv.Sniffer().has_header(head) + self.dialect = csv.Sniffer().sniff(head) + logger.info("sniffed CSV delimiter=%r, headers=%s", self.dialect.delimiter, self.headers) def __iter__(self): """Iterate over the corpus, returning one BoW vector at a time. @@ -59,14 +60,15 @@ def __iter__(self): Document in BoW format. 
""" - reader = csv.reader(utils.smart_open(self.fname), self.dialect) - if self.headers: - next(reader) # skip the headers - - line_no = -1 - for line_no, line in enumerate(reader): - if self.labels: - line.pop(0) # ignore the first column = class label - yield list(enumerate(float(x) for x in line)) - - self.length = line_no + 1 # store the total number of CSV rows = documents + with utils.open(self.fname, 'rb') as f: + reader = csv.reader(f, self.dialect) + if self.headers: + next(reader) # skip the headers + + line_no = -1 + for line_no, line in enumerate(reader): + if self.labels: + line.pop(0) # ignore the first column = class label + yield list(enumerate(float(x) for x in line)) + + self.length = line_no + 1 # store the total number of CSV rows = documents diff --git a/gensim/corpora/dictionary.py b/gensim/corpora/dictionary.py index 21df726f3d..561d61babb 100644 --- a/gensim/corpora/dictionary.py +++ b/gensim/corpora/dictionary.py @@ -516,7 +516,7 @@ def save_as_text(self, fname, sort_by_word=True): """ logger.info("saving dictionary mapping to %s", fname) - with utils.smart_open(fname, 'wb') as fout: + with utils.open(fname, 'wb') as fout: numdocs_line = "%d\n" % self.num_docs fout.write(utils.to_utf8(numdocs_line)) if sort_by_word: @@ -669,7 +669,7 @@ def load_from_text(fname): """ result = Dictionary() - with utils.smart_open(fname) as f: + with utils.open(fname, 'rb') as f: for lineno, line in enumerate(f): line = utils.to_unicode(line) if lineno == 0: diff --git a/gensim/corpora/hashdictionary.py b/gensim/corpora/hashdictionary.py index 87c76c590c..cb3f4053ea 100644 --- a/gensim/corpora/hashdictionary.py +++ b/gensim/corpora/hashdictionary.py @@ -341,7 +341,7 @@ def save_as_text(self, fname): """ logger.info("saving %s mapping to %s" % (self, fname)) - with utils.smart_open(fname, 'wb') as fout: + with utils.open(fname, 'wb') as fout: for tokenid in self.keys(): words = sorted(self[tokenid]) if words: diff --git a/gensim/corpora/lowcorpus.py b/gensim/corpora/lowcorpus.py index c67c34b700..d52f190187 100644 --- a/gensim/corpora/lowcorpus.py +++ b/gensim/corpora/lowcorpus.py @@ -131,7 +131,7 @@ def _calculate_num_docs(self): """ # the first line in input data is the number of documents (integer). throws exception on bad input. - with utils.smart_open(self.fname) as fin: + with utils.open(self.fname, 'rb') as fin: try: result = int(next(fin)) except StopIteration: @@ -191,7 +191,7 @@ def __iter__(self): Document in BoW format. """ - with utils.smart_open(self.fname) as fin: + with utils.open(self.fname, 'rb') as fin: for lineno, line in enumerate(fin): if lineno > 0: # ignore the first line = number of documents yield self.line2doc(line) @@ -231,7 +231,7 @@ def save_corpus(fname, corpus, id2word=None, metadata=False): logger.info("storing corpus in List-Of-Words format into %s" % fname) truncated = 0 offsets = [] - with utils.smart_open(fname, 'wb') as fout: + with utils.open(fname, 'wb') as fout: fout.write(utils.to_utf8('%i\n' % len(corpus))) for doc in corpus: words = [] @@ -277,7 +277,7 @@ def docbyoffset(self, offset): [(0, 1), (3, 1), (4, 1)] """ - with utils.smart_open(self.fname) as f: + with utils.open(self.fname, 'rb') as f: f.seek(offset) return self.line2doc(f.readline()) diff --git a/gensim/corpora/malletcorpus.py b/gensim/corpora/malletcorpus.py index c9a82fffbb..2b83a90bb1 100644 --- a/gensim/corpora/malletcorpus.py +++ b/gensim/corpora/malletcorpus.py @@ -83,7 +83,7 @@ def _calculate_num_docs(self): Number of documents in file. 
""" - with utils.smart_open(self.fname) as fin: + with utils.open(self.fname, 'rb') as fin: result = sum(1 for _ in fin) return result @@ -96,7 +96,7 @@ def __iter__(self): Document in BoW format (+"document_id" and "lang" if metadata=True). """ - with utils.smart_open(self.fname) as f: + with utils.open(self.fname, 'rb') as f: for line in f: yield self.line2doc(line) @@ -180,7 +180,7 @@ def save_corpus(fname, corpus, id2word=None, metadata=False): truncated = 0 offsets = [] - with utils.smart_open(fname, 'wb') as fout: + with utils.open(fname, 'wb') as fout: for doc_id, doc in enumerate(corpus): if metadata: doc_id, doc_lang = doc[1] @@ -231,6 +231,6 @@ def docbyoffset(self, offset): [(4, 1)] """ - with utils.smart_open(self.fname) as f: + with utils.open(self.fname, 'rb') as f: f.seek(offset) return self.line2doc(f.readline()) diff --git a/gensim/corpora/svmlightcorpus.py b/gensim/corpora/svmlightcorpus.py index 153bd973e0..5f0b049b07 100644 --- a/gensim/corpora/svmlightcorpus.py +++ b/gensim/corpora/svmlightcorpus.py @@ -74,7 +74,7 @@ def __iter__(self): """ lineno = -1 self.labels = [] - with utils.smart_open(self.fname) as fin: + with utils.open(self.fname, 'rb') as fin: for lineno, line in enumerate(fin): doc = self.line2doc(line) if doc is not None: @@ -115,7 +115,7 @@ def save_corpus(fname, corpus, id2word=None, labels=False, metadata=False): # Cast any sequence (incl. a numpy array) to a list, to simplify the processing below. labels = list(labels) offsets = [] - with utils.smart_open(fname, 'wb') as fout: + with utils.open(fname, 'wb') as fout: for docno, doc in enumerate(corpus): label = labels[docno] if labels else 0 # target class is 0 by default offsets.append(fout.tell()) @@ -135,7 +135,7 @@ def docbyoffset(self, offset): tuple of (int, float) """ - with utils.smart_open(self.fname) as f: + with utils.open(self.fname, 'rb') as f: f.seek(offset) return self.line2doc(f.readline())[0] # TODO: it brakes if gets None from line2doc diff --git a/gensim/corpora/ucicorpus.py b/gensim/corpora/ucicorpus.py index 9831c7bba3..6f5f2f85f3 100644 --- a/gensim/corpora/ucicorpus.py +++ b/gensim/corpora/ucicorpus.py @@ -39,7 +39,7 @@ def __init__(self, input): self.input = input - with utils.smart_open(self.input) as fin: + with utils.open(self.input, 'rb') as fin: self.num_docs = self.num_terms = self.num_nnz = 0 try: self.num_docs = int(next(fin).strip()) @@ -188,7 +188,7 @@ def __init__(self, fname, fname_vocab=None): fname_vocab = utils.smart_extension(fname, '.vocab') self.fname = fname - with utils.smart_open(fname_vocab) as fin: + with utils.open(fname_vocab, 'rb') as fin: words = [word.strip() for word in fin] self.id2word = dict(enumerate(words)) @@ -286,7 +286,7 @@ def save_corpus(fname, corpus, id2word=None, progress_cnt=10000, metadata=False) # write out vocabulary fname_vocab = utils.smart_extension(fname, '.vocab') logger.info("saving vocabulary of %i words to %s", num_terms, fname_vocab) - with utils.smart_open(fname_vocab, 'wb') as fout: + with utils.open(fname_vocab, 'wb') as fout: for featureid in range(num_terms): fout.write(utils.to_utf8("%s\n" % id2word.get(featureid, '---'))) diff --git a/gensim/matutils.py b/gensim/matutils.py index f4ce4a13d8..9f5368db58 100644 --- a/gensim/matutils.py +++ b/gensim/matutils.py @@ -1236,7 +1236,7 @@ def __init__(self, fname): self.fname = fname if fname.endswith(".gz") or fname.endswith('.bz2'): raise NotImplementedError("compressed output not supported with MmWriter") - self.fout = utils.smart_open(self.fname, 'wb+') # open for both 
reading and writing + self.fout = utils.open(self.fname, 'wb+') # open for both reading and writing self.headers_written = False def write_headers(self, num_docs, num_terms, num_nnz): @@ -1574,7 +1574,7 @@ def docbyoffset(self, offset): if offset == -1: return [] if isinstance(self.input, string_types): - fin, close_fin = utils.smart_open(self.input), True + fin, close_fin = utils.open(self.input, 'rb'), True else: fin, close_fin = self.input, False diff --git a/gensim/models/deprecated/doc2vec.py b/gensim/models/deprecated/doc2vec.py index 76e4a7e2d4..9378b77d88 100644 --- a/gensim/models/deprecated/doc2vec.py +++ b/gensim/models/deprecated/doc2vec.py @@ -965,7 +965,7 @@ def save_word2vec_format(self, fname, doctag_vec=False, word_vec=True, prefix='* KeyedVectors.save_word2vec_format(self.wv, fname, fvocab, binary, total_vec) # save document vectors if doctag_vec: - with utils.smart_open(fname, 'ab') as fout: + with utils.open(fname, 'ab') as fout: if not word_vec: total_vec = len(self.docvecs) logger.info("storing %sx%s projection weights into %s", total_vec, self.vector_size, fname) @@ -992,16 +992,17 @@ def __iter__(self): fname = os.path.join(self.dirname, fname) if not os.path.isfile(fname): continue - for item_no, line in enumerate(utils.smart_open(fname)): - line = utils.to_unicode(line) - # each file line is a single document in the Brown corpus - # each token is WORD/POS_TAG - token_tags = [t.split('/') for t in line.split() if len(t.split('/')) == 2] - # ignore words with non-alphabetic tags like ",", "!" etc (punctuation, weird stuff) - words = ["%s/%s" % (token.lower(), tag[:2]) for token, tag in token_tags if tag[:2].isalpha()] - if not words: # don't bother sending out empty documents - continue - yield TaggedDocument(words, ['%s_SENT_%s' % (fname, item_no)]) + with utils.open(fname, 'rb') as f: + for item_no, line in enumerate(f): + line = utils.to_unicode(line) + # each file line is a single document in the Brown corpus + # each token is WORD/POS_TAG + token_tags = [t.split('/') for t in line.split() if len(t.split('/')) == 2] + # ignore words with non-alphabetic tags like ",", "!" 
etc (punctuation, weird stuff) + words = ["%s/%s" % (token.lower(), tag[:2]) for token, tag in token_tags if tag[:2].isalpha()] + if not words: # don't bother sending out empty documents + continue + yield TaggedDocument(words, ['%s_SENT_%s' % (fname, item_no)]) class TaggedLineDocument(object): @@ -1036,6 +1037,6 @@ def __iter__(self): yield TaggedDocument(utils.to_unicode(line).split(), [item_no]) except AttributeError: # If it didn't work like a file, use it as a string filename - with utils.smart_open(self.source) as fin: + with utils.open(self.source, 'rb') as fin: for item_no, line in enumerate(fin): yield TaggedDocument(utils.to_unicode(line).split(), [item_no]) diff --git a/gensim/models/deprecated/keyedvectors.py b/gensim/models/deprecated/keyedvectors.py index 5ead121e48..a8983909d0 100644 --- a/gensim/models/deprecated/keyedvectors.py +++ b/gensim/models/deprecated/keyedvectors.py @@ -154,12 +154,12 @@ def save_word2vec_format(self, fname, fvocab=None, binary=False, total_vec=None) vector_size = self.syn0.shape[1] if fvocab is not None: logger.info("storing vocabulary in %s", fvocab) - with utils.smart_open(fvocab, 'wb') as vout: + with utils.open(fvocab, 'wb') as vout: for word, vocab in sorted(iteritems(self.vocab), key=lambda item: -item[1].count): vout.write(utils.to_utf8("%s %s\n" % (word, vocab.count))) logger.info("storing %sx%s projection weights into %s", total_vec, vector_size, fname) assert (len(self.vocab), vector_size) == self.syn0.shape - with utils.smart_open(fname, 'wb') as fout: + with utils.open(fname, 'wb') as fout: fout.write(utils.to_utf8("%s %s\n" % (total_vec, vector_size))) # store in sorted order: most frequent words at the top for word, vocab in sorted(iteritems(self.vocab), key=lambda item: -item[1].count): @@ -204,13 +204,13 @@ def load_word2vec_format(cls, fname, fvocab=None, binary=False, encoding='utf8', if fvocab is not None: logger.info("loading word counts from %s", fvocab) counts = {} - with utils.smart_open(fvocab) as fin: + with utils.open(fvocab, 'rb') as fin: for line in fin: word, count = utils.to_unicode(line).strip().split() counts[word] = int(count) logger.info("loading projection weights from %s", fname) - with utils.smart_open(fname) as fin: + with utils.open(fname, 'rb') as fin: header = utils.to_unicode(fin.readline(), encoding=encoding) vocab_size, vector_size = (int(x) for x in header.split()) # throws for invalid file format if limit: @@ -934,47 +934,48 @@ def accuracy(self, questions, restrict_vocab=30000, most_similar=most_similar, c ok_vocab = {w.upper(): v for w, v in reversed(ok_vocab)} if case_insensitive else dict(ok_vocab) sections, section = [], None - for line_no, line in enumerate(utils.smart_open(questions)): - # TODO: use level3 BLAS (=evaluate multiple questions at once), for speed - line = utils.to_unicode(line) - if line.startswith(': '): - # a new section starts => store the old section - if section: - sections.append(section) - self.log_accuracy(section) - section = {'section': line.lstrip(': ').strip(), 'correct': [], 'incorrect': []} - else: - if not section: - raise ValueError("missing section header before line #%i in %s" % (line_no, questions)) - try: - if case_insensitive: - a, b, c, expected = [word.upper() for word in line.split()] - else: - a, b, c, expected = [word for word in line.split()] - except ValueError: - logger.info("skipping invalid line #%i in %s", line_no, questions) - continue - if a not in ok_vocab or b not in ok_vocab or c not in ok_vocab or expected not in ok_vocab: - 
logger.debug("skipping line #%i with OOV words: %s", line_no, line.strip()) - continue - - original_vocab = self.vocab - self.vocab = ok_vocab - ignore = {a, b, c} # input words to be ignored - predicted = None - # find the most likely prediction, ignoring OOV words and input words - sims = most_similar(self, positive=[b, c], negative=[a], topn=False, restrict_vocab=restrict_vocab) - self.vocab = original_vocab - for index in matutils.argsort(sims, reverse=True): - predicted = self.index2word[index].upper() if case_insensitive else self.index2word[index] - if predicted in ok_vocab and predicted not in ignore: - if predicted != expected: - logger.debug("%s: expected %s, predicted %s", line.strip(), expected, predicted) - break - if predicted == expected: - section['correct'].append((a, b, c, expected)) + with utils.open(questions, 'rb') as f: + for line_no, line in enumerate(f): + # TODO: use level3 BLAS (=evaluate multiple questions at once), for speed + line = utils.to_unicode(line) + if line.startswith(': '): + # a new section starts => store the old section + if section: + sections.append(section) + self.log_accuracy(section) + section = {'section': line.lstrip(': ').strip(), 'correct': [], 'incorrect': []} else: - section['incorrect'].append((a, b, c, expected)) + if not section: + raise ValueError("missing section header before line #%i in %s" % (line_no, questions)) + try: + if case_insensitive: + a, b, c, expected = [word.upper() for word in line.split()] + else: + a, b, c, expected = [word for word in line.split()] + except ValueError: + logger.info("skipping invalid line #%i in %s", line_no, questions) + continue + if a not in ok_vocab or b not in ok_vocab or c not in ok_vocab or expected not in ok_vocab: + logger.debug("skipping line #%i with OOV words: %s", line_no, line.strip()) + continue + + original_vocab = self.vocab + self.vocab = ok_vocab + ignore = {a, b, c} # input words to be ignored + predicted = None + # find the most likely prediction, ignoring OOV words and input words + sims = most_similar(self, positive=[b, c], negative=[a], topn=False, restrict_vocab=restrict_vocab) + self.vocab = original_vocab + for index in matutils.argsort(sims, reverse=True): + predicted = self.index2word[index].upper() if case_insensitive else self.index2word[index] + if predicted in ok_vocab and predicted not in ignore: + if predicted != expected: + logger.debug("%s: expected %s, predicted %s", line.strip(), expected, predicted) + break + if predicted == expected: + section['correct'].append((a, b, c, expected)) + else: + section['incorrect'].append((a, b, c, expected)) if section: # store the last section, too sections.append(section) @@ -1030,32 +1031,33 @@ def evaluate_word_pairs(self, pairs, delimiter='\t', restrict_vocab=300000, original_vocab = self.vocab self.vocab = ok_vocab - for line_no, line in enumerate(utils.smart_open(pairs)): - line = utils.to_unicode(line) - if line.startswith('#'): - # May be a comment - continue - else: - try: - if case_insensitive: - a, b, sim = [word.upper() for word in line.split(delimiter)] - else: - a, b, sim = [word for word in line.split(delimiter)] - sim = float(sim) - except (ValueError, TypeError): - logger.info('skipping invalid line #%d in %s', line_no, pairs) + with utils.open(pairs, 'rb') as f: + for line_no, line in enumerate(f): + line = utils.to_unicode(line) + if line.startswith('#'): + # May be a comment continue - if a not in ok_vocab or b not in ok_vocab: - oov += 1 - if dummy4unknown: - similarity_model.append(0.0) - 
similarity_gold.append(sim) - continue - else: - logger.debug('skipping line #%d with OOV words: %s', line_no, line.strip()) + else: + try: + if case_insensitive: + a, b, sim = [word.upper() for word in line.split(delimiter)] + else: + a, b, sim = [word for word in line.split(delimiter)] + sim = float(sim) + except (ValueError, TypeError): + logger.info('skipping invalid line #%d in %s', line_no, pairs) continue - similarity_gold.append(sim) # Similarity from the dataset - similarity_model.append(self.similarity(a, b)) # Similarity from the model + if a not in ok_vocab or b not in ok_vocab: + oov += 1 + if dummy4unknown: + similarity_model.append(0.0) + similarity_gold.append(sim) + continue + else: + logger.debug('skipping line #%d with OOV words: %s', line_no, line.strip()) + continue + similarity_gold.append(sim) # Similarity from the dataset + similarity_model.append(self.similarity(a, b)) # Similarity from the model self.vocab = original_vocab spearman = stats.spearmanr(similarity_gold, similarity_model) pearson = stats.pearsonr(similarity_gold, similarity_model) diff --git a/gensim/models/deprecated/old_saveload.py b/gensim/models/deprecated/old_saveload.py index c609dd5532..750d83ed44 100644 --- a/gensim/models/deprecated/old_saveload.py +++ b/gensim/models/deprecated/old_saveload.py @@ -31,7 +31,7 @@ from six import iteritems -from smart_open import smart_open +from gensim import utils if sys.version_info[0] >= 3: unicode = str @@ -367,8 +367,7 @@ def unpickle(fname): Python object loaded from `fname`. """ - with smart_open(fname, 'rb') as f: - # Because of loading from S3 load can't be used (missing readline in smart_open) + with utils.open(fname, 'rb') as f: file_bytes = f.read() file_bytes = file_bytes.replace(b'gensim.models.word2vec', b'gensim.models.deprecated.word2vec') file_bytes = file_bytes.replace(b'gensim.models.keyedvectors', b'gensim.models.deprecated.keyedvectors') @@ -395,5 +394,5 @@ def pickle(obj, fname, protocol=2): Pickle protocol number, default is 2 to support compatible across python 2.x and 3.x. """ - with smart_open(fname, 'wb') as fout: # 'b' for binary, needed on Windows + with utils.open(fname, 'wb') as fout: # 'b' for binary, needed on Windows _pickle.dump(obj, fout, protocol=protocol) diff --git a/gensim/models/deprecated/word2vec.py b/gensim/models/deprecated/word2vec.py index b8b04d4c10..f7c90b6981 100644 --- a/gensim/models/deprecated/word2vec.py +++ b/gensim/models/deprecated/word2vec.py @@ -1413,7 +1413,7 @@ def intersect_word2vec_format(self, fname, lockf=0.0, binary=False, encoding='ut """ overlap_count = 0 logger.info("loading projection weights from %s", fname) - with utils.smart_open(fname) as fin: + with utils.open(fname, 'rb') as fin: header = utils.to_unicode(fin.readline(), encoding=encoding) vocab_size, vector_size = (int(x) for x in header.split()) # throws for invalid file format if not vector_size == self.vector_size: @@ -1699,16 +1699,17 @@ def __iter__(self): fname = os.path.join(self.dirname, fname) if not os.path.isfile(fname): continue - for line in utils.smart_open(fname): - line = utils.to_unicode(line) - # each file line is a single sentence in the Brown corpus - # each token is WORD/POS_TAG - token_tags = [t.split('/') for t in line.split() if len(t.split('/')) == 2] - # ignore words with non-alphabetic tags like ",", "!" 
etc (punctuation, weird stuff) - words = ["%s/%s" % (token.lower(), tag[:2]) for token, tag in token_tags if tag[:2].isalpha()] - if not words: # don't bother sending out empty sentences - continue - yield words + with utils.open(fname, 'rb') as fin: + for line in fin: + line = utils.to_unicode(line) + # each file line is a single sentence in the Brown corpus + # each token is WORD/POS_TAG + token_tags = [t.split('/') for t in line.split() if len(t.split('/')) == 2] + # ignore words with non-alphabetic tags like ",", "!" etc (punctuation, weird stuff) + words = ["%s/%s" % (token.lower(), tag[:2]) for token, tag in token_tags if tag[:2].isalpha()] + if not words: # don't bother sending out empty sentences + continue + yield words class Text8Corpus(object): @@ -1722,7 +1723,7 @@ def __iter__(self): # the entire corpus is one gigantic line -- there are no sentence marks at all # so just split the sequence of tokens arbitrarily: 1 sentence = 1000 tokens sentence, rest = [], b'' - with utils.smart_open(self.fname) as fin: + with utils.open(self.fname, 'rb') as fin: while True: text = rest + fin.read(8192) # avoid loading the entire file (=1 line) into RAM if text == rest: # EOF @@ -1778,7 +1779,7 @@ def __iter__(self): i += self.max_sentence_length except AttributeError: # If it didn't work like a file, use it as a string filename - with utils.smart_open(self.source) as fin: + with utils.open(self.source, 'rb') as fin: for line in itertools.islice(fin, self.limit): line = utils.to_unicode(line).split() i = 0 @@ -1833,7 +1834,7 @@ def __iter__(self): """iterate through the files""" for file_name in self.input_files: logger.info('reading file %s', file_name) - with utils.smart_open(file_name) as fin: + with utils.open(file_name, 'rb') as fin: for line in itertools.islice(fin, self.limit): line = utils.to_unicode(line).split() i = 0 diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index c5743f320a..44ad35ad9a 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -77,7 +77,7 @@ memmap as np_memmap, vstack, integer, dtype, sum as np_sum, add as np_add, repeat as np_repeat, concatenate -from gensim.utils import call_on_class_only +from gensim.utils import call_on_class_only, deprecated from gensim import utils, matutils # utility fnc for pickling, common scipy operations etc from gensim.models.word2vec import Word2VecKeyedVectors, Word2VecVocab, Word2VecTrainables, train_cbow_pair,\ train_sg_pair, train_batch_sg @@ -86,7 +86,6 @@ from gensim.models.base_any2vec import BaseWordEmbeddingsModel from gensim.models.keyedvectors import Doc2VecKeyedVectors from types import GeneratorType -from gensim.utils import deprecated, smart_open logger = logging.getLogger(__name__) @@ -838,7 +837,7 @@ def _get_offsets_and_start_doctags_for_corpusfile(cls, corpus_file, workers): offsets = [] start_doctags = [] - with smart_open(corpus_file, mode='rb') as fin: + with utils.open(corpus_file, mode='rb') as fin: curr_offset_idx = 0 prev_filepos = 0 @@ -1505,16 +1504,17 @@ def __iter__(self): fname = os.path.join(self.dirname, fname) if not os.path.isfile(fname): continue - for item_no, line in enumerate(utils.smart_open(fname)): - line = utils.to_unicode(line) - # each file line is a single document in the Brown corpus - # each token is WORD/POS_TAG - token_tags = [t.split('/') for t in line.split() if len(t.split('/')) == 2] - # ignore words with non-alphabetic tags like ",", "!" 
etc (punctuation, weird stuff) - words = ["%s/%s" % (token.lower(), tag[:2]) for token, tag in token_tags if tag[:2].isalpha()] - if not words: # don't bother sending out empty documents - continue - yield TaggedDocument(words, ['%s_SENT_%s' % (fname, item_no)]) + with utils.open(fname, 'rb') as fin: + for item_no, line in enumerate(fin): + line = utils.to_unicode(line) + # each file line is a single document in the Brown corpus + # each token is WORD/POS_TAG + token_tags = [t.split('/') for t in line.split() if len(t.split('/')) == 2] + # ignore words with non-alphabetic tags like ",", "!" etc (punctuation, weird stuff) + words = ["%s/%s" % (token.lower(), tag[:2]) for token, tag in token_tags if tag[:2].isalpha()] + if not words: # don't bother sending out empty documents + continue + yield TaggedDocument(words, ['%s_SENT_%s' % (fname, item_no)]) class TaggedLineDocument(object): @@ -1562,6 +1562,6 @@ def __iter__(self): yield TaggedDocument(utils.to_unicode(line).split(), [item_no]) except AttributeError: # If it didn't work like a file, use it as a string filename - with utils.smart_open(self.source) as fin: + with utils.open(self.source, 'rb') as fin: for item_no, line in enumerate(fin): yield TaggedDocument(utils.to_unicode(line).split(), [item_no]) diff --git a/gensim/models/fasttext.py b/gensim/models/fasttext.py index d7690d65e0..b24dbac2a7 100644 --- a/gensim/models/fasttext.py +++ b/gensim/models/fasttext.py @@ -105,13 +105,13 @@ .. sourcecode:: pycon >>> from gensim.utils import tokenize - >>> import smart_open + >>> from gensim import utils >>> >>> >>> class MyIter(object): ... def __iter__(self): ... path = datapath('crime-and-punishment.txt') - ... with smart_open.smart_open(path, 'r', encoding='utf-8') as fin: + ... with utils.open(path, 'r', encoding='utf-8') as fin: ... for line in fin: ... yield list(tokenize(line)) >>> @@ -293,8 +293,8 @@ from gensim.models.keyedvectors import FastTextKeyedVectors from gensim.models.base_any2vec import BaseWordEmbeddingsModel from gensim.models.utils_any2vec import ft_ngram_hashes -from smart_open import smart_open +from gensim import utils from gensim.utils import deprecated, call_on_class_only logger = logging.getLogger(__name__) @@ -1326,7 +1326,7 @@ def _load_fasttext_format(model_file, encoding='utf-8', full_model=True): The loaded model. 
""" - with smart_open(model_file, 'rb') as fin: + with utils.open(model_file, 'rb') as fin: m = gensim.models._fasttext_bin.load(fin, encoding=encoding, full_model=full_model) model = FastText( diff --git a/gensim/models/hdpmodel.py b/gensim/models/hdpmodel.py index dde0126209..e0ce5de3a5 100755 --- a/gensim/models/hdpmodel.py +++ b/gensim/models/hdpmodel.py @@ -863,7 +863,7 @@ def save_options(self): logger.error("cannot store options without having specified an output directory") return fname = '%s/options.dat' % self.outputdir - with utils.smart_open(fname, 'wb') as fout: + with utils.open(fname, 'wb') as fout: fout.write('tau: %s\n' % str(self.m_tau - 1)) fout.write('chunksize: %s\n' % str(self.chunksize)) fout.write('var_converge: %s\n' % str(self.m_var_converge)) diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py index 27cc21a8d1..b534f6a784 100644 --- a/gensim/models/keyedvectors.py +++ b/gensim/models/keyedvectors.py @@ -1062,52 +1062,53 @@ def evaluate_word_analogies(self, analogies, restrict_vocab=300000, case_insensi logger.info("Evaluating word analogies for top %i words in the model on %s", restrict_vocab, analogies) sections, section = [], None quadruplets_no = 0 - for line_no, line in enumerate(utils.smart_open(analogies)): - line = utils.to_unicode(line) - if line.startswith(': '): - # a new section starts => store the old section - if section: - sections.append(section) - self._log_evaluate_word_analogies(section) - section = {'section': line.lstrip(': ').strip(), 'correct': [], 'incorrect': []} - else: - if not section: - raise ValueError("Missing section header before line #%i in %s" % (line_no, analogies)) - try: - if case_insensitive: - a, b, c, expected = [word.upper() for word in line.split()] + with utils.open(analogies, 'rb') as fin: + for line_no, line in enumerate(fin): + line = utils.to_unicode(line) + if line.startswith(': '): + # a new section starts => store the old section + if section: + sections.append(section) + self._log_evaluate_word_analogies(section) + section = {'section': line.lstrip(': ').strip(), 'correct': [], 'incorrect': []} + else: + if not section: + raise ValueError("Missing section header before line #%i in %s" % (line_no, analogies)) + try: + if case_insensitive: + a, b, c, expected = [word.upper() for word in line.split()] + else: + a, b, c, expected = [word for word in line.split()] + except ValueError: + logger.info("Skipping invalid line #%i in %s", line_no, analogies) + continue + quadruplets_no += 1 + if a not in ok_vocab or b not in ok_vocab or c not in ok_vocab or expected not in ok_vocab: + oov += 1 + if dummy4unknown: + logger.debug('Zero accuracy for line #%d with OOV words: %s', line_no, line.strip()) + section['incorrect'].append((a, b, c, expected)) + else: + logger.debug("Skipping line #%i with OOV words: %s", line_no, line.strip()) + continue + original_vocab = self.vocab + self.vocab = ok_vocab + ignore = {a, b, c} # input words to be ignored + predicted = None + # find the most likely prediction using 3CosAdd (vector offset) method + # TODO: implement 3CosMul and set-based methods for solving analogies + sims = self.most_similar(positive=[b, c], negative=[a], topn=5, restrict_vocab=restrict_vocab) + self.vocab = original_vocab + for element in sims: + predicted = element[0].upper() if case_insensitive else element[0] + if predicted in ok_vocab and predicted not in ignore: + if predicted != expected: + logger.debug("%s: expected %s, predicted %s", line.strip(), expected, predicted) + break + if 
predicted == expected: + section['correct'].append((a, b, c, expected)) else: - a, b, c, expected = [word for word in line.split()] - except ValueError: - logger.info("Skipping invalid line #%i in %s", line_no, analogies) - continue - quadruplets_no += 1 - if a not in ok_vocab or b not in ok_vocab or c not in ok_vocab or expected not in ok_vocab: - oov += 1 - if dummy4unknown: - logger.debug('Zero accuracy for line #%d with OOV words: %s', line_no, line.strip()) section['incorrect'].append((a, b, c, expected)) - else: - logger.debug("Skipping line #%i with OOV words: %s", line_no, line.strip()) - continue - original_vocab = self.vocab - self.vocab = ok_vocab - ignore = {a, b, c} # input words to be ignored - predicted = None - # find the most likely prediction using 3CosAdd (vector offset) method - # TODO: implement 3CosMul and set-based methods for solving analogies - sims = self.most_similar(positive=[b, c], negative=[a], topn=5, restrict_vocab=restrict_vocab) - self.vocab = original_vocab - for element in sims: - predicted = element[0].upper() if case_insensitive else element[0] - if predicted in ok_vocab and predicted not in ignore: - if predicted != expected: - logger.debug("%s: expected %s, predicted %s", line.strip(), expected, predicted) - break - if predicted == expected: - section['correct'].append((a, b, c, expected)) - else: - section['incorrect'].append((a, b, c, expected)) if section: # store the last section, too sections.append(section) @@ -1174,46 +1175,47 @@ def accuracy(self, questions, restrict_vocab=30000, most_similar=most_similar, c ok_vocab = {w.upper(): v for w, v in reversed(ok_vocab)} if case_insensitive else dict(ok_vocab) sections, section = [], None - for line_no, line in enumerate(utils.smart_open(questions)): - # TODO: use level3 BLAS (=evaluate multiple questions at once), for speed - line = utils.to_unicode(line) - if line.startswith(': '): - # a new section starts => store the old section - if section: - sections.append(section) - self.log_accuracy(section) - section = {'section': line.lstrip(': ').strip(), 'correct': [], 'incorrect': []} - else: - if not section: - raise ValueError("Missing section header before line #%i in %s" % (line_no, questions)) - try: - if case_insensitive: - a, b, c, expected = [word.upper() for word in line.split()] - else: - a, b, c, expected = [word for word in line.split()] - except ValueError: - logger.info("Skipping invalid line #%i in %s", line_no, questions) - continue - if a not in ok_vocab or b not in ok_vocab or c not in ok_vocab or expected not in ok_vocab: - logger.debug("Skipping line #%i with OOV words: %s", line_no, line.strip()) - continue - original_vocab = self.vocab - self.vocab = ok_vocab - ignore = {a, b, c} # input words to be ignored - predicted = None - # find the most likely prediction, ignoring OOV words and input words - sims = most_similar(self, positive=[b, c], negative=[a], topn=None, restrict_vocab=restrict_vocab) - self.vocab = original_vocab - for index in matutils.argsort(sims, reverse=True): - predicted = self.index2word[index].upper() if case_insensitive else self.index2word[index] - if predicted in ok_vocab and predicted not in ignore: - if predicted != expected: - logger.debug("%s: expected %s, predicted %s", line.strip(), expected, predicted) - break - if predicted == expected: - section['correct'].append((a, b, c, expected)) + with utils.open(questions, 'rb') as fin: + for line_no, line in enumerate(fin): + # TODO: use level3 BLAS (=evaluate multiple questions at once), for speed + line = 
utils.to_unicode(line) + if line.startswith(': '): + # a new section starts => store the old section + if section: + sections.append(section) + self.log_accuracy(section) + section = {'section': line.lstrip(': ').strip(), 'correct': [], 'incorrect': []} else: - section['incorrect'].append((a, b, c, expected)) + if not section: + raise ValueError("Missing section header before line #%i in %s" % (line_no, questions)) + try: + if case_insensitive: + a, b, c, expected = [word.upper() for word in line.split()] + else: + a, b, c, expected = [word for word in line.split()] + except ValueError: + logger.info("Skipping invalid line #%i in %s", line_no, questions) + continue + if a not in ok_vocab or b not in ok_vocab or c not in ok_vocab or expected not in ok_vocab: + logger.debug("Skipping line #%i with OOV words: %s", line_no, line.strip()) + continue + original_vocab = self.vocab + self.vocab = ok_vocab + ignore = {a, b, c} # input words to be ignored + predicted = None + # find the most likely prediction, ignoring OOV words and input words + sims = most_similar(self, positive=[b, c], negative=[a], topn=None, restrict_vocab=restrict_vocab) + self.vocab = original_vocab + for index in matutils.argsort(sims, reverse=True): + predicted = self.index2word[index].upper() if case_insensitive else self.index2word[index] + if predicted in ok_vocab and predicted not in ignore: + if predicted != expected: + logger.debug("%s: expected %s, predicted %s", line.strip(), expected, predicted) + break + if predicted == expected: + section['correct'].append((a, b, c, expected)) + else: + section['incorrect'].append((a, b, c, expected)) if section: # store the last section, too sections.append(section) @@ -1285,33 +1287,34 @@ def evaluate_word_pairs(self, pairs, delimiter='\t', restrict_vocab=300000, original_vocab = self.vocab self.vocab = ok_vocab - for line_no, line in enumerate(utils.smart_open(pairs)): - line = utils.to_unicode(line) - if line.startswith('#'): - # May be a comment - continue - else: - try: - if case_insensitive: - a, b, sim = [word.upper() for word in line.split(delimiter)] - else: - a, b, sim = [word for word in line.split(delimiter)] - sim = float(sim) - except (ValueError, TypeError): - logger.info('Skipping invalid line #%d in %s', line_no, pairs) + with utils.open(pairs, 'rb') as fin: + for line_no, line in enumerate(fin): + line = utils.to_unicode(line) + if line.startswith('#'): + # May be a comment continue - if a not in ok_vocab or b not in ok_vocab: - oov += 1 - if dummy4unknown: - logger.debug('Zero similarity for line #%d with OOV words: %s', line_no, line.strip()) - similarity_model.append(0.0) - similarity_gold.append(sim) - continue - else: - logger.debug('Skipping line #%d with OOV words: %s', line_no, line.strip()) + else: + try: + if case_insensitive: + a, b, sim = [word.upper() for word in line.split(delimiter)] + else: + a, b, sim = [word for word in line.split(delimiter)] + sim = float(sim) + except (ValueError, TypeError): + logger.info('Skipping invalid line #%d in %s', line_no, pairs) continue - similarity_gold.append(sim) # Similarity from the dataset - similarity_model.append(self.similarity(a, b)) # Similarity from the model + if a not in ok_vocab or b not in ok_vocab: + oov += 1 + if dummy4unknown: + logger.debug('Zero similarity for line #%d with OOV words: %s', line_no, line.strip()) + similarity_model.append(0.0) + similarity_gold.append(sim) + continue + else: + logger.debug('Skipping line #%d with OOV words: %s', line_no, line.strip()) + continue + 
similarity_gold.append(sim) # Similarity from the dataset + similarity_model.append(self.similarity(a, b)) # Similarity from the model self.vocab = original_vocab spearman = stats.spearmanr(similarity_gold, similarity_model) pearson = stats.pearsonr(similarity_gold, similarity_model) @@ -1888,7 +1891,7 @@ def save_word2vec_format(self, fname, prefix='*dt_', fvocab=None, """ total_vec = total_vec or len(self) - with utils.smart_open(fname, 'ab') as fout: + with utils.open(fname, 'ab') as fout: if write_first_line: logger.info("storing %sx%s projection weights into %s", total_vec, self.vectors_docs.shape[1], fname) fout.write(utils.to_utf8("%s %s\n" % (total_vec, self.vectors_docs.shape[1]))) diff --git a/gensim/models/poincare.py b/gensim/models/poincare.py index 0c49c761f2..0040352bde 100644 --- a/gensim/models/poincare.py +++ b/gensim/models/poincare.py @@ -53,7 +53,6 @@ from scipy.stats import spearmanr from six import string_types from six.moves import zip, range -from smart_open import smart_open from gensim import utils, matutils from gensim.models.keyedvectors import Vocab, BaseKeyedVectors @@ -1409,7 +1408,7 @@ def __iter__(self): Relation from input file. """ - with smart_open(self.file_path) as file_obj: + with utils.open(self.file_path, 'rb') as file_obj: if sys.version_info[0] < 3: lines = file_obj else: @@ -1490,7 +1489,7 @@ def __init__(self, file_path, embedding): items = set() embedding_vocab = embedding.vocab relations = defaultdict(set) - with smart_open(file_path, 'r') as f: + with utils.open(file_path, 'r') as f: reader = csv.reader(f, delimiter='\t') for row in reader: assert len(row) == 2, 'Hypernym pair has more than two items' @@ -1598,7 +1597,7 @@ def __init__(self, train_path, test_path, embedding): relations = {'known': defaultdict(set), 'unknown': defaultdict(set)} data_files = {'known': train_path, 'unknown': test_path} for relation_type, data_file in data_files.items(): - with smart_open(data_file, 'r') as f: + with utils.open(data_file, 'r') as f: reader = csv.reader(f, delimiter='\t') for row in reader: assert len(row) == 2, 'Hypernym pair has more than two items' @@ -1702,7 +1701,7 @@ def __init__(self, filepath): """ expected_scores = {} - with smart_open(filepath, 'r') as f: + with utils.open(filepath, 'r') as f: reader = csv.DictReader(f, delimiter=' ') for row in reader: word_1, word_2 = row['WORD1'], row['WORD2'] diff --git a/gensim/models/utils_any2vec.py b/gensim/models/utils_any2vec.py index 1d9e03647c..90d2f60fbc 100644 --- a/gensim/models/utils_any2vec.py +++ b/gensim/models/utils_any2vec.py @@ -274,12 +274,12 @@ def _save_word2vec_format(fname, vocab, vectors, fvocab=None, binary=False, tota vector_size = vectors.shape[1] if fvocab is not None: logger.info("storing vocabulary in %s", fvocab) - with utils.smart_open(fvocab, 'wb') as vout: + with utils.open(fvocab, 'wb') as vout: for word, vocab_ in sorted(iteritems(vocab), key=lambda item: -item[1].count): vout.write(utils.to_utf8("%s %s\n" % (word, vocab_.count))) logger.info("storing %sx%s projection weights into %s", total_vec, vector_size, fname) assert (len(vocab), vector_size) == vectors.shape - with utils.smart_open(fname, 'wb') as fout: + with utils.open(fname, 'wb') as fout: fout.write(utils.to_utf8("%s %s\n" % (total_vec, vector_size))) # store in sorted order: most frequent words at the top for word, vocab_ in sorted(iteritems(vocab), key=lambda item: -item[1].count): @@ -333,13 +333,13 @@ def _load_word2vec_format(cls, fname, fvocab=None, binary=False, encoding='utf8' if fvocab is not 
None: logger.info("loading word counts from %s", fvocab) counts = {} - with utils.smart_open(fvocab) as fin: + with utils.open(fvocab, 'rb') as fin: for line in fin: word, count = utils.to_unicode(line).strip().split() counts[word] = int(count) logger.info("loading projection weights from %s", fname) - with utils.smart_open(fname) as fin: + with utils.open(fname, 'rb') as fin: header = utils.to_unicode(fin.readline(), encoding=encoding) vocab_size, vector_size = (int(x) for x in header.split()) # throws for invalid file format if limit: diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index a2da6a64ff..5fd91c75ec 100755 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -1077,7 +1077,7 @@ def intersect_word2vec_format(self, fname, lockf=0.0, binary=False, encoding='ut """ overlap_count = 0 logger.info("loading projection weights from %s", fname) - with utils.smart_open(fname) as fin: + with utils.open(fname, 'rb') as fin: header = utils.to_unicode(fin.readline(), encoding=encoding) vocab_size, vector_size = (int(x) for x in header.split()) # throws for invalid file format if not vector_size == self.wv.vector_size: @@ -1354,16 +1354,17 @@ def __iter__(self): fname = os.path.join(self.dirname, fname) if not os.path.isfile(fname): continue - for line in utils.smart_open(fname): - line = utils.to_unicode(line) - # each file line is a single sentence in the Brown corpus - # each token is WORD/POS_TAG - token_tags = [t.split('/') for t in line.split() if len(t.split('/')) == 2] - # ignore words with non-alphabetic tags like ",", "!" etc (punctuation, weird stuff) - words = ["%s/%s" % (token.lower(), tag[:2]) for token, tag in token_tags if tag[:2].isalpha()] - if not words: # don't bother sending out empty sentences - continue - yield words + with utils.open(fname, 'rb') as fin: + for line in fin: + line = utils.to_unicode(line) + # each file line is a single sentence in the Brown corpus + # each token is WORD/POS_TAG + token_tags = [t.split('/') for t in line.split() if len(t.split('/')) == 2] + # ignore words with non-alphabetic tags like ",", "!" 
etc (punctuation, weird stuff) + words = ["%s/%s" % (token.lower(), tag[:2]) for token, tag in token_tags if tag[:2].isalpha()] + if not words: # don't bother sending out empty sentences + continue + yield words class Text8Corpus(object): @@ -1376,7 +1377,7 @@ def __iter__(self): # the entire corpus is one gigantic line -- there are no sentence marks at all # so just split the sequence of tokens arbitrarily: 1 sentence = 1000 tokens sentence, rest = [], b'' - with utils.smart_open(self.fname) as fin: + with utils.open(self.fname, 'rb') as fin: while True: text = rest + fin.read(8192) # avoid loading the entire file (=1 line) into RAM if text == rest: # EOF @@ -1437,7 +1438,7 @@ def __iter__(self): i += self.max_sentence_length except AttributeError: # If it didn't work like a file, use it as a string filename - with utils.smart_open(self.source) as fin: + with utils.open(self.source, 'rb') as fin: for line in itertools.islice(fin, self.limit): line = utils.to_unicode(line).split() i = 0 @@ -1493,7 +1494,7 @@ def __iter__(self): """iterate through the files""" for file_name in self.input_files: logger.info('reading file %s', file_name) - with utils.smart_open(file_name) as fin: + with utils.open(file_name, 'rb') as fin: for line in itertools.islice(fin, self.limit): line = utils.to_unicode(line).split() i = 0 diff --git a/gensim/models/wrappers/dtmmodel.py b/gensim/models/wrappers/dtmmodel.py index 99601cd5c8..a0a2c7c6c5 100644 --- a/gensim/models/wrappers/dtmmodel.py +++ b/gensim/models/wrappers/dtmmodel.py @@ -322,7 +322,7 @@ def convert_input(self, corpus, time_slices): # write out the corpus in a file format that DTM understands: corpora.BleiCorpus.save_corpus(self.fcorpustxt(), corpus) - with utils.smart_open(self.ftimeslices(), 'wb') as fout: + with utils.open(self.ftimeslices(), 'wb') as fout: fout.write(utils.to_utf8(str(len(self.time_slices)) + "\n")) for sl in time_slices: fout.write(utils.to_utf8(str(sl) + "\n")) diff --git a/gensim/models/wrappers/ldamallet.py b/gensim/models/wrappers/ldamallet.py index c82dbf03f6..a7660b2eff 100644 --- a/gensim/models/wrappers/ldamallet.py +++ b/gensim/models/wrappers/ldamallet.py @@ -54,7 +54,6 @@ from itertools import chain import numpy -from smart_open import smart_open from gensim import utils, matutils from gensim.models import basemodel @@ -245,7 +244,7 @@ def convert_input(self, corpus, infer=False, serialize_corpus=True): """ if serialize_corpus: logger.info("serializing temporary corpus to %s", self.fcorpustxt()) - with smart_open(self.fcorpustxt(), 'wb') as fout: + with utils.open(self.fcorpustxt(), 'wb') as fout: self.corpus2mallet(corpus, fout) # convert the text file above into MALLET's internal format @@ -341,7 +340,7 @@ def load_word_topics(self): else: word2id = revdict(self.id2word) - with utils.smart_open(self.fstate()) as fin: + with utils.open(self.fstate(), 'rb') as fin: _ = next(fin) # header self.alpha = numpy.fromiter(next(fin).split()[2:], dtype=float) assert len(self.alpha) == self.num_topics, "mismatch between MALLET vs. 
requested topics" @@ -505,7 +504,7 @@ def read_doctopics(self, fname, eps=1e-6, renorm=True): """ mallet_version = self.get_version(self.mallet_path) - with utils.smart_open(fname) as fin: + with utils.open(fname, 'rb') as fin: for lineno, line in enumerate(fin): if lineno == 0 and line.startswith(b"#doc "): continue # skip the header line if it exists diff --git a/gensim/models/wrappers/ldavowpalwabbit.py b/gensim/models/wrappers/ldavowpalwabbit.py index 45ba27ba47..f7c286a349 100644 --- a/gensim/models/wrappers/ldavowpalwabbit.py +++ b/gensim/models/wrappers/ldavowpalwabbit.py @@ -414,12 +414,12 @@ def save(self, fname, *args, **kwargs): # variable before serialising this object - keeps all data # self contained within a single serialised file logger.debug("Reading model bytes from '%s'", self._model_filename) - with utils.smart_open(self._model_filename, 'rb') as fhandle: + with utils.open(self._model_filename, 'rb') as fhandle: self._model_data = fhandle.read() if os.path.exists(self._topics_filename): logger.debug("Reading topic bytes from '%s'", self._topics_filename) - with utils.smart_open(self._topics_filename, 'rb') as fhandle: + with utils.open(self._topics_filename, 'rb') as fhandle: self._topics_data = fhandle.read() if 'ignore' not in kwargs: @@ -444,13 +444,13 @@ def load(cls, fname, *args, **kwargs): # Vowpal Wabbit operates on its own binary model file - deserialise # to file at load time, making it immediately ready for use logger.debug("Writing model bytes to '%s'", lda_vw._model_filename) - with utils.smart_open(lda_vw._model_filename, 'wb') as fhandle: + with utils.open(lda_vw._model_filename, 'wb') as fhandle: fhandle.write(lda_vw._model_data) lda_vw._model_data = None # no need to keep in memory after this if lda_vw._topics_data: logger.debug("Writing topic bytes to '%s'", lda_vw._topics_filename) - with utils.smart_open(lda_vw._topics_filename, 'wb') as fhandle: + with utils.open(lda_vw._topics_filename, 'wb') as fhandle: fhandle.write(lda_vw._topics_data) lda_vw._topics_data = None @@ -566,7 +566,7 @@ def _load_vw_topics(self): """Read topics file generated by Vowpal Wabbit, convert to numpy array.""" topics = numpy.zeros((self.num_topics, self.num_terms), dtype=numpy.float32) - with utils.smart_open(self._topics_filename) as topics_file: + with utils.open(self._topics_filename, 'rb') as topics_file: found_data = False for line in topics_file: @@ -620,7 +620,7 @@ def _predict(self, chunk): predictions = numpy.zeros((corpus_size, self.num_topics), dtype=numpy.float32) - with utils.smart_open(self._predict_filename) as fhandle: + with utils.open(self._predict_filename, 'rb') as fhandle: for i, line in enumerate(fhandle): predictions[i, :] = line.split() @@ -796,7 +796,7 @@ def write_corpus_as_vw(corpus, filename): logger.debug("Writing corpus to: %s", filename) corpus_size = 0 - with utils.smart_open(filename, 'wb') as corpus_file: + with utils.open(filename, 'wb') as corpus_file: for line in corpus_to_vw(corpus): corpus_file.write(line.encode('utf-8') + b'\n') corpus_size += 1 diff --git a/gensim/models/wrappers/wordrank.py b/gensim/models/wrappers/wordrank.py index 018fe1f9d6..946787506f 100644 --- a/gensim/models/wrappers/wordrank.py +++ b/gensim/models/wrappers/wordrank.py @@ -59,7 +59,6 @@ from gensim.models.keyedvectors import KeyedVectors from gensim.scripts.glove2word2vec import glove2word2vec -from smart_open import smart_open from shutil import copyfile, rmtree @@ -174,19 +173,19 @@ def train(cls, wr_path, corpus_file, out_name, size=100, window=15, 
symmetric=1, logger.info("Prepare training data (%s) using glove code", ", ".join(input_fnames)) for command, input_fname, output_fname in zip(commands, input_fnames, output_fnames): - with smart_open(input_fname, 'rb') as r: - with smart_open(output_fname, 'wb') as w: + with utils.open(input_fname, 'rb') as r: + with utils.open(output_fname, 'wb') as w: utils.check_output(w, args=command, stdin=r) logger.info("Deleting frequencies from vocab file") - with smart_open(vocab_file, 'wb') as w: + with utils.open(vocab_file, 'wb') as w: utils.check_output(w, args=cmd_del_vocab_freq) - with smart_open(vocab_file, 'rb') as f: + with utils.open(vocab_file, 'rb') as f: numwords = sum(1 for _ in f) - with smart_open(cooccurrence_shuf_file, 'rb') as f: + with utils.open(cooccurrence_shuf_file, 'rb') as f: numlines = sum(1 for _ in f) - with smart_open(meta_file, 'wb') as f: + with utils.open(meta_file, 'wb') as f: meta_info = "{0} {1}\n{2} {3}\n{4} {5}".format( numwords, numwords, numlines, cooccurrence_shuf_file.split('/')[-1], numwords, vocab_file.split('/')[-1] @@ -284,7 +283,7 @@ def sort_embeddings(self, vocab_file): self.index2word = [] # sort embeddings using frequency sorted vocab file in wordrank - with utils.smart_open(vocab_file) as fin: + with utils.open(vocab_file, 'rb') as fin: for index, line in enumerate(fin): word, count = utils.to_unicode(line).strip(), vocab_size - index # store word with it's count in a dict diff --git a/gensim/parsing/preprocessing.py b/gensim/parsing/preprocessing.py index c8010a980a..97acef1f22 100644 --- a/gensim/parsing/preprocessing.py +++ b/gensim/parsing/preprocessing.py @@ -401,7 +401,7 @@ def preprocess_documents(docs): def read_file(path): - with utils.smart_open(path) as fin: + with utils.open(path, 'rb') as fin: return fin.read() diff --git a/gensim/scripts/glove2word2vec.py b/gensim/scripts/glove2word2vec.py index 39fb683f58..836b0e6b8f 100644 --- a/gensim/scripts/glove2word2vec.py +++ b/gensim/scripts/glove2word2vec.py @@ -59,7 +59,7 @@ import logging import argparse -from smart_open import smart_open +from gensim import utils logger = logging.getLogger(__name__) @@ -78,9 +78,9 @@ def get_glove_info(glove_file_name): Number of vectors (lines) of input file and its dimension. """ - with smart_open(glove_file_name) as f: + with utils.open(glove_file_name, 'rb') as f: num_lines = sum(1 for _ in f) - with smart_open(glove_file_name) as f: + with utils.open(glove_file_name, 'rb') as f: num_dims = len(f.readline().split()) - 1 return num_lines, num_dims @@ -103,9 +103,9 @@ def glove2word2vec(glove_input_file, word2vec_output_file): """ num_lines, num_dims = get_glove_info(glove_input_file) logger.info("converting %i vectors from %s to %s", num_lines, glove_input_file, word2vec_output_file) - with smart_open(word2vec_output_file, 'wb') as fout: + with utils.open(word2vec_output_file, 'wb') as fout: fout.write("{0} {1}\n".format(num_lines, num_dims).encode('utf-8')) - with smart_open(glove_input_file, 'rb') as fin: + with utils.open(glove_input_file, 'rb') as fin: for line in fin: fout.write(line) return num_lines, num_dims diff --git a/gensim/scripts/segment_wiki.py b/gensim/scripts/segment_wiki.py index e4b6bd9f8d..db15619fd6 100644 --- a/gensim/scripts/segment_wiki.py +++ b/gensim/scripts/segment_wiki.py @@ -23,20 +23,22 @@ .. 
sourcecode:: pycon - >>> from smart_open import smart_open + >>> from gensim import utils >>> import json >>> >>> # iterate over the plain text data we just created - >>> for line in smart_open('enwiki-latest.json.gz'): - >>> # decode each JSON line into a Python dictionary object - >>> article = json.loads(line) + >>> with utils.open('enwiki-latest.json.gz', 'rb') as f: + >>> for line in f: + >>> # decode each JSON line into a Python dictionary object + >>> article = json.loads(line) >>> - >>> # each article has a "title", a mapping of interlinks and a list of "section_titles" and "section_texts". - >>> print("Article title: %s" % article['title']) - >>> print("Interlinks: %s" + article['interlinks']) - >>> for section_title, section_text in zip(article['section_titles'], article['section_texts']): - >>> print("Section title: %s" % section_title) - >>> print("Section text: %s" % section_text) + >>> # each article has a "title", a mapping of interlinks and a list of "section_titles" and + >>> # "section_texts". + >>> print("Article title: %s" % article['title']) + >>> print("Interlinks: %s" + article['interlinks']) + >>> for section_title, section_text in zip(article['section_titles'], article['section_texts']): + >>> print("Section title: %s" % section_title) + >>> print("Section text: %s" % section_text) Notes @@ -63,7 +65,7 @@ from functools import partial from gensim.corpora.wikicorpus import IGNORED_NAMESPACES, WikiCorpus, filter_wiki, find_interlinks, get_namespace, utils -from smart_open import smart_open +import gensim.utils logger = logging.getLogger(__name__) @@ -92,7 +94,7 @@ def segment_all_articles(file_path, min_article_character=200, workers=None, inc Structure contains (title, [(section_heading, section_content), ...], (Optionally) {interlinks}). 
""" - with smart_open(file_path, 'rb') as xml_fileobj: + with gensim.utils.open(file_path, 'rb') as xml_fileobj: wiki_sections_corpus = _WikiSectionsCorpus( xml_fileobj, min_article_character=min_article_character, processes=workers, include_interlinks=include_interlinks) @@ -135,7 +137,7 @@ def segment_and_write_all_articles(file_path, output_file, min_article_character if output_file is None: outfile = getattr(sys.stdout, 'buffer', sys.stdout) # we want write bytes, so for py3 we used 'buffer' else: - outfile = smart_open(output_file, 'wb') + outfile = gensim.utils.open(output_file, 'wb') try: article_stream = segment_all_articles(file_path, min_article_character, workers=workers, diff --git a/gensim/scripts/word2vec2tensor.py b/gensim/scripts/word2vec2tensor.py index 5bf8d2e23b..5d151aa854 100644 --- a/gensim/scripts/word2vec2tensor.py +++ b/gensim/scripts/word2vec2tensor.py @@ -44,8 +44,8 @@ import logging import argparse -from smart_open import smart_open import gensim +from gensim import utils logger = logging.getLogger(__name__) @@ -69,7 +69,7 @@ def word2vec2tensor(word2vec_model_path, tensor_filename, binary=False): outfiletsv = tensor_filename + '_tensor.tsv' outfiletsvmeta = tensor_filename + '_metadata.tsv' - with smart_open(outfiletsv, 'wb') as file_vector, smart_open(outfiletsvmeta, 'wb') as file_metadata: + with utils.open(outfiletsv, 'wb') as file_vector, utils.open(outfiletsvmeta, 'wb') as file_metadata: for word in model.index2word: file_metadata.write(gensim.utils.to_utf8(word) + gensim.utils.to_utf8('\n')) vector_row = '\t'.join(str(x) for x in model[word]) diff --git a/gensim/similarities/index.py b/gensim/similarities/index.py index 681fe58ef4..1f27e6c82c 100644 --- a/gensim/similarities/index.py +++ b/gensim/similarities/index.py @@ -33,12 +33,12 @@ """ import os -from smart_open import smart_open try: import cPickle as _pickle except ImportError: import pickle as _pickle +from gensim import utils from gensim.models.doc2vec import Doc2Vec from gensim.models.word2vec import Word2Vec from gensim.models.fasttext import FastText @@ -116,7 +116,7 @@ def save(self, fname, protocol=2): fname_dict = fname + '.d' self.index.save(fname) d = {'f': self.model.vector_size, 'num_trees': self.num_trees, 'labels': self.labels} - with smart_open(fname_dict, 'wb') as fout: + with utils.open(fname_dict, 'wb') as fout: _pickle.dump(d, fout, protocol=protocol) def load(self, fname): @@ -153,7 +153,7 @@ def load(self, fname): "Can't find index files '%s' and '%s' - Unable to restore AnnoyIndexer state." 
% (fname, fname_dict) ) else: - with smart_open(fname_dict) as f: + with utils.open(fname_dict, 'rb') as f: d = _pickle.loads(f.read()) self.num_trees = d['num_trees'] self.index = AnnoyIndex(d['f']) diff --git a/gensim/test/test_doc2vec.py b/gensim/test/test_doc2vec.py index 6ac510ea8e..712d9778bd 100644 --- a/gensim/test/test_doc2vec.py +++ b/gensim/test/test_doc2vec.py @@ -190,7 +190,7 @@ def test_get_offsets_and_start_doctags(self): lines = ['line1\n', 'line2\n', 'line3\n', 'line4\n', 'line5\n'] tmpf = get_tmpfile('gensim_doc2vec.tst') - with utils.smart_open(tmpf, 'wb', encoding='utf8') as fout: + with utils.open(tmpf, 'wb', encoding='utf8') as fout: for line in lines: fout.write(utils.any2unicode(line)) @@ -224,7 +224,7 @@ def test_get_offsets_and_start_doctags_win(self): lines = ['line1\n', 'line2\n', 'line3\n', 'line4\n', 'line5\n'] tmpf = get_tmpfile('gensim_doc2vec.tst') - with utils.smart_open(tmpf, 'wb', encoding='utf8') as fout: + with utils.open(tmpf, 'wb', encoding='utf8') as fout: for line in lines: fout.write(utils.any2unicode(line)) @@ -257,7 +257,7 @@ def test_cython_linesentence_readline_after_getting_offsets(self): lines = ['line1\n', 'line2\n', 'line3\n', 'line4\n', 'line5\n'] tmpf = get_tmpfile('gensim_doc2vec.tst') - with utils.smart_open(tmpf, 'wb', encoding='utf8') as fout: + with utils.open(tmpf, 'wb', encoding='utf8') as fout: for line in lines: fout.write(utils.any2unicode(line)) diff --git a/gensim/test/test_fasttext.py b/gensim/test/test_fasttext.py index 99dd9c503f..e0f7597925 100644 --- a/gensim/test/test_fasttext.py +++ b/gensim/test/test_fasttext.py @@ -12,8 +12,6 @@ import numpy as np -import smart_open - from gensim import utils from gensim.models.word2vec import LineSentence from gensim.models.fasttext import FastText as FT_gensim @@ -995,7 +993,7 @@ def setUp(self): def test_in_vocab(self): """Test for correct representation of in-vocab words.""" native = load_native() - with smart_open.smart_open(datapath('toy-model.vec'), 'r', encoding='utf-8') as fin: + with utils.open(datapath('toy-model.vec'), 'r', encoding='utf-8') as fin: expected = dict(load_vec(fin)) for word, expected_vector in expected.items(): @@ -1187,7 +1185,7 @@ def setUp(self): # ./fasttext skipgram -minCount 0 -bucket 100 -input crime-and-punishment.txt -output crime-and-punishment -dim 5 # noqa: E501 # self.model = gensim.models.fasttext.load_facebook_model(datapath('crime-and-punishment.bin')) - with smart_open.smart_open(datapath('crime-and-punishment.vec'), 'r', encoding='utf-8') as fin: + with utils.open(datapath('crime-and-punishment.vec'), 'r', encoding='utf-8') as fin: self.expected = dict(load_vec(fin)) def test_ascii(self): diff --git a/gensim/test/test_keywords.py b/gensim/test/test_keywords.py index 79df82fba6..6011c83df4 100644 --- a/gensim/test/test_keywords.py +++ b/gensim/test/test_keywords.py @@ -25,14 +25,14 @@ class TestKeywordsTest(unittest.TestCase): def test_text_keywords(self): pre_path = os.path.join(os.path.dirname(__file__), 'test_data') - with utils.smart_open(os.path.join(pre_path, "mihalcea_tarau.txt"), mode="r") as f: + with utils.open(os.path.join(pre_path, "mihalcea_tarau.txt"), mode="r") as f: text = f.read() # calculate keywords generated_keywords = keywords(text, split=True) # To be compared to the reference. 
- with utils.smart_open(os.path.join(pre_path, "mihalcea_tarau.kw.txt"), mode="r") as f: + with utils.open(os.path.join(pre_path, "mihalcea_tarau.kw.txt"), mode="r") as f: kw = f.read().strip().split("\n") self.assertEqual({str(x) for x in generated_keywords}, {str(x) for x in kw}) @@ -40,7 +40,7 @@ def test_text_keywords(self): def test_text_keywords_words(self): pre_path = os.path.join(os.path.dirname(__file__), 'test_data') - with utils.smart_open(os.path.join(pre_path, "mihalcea_tarau.txt"), mode="r") as f: + with utils.open(os.path.join(pre_path, "mihalcea_tarau.txt"), mode="r") as f: text = f.read() # calculate exactly 13 keywords @@ -51,14 +51,14 @@ def test_text_keywords_words(self): def test_text_keywords_pos(self): pre_path = os.path.join(os.path.dirname(__file__), 'test_data') - with utils.smart_open(os.path.join(pre_path, "mihalcea_tarau.txt"), mode="r") as f: + with utils.open(os.path.join(pre_path, "mihalcea_tarau.txt"), mode="r") as f: text = f.read() # calculate keywords using only certain parts of speech generated_keywords_nnvbjj = keywords(text, pos_filter=['NN', 'VB', 'JJ'], ratio=0.3, split=True) # To be compared to the reference. - with utils.smart_open(os.path.join(pre_path, "mihalcea_tarau.kwpos.txt"), mode="r") as f: + with utils.open(os.path.join(pre_path, "mihalcea_tarau.kwpos.txt"), mode="r") as f: kw = f.read().strip().split("\n") self.assertEqual({str(x) for x in generated_keywords_nnvbjj}, {str(x) for x in kw}) @@ -66,7 +66,7 @@ def test_text_keywords_pos(self): def test_text_summarization_raises_exception_on_short_input_text(self): pre_path = os.path.join(os.path.dirname(__file__), 'test_data') - with utils.smart_open(os.path.join(pre_path, "testsummarization_unrelated.txt"), mode="r") as f: + with utils.open(os.path.join(pre_path, "testsummarization_unrelated.txt"), mode="r") as f: text = f.read() # Keeps the first 8 sentences to make the text shorter. @@ -77,7 +77,7 @@ def test_text_summarization_raises_exception_on_short_input_text(self): def test_keywords_ratio(self): pre_path = os.path.join(os.path.dirname(__file__), 'test_data') - with utils.smart_open(os.path.join(pre_path, "mihalcea_tarau.txt"), mode="r") as f: + with utils.open(os.path.join(pre_path, "mihalcea_tarau.txt"), mode="r") as f: text = f.read() # Check ratio parameter is well behaved. 
Because length is taken on tokenized clean text diff --git a/gensim/test/test_lee.py b/gensim/test/test_lee.py index ff0de9dc3f..1eadd398a9 100644 --- a/gensim/test/test_lee.py +++ b/gensim/test/test_lee.py @@ -52,13 +52,13 @@ def setUp(self): # read in the corpora latin1 = partial(utils.to_unicode, encoding='latin1') - with utils.smart_open(os.path.join(pre_path, bg_corpus_file)) as f: + with utils.open(os.path.join(pre_path, bg_corpus_file), 'rb') as f: bg_corpus = preprocess_documents(latin1(line) for line in f) - with utils.smart_open(os.path.join(pre_path, corpus_file)) as f: + with utils.open(os.path.join(pre_path, corpus_file), 'rb') as f: corpus = preprocess_documents(latin1(line) for line in f) - with utils.smart_open(os.path.join(pre_path, bg_corpus_file)) as f: + with utils.open(os.path.join(pre_path, bg_corpus_file), 'rb') as f: bg_corpus2 = [preprocess_string(latin1(s), filters=DEFAULT_FILTERS[:-1]) for s in f] - with utils.smart_open(os.path.join(pre_path, corpus_file)) as f: + with utils.open(os.path.join(pre_path, corpus_file), 'rb') as f: corpus2 = [preprocess_string(latin1(s), filters=DEFAULT_FILTERS[:-1]) for s in f] # read the human similarity data diff --git a/gensim/test/test_scripts.py b/gensim/test/test_scripts.py index 2fa625e942..1e0144e2af 100644 --- a/gensim/test/test_scripts.py +++ b/gensim/test/test_scripts.py @@ -16,9 +16,9 @@ import os.path import unittest -from smart_open import smart_open import numpy as np +from gensim import utils from gensim.scripts.segment_wiki import segment_all_articles, segment_and_write_all_articles from gensim.test.utils import datapath, get_tmpfile @@ -85,7 +85,8 @@ def test_json_len(self): segment_and_write_all_articles(self.fname, tmpf, workers=1) expected_num_articles = 106 - num_articles = sum(1 for line in smart_open(tmpf)) + with utils.open(tmpf, 'rb') as f: + num_articles = sum(1 for line in f) self.assertEqual(num_articles, expected_num_articles) def test_segment_and_write_all_articles(self): @@ -120,14 +121,14 @@ def setUp(self): def testConversion(self): word2vec2tensor(word2vec_model_path=self.datapath, tensor_filename=self.output_folder) - with smart_open(self.metadata_file, 'rb') as f: + with utils.open(self.metadata_file, 'rb') as f: metadata = f.readlines() - with smart_open(self.tensor_file, 'rb') as f: + with utils.open(self.tensor_file, 'rb') as f: vectors = f.readlines() # check if number of words and vector size in tensor file line up with word2vec - with smart_open(self.datapath, 'rb') as f: + with utils.open(self.datapath, 'rb') as f: first_line = f.readline().strip() number_words, vector_size = map(int, first_line.split(b' ')) diff --git a/gensim/test/test_similarities.py b/gensim/test/test_similarities.py index 428431ea15..5a739db7e7 100644 --- a/gensim/test/test_similarities.py +++ b/gensim/test/test_similarities.py @@ -17,7 +17,7 @@ import numpy import scipy -from smart_open import smart_open +from gensim import utils from gensim.corpora import Dictionary from gensim.models import word2vec from gensim.models import doc2vec @@ -560,7 +560,7 @@ def __init__(self, fn): self.fn = fn def __iter__(self): - with smart_open(self.fn, 'r', encoding="latin_1") as infile: + with utils.open(self.fn, 'r', encoding="latin_1") as infile: for line in infile: yield line.lower().strip().split() diff --git a/gensim/test/test_summarization.py b/gensim/test/test_summarization.py index 81a562a9d8..c7ef335323 100644 --- a/gensim/test/test_summarization.py +++ b/gensim/test/test_summarization.py @@ -142,7 +142,7 @@ class 
TestSummarizationTest(unittest.TestCase): def _get_text_from_test_data(self, file): pre_path = os.path.join(os.path.dirname(__file__), 'test_data') - with utils.smart_open(os.path.join(pre_path, file), mode="r") as f: + with utils.open(os.path.join(pre_path, file), mode="r") as f: return f.read() def test_text_summarization(self): @@ -268,7 +268,7 @@ def test_keywords_runs(self): def test_mz_keywords(self): pre_path = os.path.join(os.path.dirname(__file__), 'test_data') - with utils.smart_open(os.path.join(pre_path, "head500.noblanks.cor")) as f: + with utils.open(os.path.join(pre_path, "head500.noblanks.cor"), 'rb') as f: text = utils.to_unicode(f.read()) text = u' '.join(text.split()[:10240]) kwds = mz_keywords(text) diff --git a/gensim/test/test_translation_matrix.py b/gensim/test/test_translation_matrix.py index 4f7959336e..7be7ce4b63 100644 --- a/gensim/test/test_translation_matrix.py +++ b/gensim/test/test_translation_matrix.py @@ -77,7 +77,7 @@ def test_translate_gc(self): def read_sentiment_docs(filename): sentiment_document = namedtuple('SentimentDocument', 'words tags') alldocs = [] # will hold all docs in original order - with utils.smart_open(filename, encoding='utf-8') as alldata: + with utils.open(filename, mode='rb', encoding='utf-8') as alldata: for line_no, line in enumerate(alldata): tokens = utils.to_unicode(line).split() words = tokens diff --git a/gensim/test/test_utils.py b/gensim/test/test_utils.py index c23087580f..f6d954777c 100644 --- a/gensim/test/test_utils.py +++ b/gensim/test/test_utils.py @@ -20,8 +20,6 @@ import gensim.models.utils_any2vec -import smart_open - DISABLE_CYTHON_TESTS = getattr(gensim.models.utils_any2vec, 'FAST_VERSION', None) == -1 @@ -252,7 +250,7 @@ def test_save_as_line_sentence_en(self): utils.save_as_line_sentence(ref_sentences, corpus_file) - with utils.smart_open(corpus_file, encoding='utf8') as fin: + with utils.open(corpus_file, 'rb', encoding='utf8') as fin: sentences = [line.strip().split() for line in fin.read().strip().split('\n')] self.assertEqual(sentences, ref_sentences) @@ -261,7 +259,7 @@ def test_save_as_line_sentence_ru(self): ref_sentences = [l.split() for l in utils.any2unicode('привет мир\nкак ты поживаешь').split('\n')] utils.save_as_line_sentence(ref_sentences, corpus_file) - with utils.smart_open(corpus_file, encoding='utf8') as fin: + with utils.open(corpus_file, 'rb', encoding='utf8') as fin: sentences = [line.strip().split() for line in fin.read().strip().split('\n')] self.assertEqual(sentences, ref_sentences) @@ -520,7 +518,7 @@ def test_bytes_cy(self): def test_fb(self): """Test against results from Facebook's implementation.""" - with smart_open.smart_open(datapath('fb-ngrams.txt'), 'r', encoding='utf-8') as fin: + with utils.open(datapath('fb-ngrams.txt'), 'r', encoding='utf-8') as fin: fb = dict(_read_fb(fin)) for word, expected in fb.items(): diff --git a/gensim/test/test_word2vec.py b/gensim/test/test_word2vec.py index 11257bebb1..b7cc5a8fb5 100644 --- a/gensim/test/test_word2vec.py +++ b/gensim/test/test_word2vec.py @@ -1060,7 +1060,7 @@ def testIdenticalSentences(self): class TestWord2VecSentenceIterators(unittest.TestCase): def testLineSentenceWorksWithFilename(self): """Does LineSentence work with a filename argument?""" - with utils.smart_open(datapath('lee_background.cor')) as orig: + with utils.open(datapath('lee_background.cor'), 'rb') as orig: sentences = word2vec.LineSentence(datapath('lee_background.cor')) for words in sentences: self.assertEqual(words, utils.to_unicode(orig.readline()).split()) 
@@ -1069,41 +1069,41 @@ def testLineSentenceWorksWithFilename(self): def testCythonLineSentenceWorksWithFilename(self): """Does CythonLineSentence work with a filename argument?""" from gensim.models import word2vec_corpusfile - with utils.smart_open(datapath('lee_background.cor')) as orig: + with utils.open(datapath('lee_background.cor'), 'rb') as orig: sentences = word2vec_corpusfile.CythonLineSentence(datapath('lee_background.cor')) for words in sentences: self.assertEqual(words, orig.readline().split()) def testLineSentenceWorksWithCompressedFile(self): """Does LineSentence work with a compressed file object argument?""" - with utils.smart_open(datapath('head500.noblanks.cor')) as orig: + with utils.open(datapath('head500.noblanks.cor'), 'rb') as orig: sentences = word2vec.LineSentence(bz2.BZ2File(datapath('head500.noblanks.cor.bz2'))) for words in sentences: self.assertEqual(words, utils.to_unicode(orig.readline()).split()) def testLineSentenceWorksWithNormalFile(self): """Does LineSentence work with a file object argument, rather than filename?""" - with utils.smart_open(datapath('head500.noblanks.cor')) as orig: - with utils.smart_open(datapath('head500.noblanks.cor')) as fin: + with utils.open(datapath('head500.noblanks.cor'), 'rb') as orig: + with utils.open(datapath('head500.noblanks.cor'), 'rb') as fin: sentences = word2vec.LineSentence(fin) for words in sentences: self.assertEqual(words, utils.to_unicode(orig.readline()).split()) def testPathLineSentences(self): """Does PathLineSentences work with a path argument?""" - with utils.smart_open(os.path.join(datapath('PathLineSentences'), '1.txt')) as orig1,\ - utils.smart_open(os.path.join(datapath('PathLineSentences'), '2.txt.bz2')) as orig2: - sentences = word2vec.PathLineSentences(datapath('PathLineSentences')) - orig = orig1.readlines() + orig2.readlines() - orig_counter = 0 # to go through orig while matching PathLineSentences - for words in sentences: - self.assertEqual(words, utils.to_unicode(orig[orig_counter]).split()) - orig_counter += 1 + with utils.open(os.path.join(datapath('PathLineSentences'), '1.txt'), 'rb') as orig1: + with utils.open(os.path.join(datapath('PathLineSentences'), '2.txt.bz2'), 'rb') as orig2: + sentences = word2vec.PathLineSentences(datapath('PathLineSentences')) + orig = orig1.readlines() + orig2.readlines() + orig_counter = 0 # to go through orig while matching PathLineSentences + for words in sentences: + self.assertEqual(words, utils.to_unicode(orig[orig_counter]).split()) + orig_counter += 1 def testPathLineSentencesOneFile(self): """Does PathLineSentences work with a single file argument?""" test_file = os.path.join(datapath('PathLineSentences'), '1.txt') - with utils.smart_open(test_file) as orig: + with utils.open(test_file, 'rb') as orig: sentences = word2vec.PathLineSentences(test_file) for words in sentences: self.assertEqual(words, utils.to_unicode(orig.readline()).split()) diff --git a/gensim/utils.py b/gensim/utils.py index 4b6853a3b8..cf7eca6c90 100644 --- a/gensim/utils.py +++ b/gensim/utils.py @@ -42,7 +42,7 @@ from six import iterkeys, iteritems, itervalues, u, string_types, unichr from six.moves import range -from smart_open import smart_open +from smart_open import open from multiprocessing import cpu_count @@ -128,7 +128,7 @@ def file_or_filename(input): """ if isinstance(input, string_types): # input was a filename: open as file - return smart_open(input) + return open(input, 'rb') else: # input already a file-like object; just reset to the beginning input.seek(0) @@ -1360,7 
+1360,7 @@ def pickle(obj, fname, protocol=2): Pickle protocol number. Default is 2 in order to support compatibility across python 2.x and 3.x. """ - with smart_open(fname, 'wb') as fout: # 'b' for binary, needed on Windows + with open(fname, 'wb') as fout: # 'b' for binary, needed on Windows _pickle.dump(obj, fout, protocol=protocol) @@ -1378,7 +1378,7 @@ def unpickle(fname): Python object loaded from `fname`. """ - with smart_open(fname, 'rb') as f: + with open(fname, 'rb') as f: # Because of loading from S3 load can't be used (missing readline in smart_open) if sys.version_info > (3, 0): return _pickle.load(f, encoding='latin1') @@ -2079,7 +2079,7 @@ def save_as_line_sentence(corpus, filename): corpus : iterable of iterables of strings """ - with smart_open(filename, mode='wb', encoding='utf8') as fout: + with open(filename, mode='wb', encoding='utf8') as fout: for sentence in corpus: line = any2unicode(' '.join(sentence) + '\n') fout.write(line) From fd025137aafa045fb244ff36b4c39c28e86eb686 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Fri, 28 Jun 2019 13:35:42 +0900 Subject: [PATCH 2/3] reduce scope of context manager in csvcorpus.py --- gensim/corpora/csvcorpus.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/gensim/corpora/csvcorpus.py b/gensim/corpora/csvcorpus.py index 59fbbe16f2..a3e94ae85e 100644 --- a/gensim/corpora/csvcorpus.py +++ b/gensim/corpora/csvcorpus.py @@ -47,9 +47,10 @@ def __init__(self, fname, labels): # load the first few lines, to guess the CSV dialect with utils.open(self.fname, 'rb') as f: head = ''.join(itertools.islice(f, 5)) - self.headers = csv.Sniffer().has_header(head) - self.dialect = csv.Sniffer().sniff(head) - logger.info("sniffed CSV delimiter=%r, headers=%s", self.dialect.delimiter, self.headers) + + self.headers = csv.Sniffer().has_header(head) + self.dialect = csv.Sniffer().sniff(head) + logger.info("sniffed CSV delimiter=%r, headers=%s", self.dialect.delimiter, self.headers) def __iter__(self): """Iterate over the corpus, returning one BoW vector at a time. From bb1a4f34aebb69f8167791be4609744ceb305465 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Fri, 28 Jun 2019 14:38:47 +0900 Subject: [PATCH 3/3] Update csvcorpus.py --- gensim/corpora/csvcorpus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/corpora/csvcorpus.py b/gensim/corpora/csvcorpus.py index a3e94ae85e..505d7c9be1 100644 --- a/gensim/corpora/csvcorpus.py +++ b/gensim/corpora/csvcorpus.py @@ -47,7 +47,7 @@ def __init__(self, fname, labels): # load the first few lines, to guess the CSV dialect with utils.open(self.fname, 'rb') as f: head = ''.join(itertools.islice(f, 5)) - + self.headers = csv.Sniffer().has_header(head) self.dialect = csv.Sniffer().sniff(head) logger.info("sniffed CSV delimiter=%r, headers=%s", self.dialect.delimiter, self.headers)
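
For reference, the calling convention this series standardizes on is gensim.utils.open (re-exported from smart_open.open in gensim/utils.py, see the final hunk of patch 1) with an explicit mode and a context manager, in place of the deprecated smart_open()/utils.smart_open() calls. A minimal sketch of the new pattern, assuming a gensim checkout with this series applied and an existing input file; count_lines is a hypothetical helper for illustration only, not part of the patch:

    from gensim import utils

    def count_lines(path):
        # old (deprecated, triggers the smart_open warning): for line in utils.smart_open(path): ...
        # new: explicit binary mode, handle closed automatically by the context manager
        with utils.open(path, 'rb') as fin:
            return sum(1 for _ in fin)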