pandas-dev · jreback · Mar 20, 2019 · Jan 26, 2019 · Jan 26, 2019 · Feb 15, 2019
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -26,6 +26,7 @@ Other Enhancements
 - :meth:`DataFrame.set_index` now works for instances of ``abc.Iterator``, provided their output is of the same length as the calling frame (:issue:`22484`, :issue:`24984`)
 - :meth:`DatetimeIndex.union` now supports the ``sort`` argument. The behaviour of the sort parameter matches that of :meth:`Index.union` (:issue:`24994`)
 - :meth:`DataFrame.rename` now supports the ``errors`` argument to raise errors when attempting to rename nonexistent keys (:issue:`13473`)
+- :meth:`DataFrame.query` and :meth:`DataFrame.eval` now support quoting column names with backticks to refer to names with spaces (:issue:`6508`)
 
 .. _whatsnew_0250.api_breaking:
 

diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
@@ -38,6 +38,7 @@
 import inspect
 from collections import namedtuple
 import collections
+import uuid
 
 PY2 = sys.version_info[0] == 2
 PY3 = sys.version_info[0] >= 3
@@ -115,6 +116,7 @@ def get_range_parameters(data):
     reduce = functools.reduce
     long = int
     unichr = chr
+    uuid3 = uuid.uuid3
 
     # This was introduced in Python 3.3, but we don't support
     # Python 3.x < 3.5, so checking PY3 is safe.
@@ -162,6 +164,10 @@ def bytes_to_str(b, encoding='ascii'):
     def signature(f):
         return inspect.getargspec(f)
 
+    # name is encoded to bytes; see https://bugs.python.org/issue34145
+    def uuid3(namespace, name):
+        return uuid.uuid3(namespace, name.encode('utf-8'))
+
     def get_range_parameters(data):
         """Gets the start, stop, and step parameters from a range object"""
         # seems we only have indexing ops to infer

diff --git a/pandas/core/computation/common.py b/pandas/core/computation/common.py
@@ -1,9 +1,14 @@
+import uuid
+
 import numpy as np
 
-from pandas.compat import reduce
+from pandas.compat import reduce, string_types, uuid3
 
 import pandas as pd
 
+# A token value Python's tokenizer probably will never use.
+_BACKTICK_QUOTED_STRING = 100
+
 
 def _ensure_decoded(s):
     """ if we have bytes, decode them to unicode """
@@ -22,5 +27,16 @@ def _result_type_many(*arrays_and_dtypes):
         return reduce(np.result_type, arrays_and_dtypes)
 
 
+def _remove_spaces_column_name(name):
+    """Return ``name`` unchanged unless it is a string containing spaces; then
+    replace spaces with underscores and append a deterministic uuid3 suffix."""
+    if not isinstance(name, string_types) or " " not in name:
+        return name
+
+    # The uuid3 suffix is reproducible and keeps "C C" distinct from "C_C".
+    return name.replace(" ", "_") + "_" + str(uuid3(
+        uuid.NAMESPACE_DNS, name)).replace("-", "")
+
+
 class NameResolutionError(NameError):
     pass
diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py
@@ -3,16 +3,20 @@
 
 import ast
 from functools import partial
+import itertools as it
+import operator
 import tokenize
 
 import numpy as np
 
-from pandas.compat import StringIO, lmap, reduce, string_types, zip
+from pandas.compat import StringIO, lmap, map, reduce, string_types, zip
 
 import pandas as pd
 from pandas import compat
 from pandas.core import common as com
 from pandas.core.base import StringMixin
+from pandas.core.computation.common import (
+    _BACKTICK_QUOTED_STRING, _remove_spaces_column_name)
 from pandas.core.computation.ops import (
     _LOCAL_TAG, BinOp, Constant, Div, FuncNode, Op, Term, UnaryOp,
     UndefinedVariableError, _arith_ops_syms, _bool_ops_syms, _cmp_ops_syms,
@@ -31,7 +35,17 @@ def tokenize_string(source):
         A Python source code string
     """
     line_reader = StringIO(source).readline
-    for toknum, tokval, _, _, _ in tokenize.generate_tokens(line_reader):
+    token_generator = tokenize.generate_tokens(line_reader)
+
+    # Loop over all tokens till a backtick (`) is found.
+    # Then, take all tokens till the next backtick to form a backtick quoted
+    # string.
+    for toknum, tokval, _, _, _ in token_generator:
+        if tokval == '`':
+            tokval = " ".join(it.takewhile(
+                lambda tokval: tokval != '`',
+                map(operator.itemgetter(1), token_generator)))
+            toknum = _BACKTICK_QUOTED_STRING
         yield toknum, tokval
 
 
@@ -102,6 +116,31 @@ def _replace_locals(tok):
     return toknum, tokval
 
 
+def _clean_spaces_backtick_quoted_names(tok):
+    """Clean up a column name if surrounded by backticks.
+
+    Backtick quoted strings are indicated by a certain toknum value. If a
+    token is a backtick quoted string it will be processed by
+    :func:`_remove_spaces_column_name` so that the parser can find this
+    string when the query is executed.
+    See also :meth:`NDFrame._get_space_character_free_column_resolvers`.
+
+    Parameters
+    ----------
+    tok : tuple of int, str
+        ints correspond to the all caps constants in the tokenize module
+
+    Returns
+    -------
+    t : tuple of int, str
+        Either the input token or the replacement values
+    """
+    toknum, tokval = tok
+    if toknum == _BACKTICK_QUOTED_STRING:
+        return tokenize.NAME, _remove_spaces_column_name(tokval)
+    return toknum, tokval
+
+
 def _compose2(f, g):
     """Compose 2 callables"""
     return lambda *args, **kwargs: f(g(*args, **kwargs))
@@ -114,7 +153,8 @@ def _compose(*funcs):
 
 
 def _preparse(source, f=_compose(_replace_locals, _replace_booleans,
-                                 _rewrite_assign)):
+                                 _rewrite_assign,
+                                 _clean_spaces_backtick_quoted_names)):
     """Compose a collection of tokenization functions
 
     Parameters
@@ -711,8 +751,9 @@ def visitor(x, y):
 class PandasExprVisitor(BaseExprVisitor):
 
     def __init__(self, env, engine, parser,
-                 preparser=partial(_preparse, f=_compose(_replace_locals,
-                                                         _replace_booleans))):
+                 preparser=partial(_preparse, f=_compose(
+                     _replace_locals, _replace_booleans,
+                     _clean_spaces_backtick_quoted_names))):
         super(PandasExprVisitor, self).__init__(env, engine, parser, preparser)
 
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -2967,6 +2967,15 @@ def query(self, expr, inplace=False, **kwargs):
             The query string to evaluate.  You can refer to variables
             in the environment by prefixing them with an '@' character like
             ``@a + b``.
+
+            .. versionadded:: 0.25.0
+
+            You can refer to column names that contain spaces by surrounding
+            them in backticks.
+
+            For example, if one of your columns is called ``a a`` and you want
+            to sum it with ``b``, your query should be ```a a` + b``.
+
         inplace : bool
             Whether the query should modify the data in place or return
             a modified copy.
@@ -3160,7 +3169,9 @@ def eval(self, expr, inplace=False, **kwargs):
         kwargs['level'] = kwargs.pop('level', 0) + 1
         if resolvers is None:
             index_resolvers = self._get_index_resolvers()
-            resolvers = dict(self.iteritems()), index_resolvers
+            column_resolvers = \
+                self._get_space_character_free_column_resolvers()
+            resolvers = column_resolvers, index_resolvers
         if 'target' not in kwargs:
             kwargs['target'] = self
         kwargs['resolvers'] = kwargs.get('resolvers', ()) + tuple(resolvers)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -38,6 +38,7 @@
 import pandas.core.algorithms as algos
 from pandas.core.base import PandasObject, SelectionMixin
 import pandas.core.common as com
+from pandas.core.computation.common import _remove_spaces_column_name
 from pandas.core.index import (
     Index, InvalidIndexError, MultiIndex, RangeIndex, ensure_index)
 from pandas.core.indexes.datetimes import DatetimeIndex
@@ -423,6 +424,17 @@ def _get_index_resolvers(self):
             d.update(self._get_axis_resolvers(axis_name))
         return d
 
+    def _get_space_character_free_column_resolvers(self):
+        """Return the space character free column resolvers of a dataframe.
+
+        Each name from ``iteritems()`` is passed through
+        :func:`_remove_spaces_column_name`, so names with spaces can be
+        referred to by backtick quoting. Used in :meth:`DataFrame.eval`.
+        """
+
+        return {_remove_spaces_column_name(k): v for k, v
+                in self.iteritems()}
+
     @property
     def _info_axis(self):
         return getattr(self, self._info_axis_name)

diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py
@@ -1031,3 +1031,54 @@ def test_invalid_type_for_operator_raises(self, parser, engine, op):
 
         with pytest.raises(TypeError, match=msg):
             df.eval('a {0} b'.format(op), engine=engine, parser=parser)
+
+
+class TestDataFrameQueryBacktickQuoting(object):
+
+    @pytest.fixture(scope='class')
+    def df(self):
+        yield DataFrame({'A': [1, 2, 3],
+                         'B B': [3, 2, 1],
+                         'C C': [4, 5, 6],
+                         'C_C': [8, 9, 10],  # must not clash with 'C C'
+                         'D_D D': [11, 1, 101]})  # underscore and space mixed
+
+    def test_single_backtick_variable_query(self, df):
+        res = df.query('1 < `B B`')
+        expect = df[1 < df['B B']]
+        assert_frame_equal(res, expect)
+
+    def test_two_backtick_variables_query(self, df):
+        res = df.query('1 < `B B` and 4 < `C C`')
+        expect = df[(1 < df['B B']) & (4 < df['C C'])]
+        assert_frame_equal(res, expect)
+
+    def test_single_backtick_variable_expr(self, df):
+        res = df.eval('A + `B B`')
+        expect = df['A'] + df['B B']
+        assert_series_equal(res, expect)
+
+    def test_two_backtick_variables_expr(self, df):
+        res = df.eval('`B B` + `C C`')
+        expect = df['B B'] + df['C C']
+        assert_series_equal(res, expect)
+
+    def test_already_underscore_variable(self, df):
+        res = df.eval('`C_C` + A')
+        expect = df['C_C'] + df['A']
+        assert_series_equal(res, expect)
+
+    def test_same_name_but_underscores(self, df):
+        res = df.eval('C_C + `C C`')
+        expect = df['C_C'] + df['C C']
+        assert_series_equal(res, expect)
+
+    def test_mixed_underscores_and_spaces(self, df):
+        res = df.eval('A + `D_D D`')
+        expect = df['A'] + df['D_D D']
+        assert_series_equal(res, expect)
+
+    def test_backtick_quote_name_with_no_spaces(self, df):
+        res = df.eval('A + `C_C`')
+        expect = df['A'] + df['C_C']
+        assert_series_equal(res, expect)