Skip to content

Commit

Permalink
BUG: print DataFrame columns in the right order, also convert NAs in string columns, GH #325
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Nov 5, 2011
1 parent fc7315d commit 6a0452b
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 12 deletions.
17 changes: 9 additions & 8 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2890,7 +2890,11 @@ def __init__(self, frame, buf=None, columns=None, col_space=None,
self.formatters = formatters
self.na_rep = na_rep
self.col_space = col_space
self.column_filter = frame.columns if columns is None else set(columns)

if columns is not None:
self.columns = _ensure_index(columns)
else:
self.columns = frame.columns

self._write_to_buffer()

Expand All @@ -2909,8 +2913,7 @@ def _write_to_buffer(self):
str_columns = self._get_formatted_column_labels()

stringified = [str_columns[i] + format_col(c)
for i, c in enumerate(frame.columns)
if c in self.column_filter]
for i, c in enumerate(self.columns)]

to_write.append(adjoin(1, str_index, *stringified))

Expand Down Expand Up @@ -2946,18 +2949,16 @@ def _format_col(col):
def _get_formatted_column_labels(self):
from pandas.core.index import _sparsify

columns = self.frame.columns

if isinstance(columns, MultiIndex):
fmt_columns = columns.format(sparsify=False, adjoin=False)
if isinstance(self.columns, MultiIndex):
fmt_columns = self.columns.format(sparsify=False, adjoin=False)
str_columns = zip(*[[' %s' % y for y in x]
for x in zip(*fmt_columns)])
if self.sparsify:
str_columns = _sparsify(str_columns)

str_columns = [list(x) for x in zip(*str_columns)]
else:
str_columns = [[' %s' % x] for x in columns.format()]
str_columns = [[' %s' % x] for x in self.columns.format()]

if self.show_index_names and self.has_index_names:
for x in str_columns:
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ def _convert_types(values, na_values):
try:
values = lib.maybe_convert_numeric(values, na_values)
except Exception:
lib.sanitize_objects(values)
lib.sanitize_objects(values, na_values)

if values.dtype == np.object_:
return lib.maybe_convert_bool(values)
Expand Down
13 changes: 13 additions & 0 deletions pandas/io/tests/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,19 @@ def test_custom_na_values(self):
skiprows=[1])
assert_almost_equal(df2.values, expected)

# Regression test for this commit (GH #325): NA-like tokens found in string
# columns of a CSV should be read back as nan rather than as literal strings.
def test_detect_string_na(self):
# CSV fixture with two string columns; the 'NA', 'NaN' and 'nan' cells are
# the ones the expected values below treat as missing.
data = """A,B
foo,bar
NA,baz
NaN,nan
"""
# Expected cell values, row by row: only 'foo', 'bar' and 'baz' stay strings.
expected = [['foo', 'bar'],
[nan, 'baz'],
[nan, nan]]

# Parse the fixture with no extra read_csv arguments and compare the
# resulting values against the expected nested list.
df = read_csv(StringIO(data))
assert_almost_equal(df.values, expected)

def test_unnamed_columns(self):
data = """A,B,C,,
1,2,3,4,5
Expand Down
4 changes: 2 additions & 2 deletions pandas/src/parsing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ def try_parse_dates(ndarray[object] values, parser=None):

return result

def sanitize_objects(ndarray[object] values):
def sanitize_objects(ndarray[object] values, set na_values):
cdef:
Py_ssize_t i, n
object val, onan
Expand All @@ -200,7 +200,7 @@ def sanitize_objects(ndarray[object] values):

for i from 0 <= i < n:
val = values[i]
if val == '':
if val == '' or val in na_values:
values[i] = onan

def maybe_convert_bool(ndarray[object] arr):
Expand Down
23 changes: 22 additions & 1 deletion pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1324,6 +1324,9 @@ def test_repr_corner(self):
foo = repr(df)

def test_to_string(self):
from pandas import read_table
import re

# big mixed
biggie = DataFrame({'A' : randn(1000),
'B' : tm.makeStringIndex(1000)},
Expand All @@ -1340,7 +1343,25 @@ def test_to_string(self):

self.assert_(isinstance(s, basestring))

biggie.to_string(columns=['B', 'A'], colSpace=17)
# print in right order
result = biggie.to_string(columns=['B', 'A'], colSpace=17,
float_format='%.6f'.__mod__)
lines = result.split('\n')
header = lines[0].strip().split()
joined = '\n'.join([re.sub('\s+', ' ', x).strip() for x in lines[1:]])
recons = read_table(StringIO(joined), names=header, sep=' ')
assert_series_equal(recons['B'], biggie['B'])
assert_series_equal(np.round(recons['A'], 2),
np.round(biggie['A'], 2))

# expected = ['B', 'A']
# self.assertEqual(header, expected)

result = biggie.to_string(columns=['A'], colSpace=17)
header = result.split('\n')[0].strip().split()
expected = ['A']
self.assertEqual(header, expected)

biggie.to_string(columns=['B', 'A'],
formatters={'A' : lambda x: '%.1f' % x})

Expand Down

0 comments on commit 6a0452b

Please sign in to comment.