Skip to content

Commit

Permalink
BUG: to_csv should allow writing of dupe cols if within same block GH3095
Browse files Browse the repository at this point in the history

closes #3095
  • Loading branch information
y-p committed Mar 19, 2013
1 parent ad082bc commit 1f138a4
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 108 deletions.
34 changes: 23 additions & 11 deletions pandas/core/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -803,11 +803,20 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
ncols = sum(len(b.items) for b in self.blocks)
self.data =[None] * ncols

# fail early if we have duplicate columns
if len(set(self.cols)) != len(self.cols):
raise Exception("duplicate columns are not permitted in to_csv")
if self.obj.columns.is_unique:
self.colname_map = dict((k,i) for i,k in enumerate(obj.columns))
else:
ks = [set(x.items) for x in self.blocks]
u = len(reduce(lambda a,x: a.union(x),ks,set()))
t = sum(map(len,ks))
if u != t:
if len(set(self.cols)) != len(self.cols):
raise NotImplementedError("duplicate columns with differing dtypes are unsupported")
else:
# if columns are not unique and we access this,
# we're doing it wrong
pass

self.colname_map = dict((k,i) for i,k in enumerate(obj.columns))

if chunksize is None:
chunksize = (100000/ (len(self.cols) or 1)) or 1
Expand Down Expand Up @@ -1002,17 +1011,20 @@ def _save(self):

def _save_chunk(self, start_i, end_i):

colname_map = self.colname_map
data_index = self.data_index

# create the data for a chunk
slicer = slice(start_i,end_i)
for i in range(len(self.blocks)):
b = self.blocks[i]
d = b.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
for j, k in enumerate(b.items):
# self.data is a preallocated list
self.data[colname_map[k]] = d[j]
if self.obj.columns.is_unique:
for i in range(len(self.blocks)):
b = self.blocks[i]
d = b.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
for j, k in enumerate(b.items):
# self.data is a preallocated list
self.data[self.colname_map[k]] = d[j]
else:
for i in range(len(self.cols)):
self.data[i] = self.obj.icol(i).values[slicer].tolist()

ix = data_index.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)

Expand Down
Loading

0 comments on commit 1f138a4

Please sign in to comment.