Skip to content Skip to sidebar Skip to footer

Split Multiple Columns Of Lists Into Separate Rows

I have a dataframe like this - df = pd.DataFrame( {'key': [1, 2, 3, 4], 'col1': [['apple','orange'], ['pineapple'], ['','','guava','',''], ['','','orange','apple','']],

Solution 1:

Since you know that the number of non-empty elements in each list will always match, you can explode each column separately, filter out the blanks, and join the results back. Add on a .reset_index() if you want 'key' back as a column.

import pandas as pd

pd.concat([df.set_index('key')[[col]].explode(col).query(f'{col} != ""')
           for col in ['col1', 'col2']], axis=1)

# Without the f-string
#pd.concat([df.set_index('key')[[col]].explode(col).query(col + ' != ""')
#           for col in ['col1', 'col2']], axis=1)

          col1 col2
key                
1        apple  087
1       orange  799
2    pineapple  681
3        guava  078
4       orange  816
4        apple  018

If you are using an older verions of pandas that doesn't allow for the explode method use @BEN_YO's method to unnest. I'll copy the relevant code over here since there are a few different versions to choose from.

import numpy as np

def unnesting(df, explode):
    idx = df.index.repeat(df[explode[0]].str.len())
    df1 = pd.concat([
        pd.DataFrame({x: np.concatenate(df[x].values)}) for x in explode], axis=1)
    df1.index = idx

    return df1.join(df.drop(explode, 1), how='left')

pd.concat([unnesting(df.set_index('key')[[col]], explode=[col]).query(f'{col} !=""')
           for col in ['col1', 'col2']], axis=1)
# Same output as above

Solution 2:

try creating new df on top of old one like this

df['key'] =  df.apply(lambda x: [x['key']]*len(x['col2']), axis=1)
lst_col = ['key', 'col1', 'col2']
df = pd.DataFrame({
    col:[x for lst in list(df[col]) for x in lst if x!=""] for col in lst_col

})

Output

    key col1       col2
0   1   apple       087
1   1   orange      799
2   2   pineapple   681
3   3   guava       078
4   4   orange      816
5   4   apple       018

Solution 3:

For the sake of complexity :)

 pd.DataFrame([j for i in [[{"key": x['key'],"col1": y,'col2':x['col2'][list(filter(None, x['col1'])).index(y)]} for y in list(filter(None, x['col1']))]for idx, x in df.iterrows()] for j in i])

Output

|   key | col1      |   col2 ||------:|:----------|-------:||     1 | apple     |    087 ||     1 | orange    |    799 ||     2 | pineapple |    681 ||     3 | guava     |    078 ||     4 | orange    |    816 ||     4 | apple     |    018 |

Solution 4:

try this

newkeys= list(itertools.chain.from_iterable(df.apply(lambda vals : [vals[0]]*len(vals[2]), axis=1).tolist()))
newcol1, newcol2 =  list(itertools.chain.from_iterable(df.col1)),  list(itertools.chain.from_iterable(df.col2))
newcol1=list(filter(None, newcol1))
pd.DataFrame(zip(*[newkeys, newcol1, newcol2]), columns=df.columns)

Post a Comment for "Split Multiple Columns Of Lists Into Separate Rows"