Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
# Sample input: each product code is paired with zero or more option strings.
option_lists = [[], ["HELLO"], [], ["WORLD", "!!!"]]
product_codes = ["A", "B", "C", "D"]
df = pd.DataFrame({'Options': option_lists, 'Product code': product_codes})
def unnest(df, col, col2, reset_index=False):
    """Expand a list-valued column into one row per list element.

    Every element of the lists stored in ``df[col]`` becomes its own row,
    with the remaining columns repeated for each element. A row whose list
    is empty is kept as a single row holding the placeholder string ``'[]'``
    in ``col``.

    After expansion the ``'Product code'`` value is copied into a new
    ``'item_group_id'`` column, and ``'Product code'`` itself is suffixed
    with ``'-<element>'`` (rows holding the placeholder keep the bare code).

    Args:
        df: Frame containing ``col`` and a ``'Product code'`` column.
        col: Name of the column whose cells are lists of strings.
        col2: Unused; kept so existing call sites keep working. The code
            column is fixed to ``'Product code'``.
        reset_index: If true, replace the index with a fresh 0..n-1 range
            instead of repeating the original label on each expanded row.

    Returns:
        A new DataFrame. ``df`` and the lists stored in it are not modified.
    """
    placeholder = '[]'

    # Substitute the placeholder while flattening instead of appending it to
    # the caller's lists, which would silently modify the input frame.
    pairs = [
        (label, element)
        for label, elements in df[col].items()
        for element in (list(elements) or [placeholder])
    ]
    col_flat = pd.DataFrame(pairs, columns=['I', col]).set_index('I')

    # Keyword form: drop() no longer accepts ``axis`` positionally in pandas 2.
    result = df.drop(columns=col).merge(
        col_flat, left_index=True, right_index=True
    )
    if reset_index:
        result = result.reset_index(drop=True)

    # Keep the unsuffixed code so all variants of one product share a group.
    result['item_group_id'] = result['Product code']
    result['Product code'] += result[col].apply(
        lambda val: '' if val == placeholder else '-' + val
    )
    return result
# Show the frame before and after expanding the 'Options' lists into rows.
print(df)
# The third argument names the code column; it must match the real column
# label 'Product code' exactly (the lowercase spelling names no column).
df = unnest(df, 'Options', 'Product code')
print(df)
- # before
- '''
- Options Product code
- 0 [] A
- 1 [HELLO] B
- 2 [] C
- 3 [WORLD, !!!] D
- '''
- # after
- '''
- Product code Options item_group_id
- 0 A [] A
- 1 B-HELLO HELLO B
- 2 C [] C
- 3 D-WORLD WORLD D
- 3 D-!!! !!! D
- '''
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement