In [1]:
# Using pd.get_dummies()
import pandas as pd

# Small demo frame with a single categorical column.
df = pd.DataFrame({
    'Color': ['Red', 'Green', 'Blue', 'Red']
})

# get_dummies swaps 'Color' for one indicator column per distinct value,
# named '<column>_<value>'. It returns a new frame, so `df` still holds the
# original 'Color' column when printed below.
df_encoded = pd.get_dummies(df, columns=['Color'])

print("df")
print(df)
print("df_encoded")
print(df_encoded)
df Color 0 Red 1 Green 2 Blue 3 Red df_encoded Color_Blue Color_Green Color_Red 0 False False True 1 False True False 2 True False False 3 False False True
In [2]:
# One-Hot Encoding Multiple Columns
import pandas as pd

# Demo frame with two categorical columns to encode in one call.
df = pd.DataFrame({
    'Color': ['Red', 'Green', 'Blue', 'Red'],
    'Size': ['S', 'M', 'L', 'S']
})

# Each listed column is expanded into its own group of indicator columns,
# prefixed with the source column name ('Color_*', 'Size_*').
df_encoded = pd.get_dummies(df, columns=['Color', 'Size'])

print(df_encoded)
Color_Blue Color_Green Color_Red Size_L Size_M Size_S 0 False False True False False True 1 False True False False True False 2 True False False True False False 3 False False True False False True
In [3]:
# Using OneHotEncoder from sklearn
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# Small demo frame with a single categorical column.
df = pd.DataFrame({
    'Color': ['Red', 'Green', 'Blue', 'Red']
})

encoder = OneHotEncoder()

# The encoder is given a 2-D selection (df[['Color']], not df['Color']).
# .toarray() converts the fit_transform result into a dense array so it can
# be wrapped in a DataFrame.
encoded_data = encoder.fit_transform(df[['Color']]).toarray()

# Reuse df's index so the index-based join below stays row-aligned even if
# the source frame does not have the default 0..n-1 index.
encoded_df = pd.DataFrame(
    encoded_data,
    columns=encoder.get_feature_names_out(['Color']),
    index=df.index,
)

# Keep the original 'Color' column next to its float indicator columns.
df_encoded = df.join(encoded_df)

print(df_encoded)
Color Color_Blue Color_Green Color_Red 0 Red 0.0 0.0 1.0 1 Green 0.0 1.0 0.0 2 Blue 1.0 0.0 0.0 3 Red 0.0 0.0 1.0