In [1]:
# Checking for Null Values
#
# Builds a small DataFrame containing missing entries, then prints the
# element-wise null mask followed by the count of nulls in each column.
import pandas as pd

df = pd.DataFrame({
    'A': [1, 2, None, 4],
    'B': [None, 2, 3, 4],
    'C': [1, None, None, 4],
})

# Compute the boolean mask once and reuse it for both the display and the
# per-column totals (True counts as 1 when summed).
null_mask = df.isnull()
print(null_mask)
print("###")
print(null_mask.sum())
       A      B      C
0  False   True  False
1  False  False   True
2   True  False   True
3  False  False  False
###
A    1
B    1
C    2
dtype: int64
In [2]:
# Dropping Rows with Null Values
#
# Compares the two row-dropping policies of DataFrame.dropna on the same
# data: the default (drop a row if ANY value is null) versus how='all'
# (drop a row only if EVERY value is null).
import pandas as pd

df = pd.DataFrame({
    'A': [1, 2, None, 4],
    'B': [None, 2, 3, 4],
    'C': [1, None, None, 4],
})

# Default how='any': only row 3 has no missing value, so it alone survives.
df_dropped = df.dropna()

# how='all': no row here is entirely null, so nothing is removed.
df_dropped_all = df.dropna(how='all')

print(df)
print("###")
print(df_dropped)
print("###")
print(df_dropped_all)
     A    B    C
0  1.0  NaN  1.0
1  2.0  2.0  NaN
2  NaN  3.0  NaN
3  4.0  4.0  4.0
###
     A    B    C
3  4.0  4.0  4.0
###
     A    B    C
0  1.0  NaN  1.0
1  2.0  2.0  NaN
2  NaN  3.0  NaN
3  4.0  4.0  4.0
In [3]:
# Filling Null Values with a Specific Value
#
# Replaces every missing entry with the constant 0 and prints the frame
# before and after the replacement.
import pandas as pd

df = pd.DataFrame({
    'A': [1, 2, None, 4],
    'B': [None, 2, 3, 4],
    'C': [1, None, None, 4],
})

# fillna returns a new DataFrame; the original df keeps its NaN entries,
# which is why both frames can be printed side by side below.
df_filled = df.fillna(0)

print(df)
print("###")
print(df_filled)
     A    B    C
0  1.0  NaN  1.0
1  2.0  2.0  NaN
2  NaN  3.0  NaN
3  4.0  4.0  4.0
###
     A    B    C
0  1.0  0.0  1.0
1  2.0  2.0  0.0
2  0.0  3.0  0.0
3  4.0  4.0  4.0
In [4]:
# Filling Null Values with Mean/Median/Mode
#
# Fills each column's missing entries with a statistic computed from that
# same column, and prints the result for each of the three statistics.
import pandas as pd

df = pd.DataFrame({
    'A': [1, 2, None, 4],
    'B': [None, 2, 3, 4],
    'C': [1, None, None, 4],
})

# mean()/median() return one value per column; passing that Series to
# fillna fills each column with its own statistic.
df_filled_mean = df.fillna(df.mean())
df_filled_median = df.fillna(df.median())

# mode() returns a DataFrame because a column may have several modes;
# iloc[0] takes the first row, i.e. the first listed mode of each column.
df_filled_mode = df.fillna(df.mode().iloc[0])

print(df)
print("### MEAN")
print(df_filled_mean)
print("### MEDIAN")
print(df_filled_median)
print("### MODE")
print(df_filled_mode)
     A    B    C
0  1.0  NaN  1.0
1  2.0  2.0  NaN
2  NaN  3.0  NaN
3  4.0  4.0  4.0
### MEAN
          A    B    C
0  1.000000  3.0  1.0
1  2.000000  2.0  2.5
2  2.333333  3.0  2.5
3  4.000000  4.0  4.0
### MEDIAN
     A    B    C
0  1.0  3.0  1.0
1  2.0  2.0  2.5
2  2.0  3.0  2.5
3  4.0  4.0  4.0
### MODE
     A    B    C
0  1.0  2.0  1.0
1  2.0  2.0  1.0
2  1.0  3.0  1.0
3  4.0  4.0  4.0
In [5]:
# Forward Fill and Backward Fill
#
# Fills each missing entry from its neighbour in the same column: forward
# fill copies the previous row's value down, backward fill copies the next
# row's value up.
import pandas as pd

df = pd.DataFrame({
    'A': [1, 2, None, 4],
    'B': [None, 2, 3, 4],
    'C': [1, None, None, 4],
})

# Use the dedicated ffill()/bfill() methods rather than
# fillna(method=...), which newer pandas releases deprecate.
# A leading NaN has no previous value, so ffill leaves it as NaN.
df_ffill = df.ffill()
df_bfill = df.bfill()

print(df)
print("### ffill")
print(df_ffill)
print("### bfill")
print(df_bfill)
     A    B    C
0  1.0  NaN  1.0
1  2.0  2.0  NaN
2  NaN  3.0  NaN
3  4.0  4.0  4.0
### ffill
     A    B    C
0  1.0  NaN  1.0
1  2.0  2.0  1.0
2  2.0  3.0  1.0
3  4.0  4.0  4.0
### bfill
     A    B    C
0  1.0  2.0  1.0
1  2.0  2.0  4.0
2  4.0  3.0  4.0
3  4.0  4.0  4.0