In [1]:
# Checking for null values.
# Builds a small frame with missing entries, prints the element-wise null
# mask, then prints the number of nulls per column.
import pandas as pd

# `None` in a numeric column is stored as NaN by pandas.
df = pd.DataFrame({
    "A": [1, 2, None, 4],
    "B": [None, 2, 3, 4],
    "C": [1, None, None, 4],
})

# Boolean frame of the same shape as `df`: True where the value is missing.
null_mask = df.isnull()
print(null_mask)
print("###")
# Summing booleans column-wise yields the count of missing values per column.
print(null_mask.sum())
A B C
0 False True False
1 False False True
2 True False True
3 False False False
###
A 1
B 1
C 2
dtype: int64
In [2]:
# Dropping rows with null values.
# Compares the default `dropna()` (drop a row if ANY value is missing) with
# `dropna(how="all")` (drop a row only if EVERY value is missing).
import pandas as pd

df = pd.DataFrame({
    "A": [1, 2, None, 4],
    "B": [None, 2, 3, 4],
    "C": [1, None, None, 4],
})

# Default how="any": only fully populated rows survive (row 3 here).
df_dropped = df.dropna()
# how="all": no row in this frame is entirely NaN, so nothing is removed.
df_dropped_all = df.dropna(how="all")

# `dropna` returns a new frame; `df` itself is left unchanged.
print(df)
print("###")
print(df_dropped)
print("###")
print(df_dropped_all)
A B C
0 1.0 NaN 1.0
1 2.0 2.0 NaN
2 NaN 3.0 NaN
3 4.0 4.0 4.0
###
A B C
3 4.0 4.0 4.0
###
A B C
0 1.0 NaN 1.0
1 2.0 2.0 NaN
2 NaN 3.0 NaN
3 4.0 4.0 4.0
In [3]:
# Filling null values with a specific value.
# Replaces every missing entry with the constant 0 and prints the frame
# before and after.
import pandas as pd

df = pd.DataFrame({
    "A": [1, 2, None, 4],
    "B": [None, 2, 3, 4],
    "C": [1, None, None, 4],
})

# `fillna` returns a new frame; the original `df` keeps its NaN entries.
df_filled = df.fillna(0)

print(df)
print("###")
print(df_filled)
A B C
0 1.0 NaN 1.0
1 2.0 2.0 NaN
2 NaN 3.0 NaN
3 4.0 4.0 4.0
###
A B C
0 1.0 0.0 1.0
1 2.0 2.0 0.0
2 0.0 3.0 0.0
3 4.0 4.0 4.0
In [4]:
# Filling null values with mean / median / mode.
# Each missing entry is replaced by a statistic computed from its own column.
import pandas as pd

df = pd.DataFrame({
    "A": [1, 2, None, 4],
    "B": [None, 2, 3, 4],
    "C": [1, None, None, 4],
})

# `df.mean()` / `df.median()` return one value per column (NaN is skipped);
# passing that Series to `fillna` fills each column with its own statistic.
df_filled_mean = df.fillna(df.mean())
df_filled_median = df.fillna(df.median())
# `df.mode()` returns a DataFrame because a column can have several modes;
# `.iloc[0]` takes the first row, i.e. the first listed mode of each column.
df_filled_mode = df.fillna(df.mode().iloc[0])

print(df)
print("### MEAN")
print(df_filled_mean)
print("### MEDIAN")
print(df_filled_median)
print("### MODE")
print(df_filled_mode)
A B C
0 1.0 NaN 1.0
1 2.0 2.0 NaN
2 NaN 3.0 NaN
3 4.0 4.0 4.0
### MEAN
A B C
0 1.000000 3.0 1.0
1 2.000000 2.0 2.5
2 2.333333 3.0 2.5
3 4.000000 4.0 4.0
### MEDIAN
A B C
0 1.0 3.0 1.0
1 2.0 2.0 2.5
2 2.0 3.0 2.5
3 4.0 4.0 4.0
### MODE
A B C
0 1.0 2.0 1.0
1 2.0 2.0 1.0
2 1.0 3.0 1.0
3 4.0 4.0 4.0
In [5]:
# Forward fill and backward fill.
# Forward fill copies the last valid value downwards into missing entries;
# backward fill copies the next valid value upwards.
import pandas as pd

df = pd.DataFrame({
    "A": [1, 2, None, 4],
    "B": [None, 2, 3, 4],
    "C": [1, None, None, 4],
})

# Use the dedicated `ffill()` / `bfill()` methods: the older
# `fillna(method="ffill")` / `fillna(method="bfill")` spelling is deprecated
# in recent pandas releases.
df_ffill = df.ffill()
df_bfill = df.bfill()

print(df)
print("### ffill")
# A leading NaN (column B, row 0) has no earlier value and stays NaN.
print(df_ffill)
print("### bfill")
print(df_bfill)
A B C
0 1.0 NaN 1.0
1 2.0 2.0 NaN
2 NaN 3.0 NaN
3 4.0 4.0 4.0
### ffill
A B C
0 1.0 NaN 1.0
1 2.0 2.0 1.0
2 2.0 3.0 1.0
3 4.0 4.0 4.0
### bfill
A B C
0 1.0 2.0 1.0
1 2.0 2.0 4.0
2 4.0 3.0 4.0
3 4.0 4.0 4.0