In [1]:
#Converting Strings to Datetime
import pandas as pd
df = pd.DataFrame({
''''date_str'''': [''''2024-01-01'''', ''''2024-02-01'''', ''''2024-03-01'''']
})
df[''''date''''] = pd.to_datetime(df[''''date_str''''])
print(df)
date_str date 0 2024-01-01 2024-01-01 1 2024-02-01 2024-02-01 2 2024-03-01 2024-03-01
In [2]:
#Extracting Date Components
import pandas as pd
df = pd.DataFrame({
''''date'''': pd.to_datetime([''''2024-01-01'''', ''''2024-02-01'''', ''''2024-03-01''''])
})
df[''''year''''] = df[''''date''''].dt.year
df[''''month''''] = df[''''date''''].dt.month
df[''''day''''] = df[''''date''''].dt.day
print(df)
date year month day 0 2024-01-01 2024 1 1 1 2024-02-01 2024 2 1 2 2024-03-01 2024 3 1
In [3]:
# Setting a Datetime Column as Index
import pandas as pd
df = pd.DataFrame({
''''date'''': pd.to_datetime([''''2024-01-01'''', ''''2024-02-01'''', ''''2024-03-01'''']),
''''value'''': [10, 20, 30]
})
df.set_index(''''date'''', inplace=True)
print(df)
value date 2024-01-01 10 2024-02-01 20 2024-03-01 30
In [4]:
# Filtering Data by Date Range
import pandas as pd
df = pd.DataFrame({
''''date'''': pd.to_datetime([''''2024-01-01'''', ''''2024-02-01'''', ''''2024-03-01'''']),
''''value'''': [10, 20, 30]
})
start_date = ''''2024-01-01''''
end_date = ''''2024-02-01''''
filtered_df = df[(df[''''date''''] >= start_date) & (df[''''date''''] <= end_date)]
print("df")
print(df)
print("filtered_df")
print(filtered_df)
df date value 0 2024-01-01 10 1 2024-02-01 20 2 2024-03-01 30 filtered_df date value 0 2024-01-01 10 1 2024-02-01 20
In [5]:
# Resampling Time Series Data
import pandas as pd
df = pd.DataFrame({
''''date'''': pd.date_range(start=''''2024-01-01'''', periods=6, freq=''''D''''),
''''value'''': [10, 20, 30, 40, 50, 60]
})
df.set_index(''''date'''', inplace=True)
monthly_st_df = df.resample(''''MS'''').mean()
monthly_en_df = df.resample(''''ME'''').mean()
print("df")
print(df)
print("monthly_st_df")
print(monthly_st_df)
print("monthly_en_df")
print(monthly_en_df)
df value date 2024-01-01 10 2024-01-02 20 2024-01-03 30 2024-01-04 40 2024-01-05 50 2024-01-06 60 monthly_st_df value date 2024-01-01 35.0 monthly_en_df value date 2024-01-31 35.0
In [6]:
# Handling Time Zones
import pandas as pd
df = pd.DataFrame({
''''date'''': pd.to_datetime([''''2024-01-01 10:00'''', ''''2024-02-01 12:00'''', ''''2024-03-01 14:00''''])
})
print("df")
print(df)
df[''''date''''] = df[''''date''''].dt.tz_localize(''''UTC'''')
df[''''date''''] = df[''''date''''].dt.tz_convert(''''US/Eastern'''')
print("df new")
print(df)
df date 0 2024-01-01 10:00:00 1 2024-02-01 12:00:00 2 2024-03-01 14:00:00 df new date 0 2024-01-01 05:00:00-05:00 1 2024-02-01 07:00:00-05:00 2 2024-03-01 09:00:00-05:00
In [7]:
# merging 2 dataframes based on a date field
import pandas as pd
df1 = pd.DataFrame({
''''date'''': pd.to_datetime([''''2024-01-01'''', ''''2024-01-02'''', ''''2024-01-03'''']),
''''value_A'''': [10, 20, 30]
})
df2 = pd.DataFrame({
''''date'''': pd.to_datetime([''''2024-01-01'''', ''''2024-01-02'''', ''''2024-01-04'''']),
''''value_B'''': [40, 50, 60]
})
print("DataFrame 1:")
print(df1)
print("\nDataFrame 2:")
print(df2)
# Merging DataFrames on the ''''date'''' column
merged_df = pd.merge(df1, df2, on=''''date'''', how=''''inner'''')
print("\nMerged DataFrame (Inner Join):")
print(merged_df)
# Left join
merged_df_left = pd.merge(df1, df2, on=''''date'''', how=''''left'''')
print("\nMerged DataFrame (Left Join):")
print(merged_df_left)
# Right join
merged_df_right = pd.merge(df1, df2, on=''''date'''', how=''''right'''')
print("\nMerged DataFrame (Right Join):")
print(merged_df_right)
# Outer join
merged_df_outer = pd.merge(df1, df2, on=''''date'''', how=''''outer'''')
print("\nMerged DataFrame (Outer Join):")
print(merged_df_outer)
DataFrame 1: date value_A 0 2024-01-01 10 1 2024-01-02 20 2 2024-01-03 30 DataFrame 2: date value_B 0 2024-01-01 40 1 2024-01-02 50 2 2024-01-04 60 Merged DataFrame (Inner Join): date value_A value_B 0 2024-01-01 10 40 1 2024-01-02 20 50 Merged DataFrame (Left Join): date value_A value_B 0 2024-01-01 10 40.0 1 2024-01-02 20 50.0 2 2024-01-03 30 NaN Merged DataFrame (Right Join): date value_A value_B 0 2024-01-01 10.0 40 1 2024-01-02 20.0 50 2 2024-01-04 NaN 60 Merged DataFrame (Outer Join): date value_A value_B 0 2024-01-01 10.0 40.0 1 2024-01-02 20.0 50.0 2 2024-01-03 30.0 NaN 3 2024-01-04 NaN 60.0
In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Generate date range
dates = pd.date_range(start=''''2022-01-01'''', end=''''2023-12-31'''', freq=''''B'''') # Business days
# Generate random stock prices
np.random.seed(0)
prices = np.random.normal(loc=100, scale=10, size=len(dates))
df = pd.DataFrame({''''price'''': prices}, index=dates)
start_date = ''''2022-06-01''''
end_date = ''''2022-10-05''''
#mask = (df.index < start_date) | (df.index > end_date)
#df_original = df.loc[mask]
df.loc[start_date:end_date, ''''price''''] = np.nan
df_original = df
In [9]:
# Forward fill
df_ffill = df_original.copy()
df_ffill = df_ffill.ffill()
# Backward fill
df_bfill = df_original.copy()
df_bfill = df_bfill.bfill()
# Interpolation
df_interp = df_original.copy()
df_interp = df_interp.interpolate(method=''''spline'''', order=1)
#''''linear'''', ''''time'''', ''''index'''', ''''values'''', ''''nearest'''', ''''zero'''', ''''slinear'''', ''''quadratic'''', ''''cubic'''', ''''barycentric'''', ''''krogh'''', ''''spline'''', ''''polynomial'''', ''''from_derivatives'''', ''''piecewise_polynomial'''', ''''pchip'''', ''''akima'''', ''''cubicspline''''
# Drop missing values
df_drop = df_original.copy()
df_drop = df_drop.fillna(20)
In [10]:
# Plotting
# Plotting
fig, axes = plt.subplots(nrows=5, ncols=1, figsize=(14, 16), sharex=True)
# Original data
axes[0].plot(df_original.index, df_original[''''price''''], label=''''Original'''', color=''''blue'''', linestyle=''''--'''')
axes[0].set_title(''''Original Data'''')
axes[0].legend()
# Forward fill
axes[1].plot(df_ffill.index, df_ffill[''''price''''], label=''''Forward Fill'''', color=''''green'''')
axes[1].set_title(''''Forward Fill'''')
axes[1].legend()
# Backward fill
axes[2].plot(df_bfill.index, df_bfill[''''price''''], label=''''Backward Fill'''', color=''''red'''')
axes[2].set_title(''''Backward Fill'''')
axes[2].legend()
# Interpolation
axes[3].plot(df_interp.index, df_interp[''''price''''], label=''''Interpolation'''', color=''''purple'''')
axes[3].set_title(''''Interpolation'''')
axes[3].legend()
# fillna
axes[4].plot(df_drop.index, df_drop[''''price''''], label=''''fillna'''', color=''''purple'''')
axes[4].set_title(''''fillna'''')
axes[4].legend()
plt.xlabel(''''Date'''')
plt.ylabel(''''Price'''')
plt.tight_layout()
plt.show()