In [1]:
# Split DataFrame by Row Number
import pandas as pd

# Ten-row demo frame: column A holds 0..9 and column B holds 10..19.
df = pd.DataFrame({
    'A': range(10),
    'B': range(10, 20)
})

# Positional split at row 5. iloc slices are half-open, so the row at
# position 5 belongs only to the second part and no row is duplicated.
df1 = df.iloc[:5]
df2 = df.iloc[5:]

print("First part:\n", df1)
print("Second part:\n", df2)
First part: A B 0 0 10 1 1 11 2 2 12 3 3 13 4 4 14 Second part: A B 5 5 15 6 6 16 7 7 17 8 8 18 9 9 19
In [2]:
# Split DataFrame into Equal Chunks
import pandas as pd
import numpy as np
import math

# Twenty-row demo frame: column A holds 0..19 and column B holds 10..29.
df = pd.DataFrame({
    'A': range(20),
    'B': range(10, 30)
})

# Number of chunks to cut the frame into.
df_chuncks = 3
# NOTE(review): chunk_start and chunk_end are not read by the loop below;
# they are kept because other cells may rely on them -- confirm before removing.
chunk_start = 0
chunk_end = len(df)
# Rows per chunk, rounded up so that df_chuncks slices of this size span
# every row; the final chunk is shorter when the length does not divide evenly.
chunk_step = math.ceil(len(df) / df_chuncks)

print("Original len(df):", len(df))
print("chunk_step:", chunk_step)

for i in range(0, df_chuncks):
    # The slice end may run past the last row on the final iteration;
    # iloc slicing tolerates an out-of-range end and returns the rows that exist.
    df_chunk = df.iloc[(i * chunk_step):((i + 1) * chunk_step)]
    print("Chunk {} of {}".format(i + 1, df_chuncks))
    print(df_chunk)
Original len(df): 20 chunk_step: 7 Chunk 1 of 3 A B 0 0 10 1 1 11 2 2 12 3 3 13 4 4 14 5 5 15 6 6 16 Chunk 2 of 3 A B 7 7 17 8 8 18 9 9 19 10 10 20 11 11 21 12 12 22 13 13 23 Chunk 3 of 3 A B 14 14 24 15 15 25 16 16 26 17 17 27 18 18 28 19 19 29
In [3]:
# Split DataFrame by Column Value
import pandas as pd

# Six score records spread over three names, two rows per name.
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie', 'Alice', 'Bob', 'Charlie'],
    'Score': [85, 90, 95, 80, 85, 90]
})

# One group per distinct value in the Name column.
grouped = df.groupby('Name')

# Iterating a GroupBy yields (key, sub-frame) pairs; each sub-frame keeps
# the original row labels of the rows that share that name.
for name, group in grouped:
    print(f"Group: {name}\n", group)
Group: Alice Name Score 0 Alice 85 3 Alice 80 Group: Bob Name Score 1 Bob 90 4 Bob 85 Group: Charlie Name Score 2 Charlie 95 5 Charlie 90
In [4]:
# Split DataFrame into Training and Testing Sets
import pandas as pd
from sklearn.model_selection import train_test_split

# Ten-row demo frame: column A holds 0..9 and column B holds 10..19.
df = pd.DataFrame({
    'A': range(10),
    'B': range(10, 20)
})

# Hold out 30% of the rows for testing (3 of the 10 rows here); the fixed
# random_state makes the shuffled assignment repeatable between runs.
train_df, test_df = train_test_split(df, test_size=0.3, random_state=42)

print("Training set:\n", train_df)
print("Testing set:\n", test_df)
Training set: A B 0 0 10 7 7 17 2 2 12 9 9 19 4 4 14 3 3 13 6 6 16 Testing set: A B 8 8 18 1 1 11 5 5 15