In [1]:
# Split DataFrame by Row Number
import pandas as pd

# Ten-row demo frame: column A holds 0-9, column B holds 10-19.
df = pd.DataFrame({
    'A': range(10),
    'B': range(10, 20)
})

# Positional slicing: the first five rows go to df1, the remaining rows to df2.
# The two slices do not overlap and together cover every row of df.
df1 = df.iloc[:5]
df2 = df.iloc[5:]

print("First part:\n", df1)
print("Second part:\n", df2)
First part:
A B
0 0 10
1 1 11
2 2 12
3 3 13
4 4 14
Second part:
A B
5 5 15
6 6 16
7 7 17
8 8 18
9 9 19
In [2]:
# Split DataFrame into Equal Chunks
import pandas as pd
import numpy as np
import math

# Twenty-row demo frame: column A holds 0-19, column B holds 10-29.
df = pd.DataFrame({
    'A': range(20),
    'B': range(10, 30)
})

df_chuncks=3          # number of chunks to produce
chunk_start=0
chunk_end=len(df)
# Round the chunk size up so that df_chuncks slices are enough to cover every
# row; when len(df) is not a multiple of df_chuncks the last slice is shorter
# (here: 7, 7 and 6 rows).
chunk_step=math.ceil(len(df)/df_chuncks)

print("Original len(df):", len(df))
print("chunk_step:", chunk_step)

for i in range(0,df_chuncks):
    # iloc clips a stop index that runs past the end of the frame, so the
    # final slice needs no explicit bounds check.
    df_chunk=df.iloc[(i*chunk_step):((i+1)*chunk_step)]
    print("Chunk {} of {}".format(i+1,df_chuncks))
    print(df_chunk)
Original len(df): 20
chunk_step: 7
Chunk 1 of 3
A B
0 0 10
1 1 11
2 2 12
3 3 13
4 4 14
5 5 15
6 6 16
Chunk 2 of 3
A B
7 7 17
8 8 18
9 9 19
10 10 20
11 11 21
12 12 22
13 13 23
Chunk 3 of 3
A B
14 14 24
15 15 25
16 16 26
17 17 27
18 18 28
19 19 29
In [3]:
# Split DataFrame by Column Value
import pandas as pd

# Six score records; each of the three names appears twice.
df = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie', 'Alice', 'Bob', 'Charlie'],
    'Score': [85, 90, 95, 80, 85, 90]
})

# Group rows by the value in the Name column. Iterating the GroupBy object
# yields (key, sub-frame) pairs, and each sub-frame keeps the original row
# index labels.
grouped = df.groupby('Name')

for name, group in grouped:
    print(f"Group: {name}\n", group)
Group: Alice
Name Score
0 Alice 85
3 Alice 80
Group: Bob
Name Score
1 Bob 90
4 Bob 85
Group: Charlie
Name Score
2 Charlie 95
5 Charlie 90
In [4]:
# Split DataFrame into Training and Testing Sets
import pandas as pd
from sklearn.model_selection import train_test_split

# Ten-row demo frame: column A holds 0-9, column B holds 10-19.
df = pd.DataFrame({
    'A': range(10),
    'B': range(10, 20)
})

# test_size=0.3 holds out 30% of the rows (3 of the 10 here) for testing.
# random_state=42 seeds the shuffle so the same rows land in each set on
# every run.
train_df, test_df = train_test_split(df, test_size=0.3, random_state=42)

print("Training set:\n", train_df)
print("Testing set:\n", test_df)
Training set:
A B
0 0 10
7 7 17
2 2 12
9 9 19
4 4 14
3 3 13
6 6 16
Testing set:
A B
8 8 18
1 1 11
5 5 15