In [ ]:
!pip install boto3
In [ ]:
import pandas as pd
import boto3
from io import StringIO
# Replace with your actual credentials and bucket/file details
aws_access_key_id = 'YOUR_ACCESS_KEY'
aws_secret_access_key = 'YOUR_SECRET_KEY'
bucket_name = 'your-bucket-name'
file_key = 'your-file.csv'
# Authenticate and fetch the file from S3
s3 = boto3.client('s3', aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key)
obj = s3.get_object(Bucket=bucket_name, Key=file_key)
data = obj['Body'].read().decode('utf-8')
# Create DataFrame
df = pd.read_csv(StringIO(data))
print(df.head())
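If the s3fs package is installed, pandas can read the object directly from an s3:// URL, which avoids the manual get_object/StringIO step. This is a minimal sketch assuming the same placeholder bucket, key, and credentials defined above.
In [ ]:
import pandas as pd
# Requires: pip install s3fs
# Assumes the same placeholder bucket_name, file_key, and credentials as above
df = pd.read_csv(
    f's3://{bucket_name}/{file_key}',
    storage_options={
        'key': aws_access_key_id,
        'secret': aws_secret_access_key,
    },
)
print(df.head())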
In [ ]:
import boto3
import pandas as pd
from io import StringIO
# Replace with your actual credentials and bucket details
aws_access_key_id = 'YOUR_ACCESS_KEY'
aws_secret_access_key = 'YOUR_SECRET_KEY'
bucket_name = 'your-bucket-name'
folder_prefix = 'your-folder/'
s3 = boto3.client('s3', aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key)
# List all CSV files under the folder prefix
response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_prefix)
files = [content['Key'] for content in response.get('Contents', []) if content['Key'].endswith('.csv')]
# Read multiple CSV files into DataFrames
dfs = []
for file_key in files:
    obj = s3.get_object(Bucket=bucket_name, Key=file_key)
    data = obj['Body'].read().decode('utf-8')
    df = pd.read_csv(StringIO(data))
    dfs.append(df)
# Combine all DataFrames into one
combined_df = pd.concat(dfs, ignore_index=True)
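Note that list_objects_v2 returns at most 1,000 keys per call, so a single response can silently miss files in larger folders. The sketch below uses boto3's paginator instead, under the same assumptions (placeholder credentials, bucket, and folder prefix as above).
In [ ]:
# Paginate through every object under the prefix (list_objects_v2 caps at 1,000 keys per call)
paginator = s3.get_paginator('list_objects_v2')
dfs = []
for page in paginator.paginate(Bucket=bucket_name, Prefix=folder_prefix):
    for content in page.get('Contents', []):
        if not content['Key'].endswith('.csv'):
            continue
        obj = s3.get_object(Bucket=bucket_name, Key=content['Key'])
        dfs.append(pd.read_csv(StringIO(obj['Body'].read().decode('utf-8'))))
combined_df = pd.concat(dfs, ignore_index=True)
print(combined_df.shape)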