In [ ]:
!pip install google-cloud-storage
In [ ]:
import pandas as pd
from google.cloud import storage
# Read a single CSV object from a Google Cloud Storage bucket into a DataFrame.
import io  # stdlib text buffer; `pd.compat.StringIO` no longer exists in pandas

# Replace with your actual credentials and bucket/file details.
# NOTE: these are placeholders. Do not commit a real service-account key to
# source control; consider loading the key from a file kept outside the repo.
credentials_json = {
    "type": "service_account",
    "project_id": "your-project-id",
    "private_key_id": "your-private-key-id",
    "private_key": "your-private-key",
    "client_email": "your-client-email",
    "client_id": "your-client-id",
    "auth_uri": "https://accounts.google.com/o/oauth2/auth",
    "token_uri": "https://oauth2.googleapis.com/token",
    "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
    "client_x509_cert_url": "your-client-x509-cert-url",
}

# Authenticate with the service-account info, then locate the bucket and object.
client = storage.Client.from_service_account_info(credentials_json)
bucket = client.get_bucket("your-bucket-name")
blob = bucket.blob("your-file.csv")

# Download the object's bytes and decode them as UTF-8 text.
# (`download_as_string` is a deprecated alias of `download_as_bytes`.)
data = blob.download_as_bytes().decode("utf-8")

# Create the DataFrame. `read_csv` expects a path or file-like object, so the
# in-memory text is wrapped in a StringIO buffer.
df = pd.read_csv(io.StringIO(data))
print(df.head())
In [ ]:
from google.cloud import storage
import pandas as pd
# Read every CSV object under a bucket prefix and combine them into one DataFrame.
import io  # stdlib text buffer; `pd.compat.StringIO` no longer exists in pandas

# Replace with your actual credentials and bucket details.
# NOTE: these are placeholders. Do not commit a real service-account key to
# source control; consider loading the key from a file kept outside the repo.
credentials_json = {
    "type": "service_account",
    "project_id": "your-project-id",
    "private_key_id": "your-private-key-id",
    "private_key": "your-private-key",
    "client_email": "your-client-email",
    "client_id": "your-client-id",
    "auth_uri": "https://accounts.google.com/o/oauth2/auth",
    "token_uri": "https://oauth2.googleapis.com/token",
    "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
    "client_x509_cert_url": "your-client-x509-cert-url",
}

client = storage.Client.from_service_account_info(credentials_json)
bucket_name = "your-bucket-name"
bucket = client.get_bucket(bucket_name)

# List all objects in the bucket whose names start with the folder prefix.
folder_prefix = "your-folder/"
blobs = bucket.list_blobs(prefix=folder_prefix)

# Read multiple CSV files into DataFrames, skipping any non-CSV objects.
dfs = []
for blob in blobs:
    if blob.name.endswith(".csv"):
        # `download_as_string` is a deprecated alias of `download_as_bytes`.
        data = blob.download_as_bytes().decode("utf-8")
        df = pd.read_csv(io.StringIO(data))
        dfs.append(df)

# `pd.concat` raises an opaque "No objects to concatenate" ValueError on an
# empty list; raise the same exception type with an actionable message instead.
if not dfs:
    raise ValueError(
        f"No .csv objects found in bucket {bucket_name!r} "
        f"under prefix {folder_prefix!r}"
    )

# Combine all DataFrames into one, renumbering the row index from zero.
combined_df = pd.concat(dfs, ignore_index=True)
print(combined_df.head())
In [ ]:
In [ ]: