In [8]:
# Writing to a TXT File with Double Quotes
import pandas as pd
import csv
data = {
''''Name'''': [''''Alice'''', ''''Bob'''', ''''Charlie''''],
''''Age'''': [25, 30, 35],
''''City'''': [''''New York'''', ''''Los Angeles'''', ''''Chicago'''']
}
df = pd.DataFrame(data)
df.to_csv(''''sample1.txt'''', sep='''';'''', quotechar=''''"'''', index=False, quoting=csv.QUOTE_NONNUMERIC)
In [9]:
# Reading a TXT File with Double Quotes
import pandas as pd
# Sample data in a TXT file (sample.txt)
# "Name";"Age";"City"
# "Alice";"25";"New York"
# "Bob";"30";"Los Angeles"
# "Charlie";"35";"Chicago"
df = pd.read_csv(''''sample1.txt'''', sep='''';'''', quotechar=''''"'''') #, skiprows=1
print(df)
Name Age City 0 Alice 25 New York 1 Bob 30 Los Angeles 2 Charlie 35 Chicago
In [10]:
# Writing to a TXT File and Ignoring Index
import pandas as pd
import csv
data = {
''''Name'''': [''''Alice'''', ''''Bob'''', ''''Charlie''''],
''''Age'''': [25, 30, 35],
''''City'''': [''''New York'''', ''''Los Angeles'''', ''''Chicago'''']
}
df = pd.DataFrame(data)
df.to_csv(''''sample2.txt'''', sep='''','''', quotechar=''''"'''', quoting=csv.QUOTE_NONNUMERIC, index=False)
df.to_csv(''''sample3.txt'''', sep=''''|'''', quotechar=''''"'''', quoting=csv.QUOTE_NONNUMERIC, index=False)
In [11]:
# Reading a TXT File
import pandas as pd
# Sample data in a TXT file (sample.txt)
# This is a header line to be ignored
# "Name","Age","City"
# "Alice","25","New York"
# "Bob","30","Los Angeles"
# "Charlie","35","Chicago"
df = pd.read_csv(''''sample2.txt'''', sep='''','''', quotechar=''''"'''') #, skiprows=1
df = pd.read_csv(''''sample3.txt'''', sep=''''|'''', quotechar=''''"'''')
print(df)
Name Age City 0 Alice 25 New York 1 Bob 30 Los Angeles 2 Charlie 35 Chicago
In [12]:
# Reading a TXT File with Custom Separator and Double Quotes
import pandas as pd
# Sample data in a TXT file (sample.txt)
# "Name"|"Age"|"City"
# "Alice"|"25"|"New York"
# "Bob"|"30"|"Los Angeles"
# "Charlie"|"35"|"Chicago"
# Reading the TXT file with a custom separator
df = pd.read_csv(''''sample3.txt'''', sep=''''|'''', quotechar=''''"'''')
# Display the DataFrame
print(df)
Name Age City 0 Alice 25 New York 1 Bob 30 Los Angeles 2 Charlie 35 Chicago
In [13]:
# Processing a Large Text File Line by Line
def process_line(line):
# Here you can add your logic to process each line
print(line.strip()) # For demonstration, just print the line
#with open(''''large_file.txt'''', ''''r'''') as file:
with open(''''sample3.txt'''', ''''r'''') as file:
# Iterate through each line in the file
for line in file:
process_line(line)
"Name"|"Age"|"City" "Alice"|25|"New York" "Bob"|30|"Los Angeles" "Charlie"|35|"Chicago"
In [14]:
# Handling Different Separators and Ignoring the First Line
def process_fields(fields):
# Here you can add your logic to process each field
print(fields) # For demonstration, just print the fields
#with open(''''large_file.txt'''', ''''r'''') as file:
with open(''''sample2.txt'''', ''''r'''') as file:
# Skip the first line (header)
next(file)
for line in file:
# Split the line by a specific separator (e.g., comma, semicolon)
fields = line.strip().split('''';'''')
process_fields(fields)
[''''"Alice",25,"New York"''''] [''''"Bob",30,"Los Angeles"''''] [''''"Charlie",35,"Chicago"'''']
In [15]:
# Using csv Module for More Complex Parsing
import csv
def process_row(row):
# Here you can add your logic to process each row
print(row) # For demonstration, just print the row
#with open(''''large_file.txt'''', ''''r'''') as file:
with open(''''sample2.txt'''', ''''r'''') as file:
reader = csv.reader(file, delimiter='''';'''', quotechar=''''"'''')
for row in reader:
process_row(row)
[''''Name,"Age","City"''''] [''''Alice,25,"New York"''''] [''''Bob,30,"Los Angeles"''''] [''''Charlie,35,"Chicago"'''']