python_morsels/fix_csv/fix_csv.py

53 lines
1.5 KiB
Python
Raw Normal View History

2021-01-25 22:39:04 +01:00
import logging
from pathlib import Path
2021-01-25 23:42:24 +01:00
import csv
2021-01-25 22:39:04 +01:00
# Configure the root logger once at import time so that records of level
# INFO and above are emitted.
logging.basicConfig(level=logging.INFO)
2021-01-25 23:50:25 +01:00
def normalise(input, output, in_delimiter=None, in_quote=None):
    """Normalise the `input` CSV file into the `output` file.

    The output is always comma-delimited, uses double quotes only where a
    field needs quoting, and ends every row with a single line feed.

    Args:
        input: Path of the CSV file to read.
        output: Path of the file to write; it is created or overwritten.
        in_delimiter: Field delimiter used by `input`. When None it is
            guessed with `csv.Sniffer`.
        in_quote: Quote character used by `input`. When None it is guessed
            with `csv.Sniffer`.

    Raises:
        OSError: If `input` cannot be opened or `output` cannot be written.
    """
    with Path(input).open("r", newline="") as in_file:
        if in_delimiter is None or in_quote is None:
            # Sniff only when something actually has to be guessed, so a
            # fully described input can never fail on dialect detection.
            try:
                dialect = csv.Sniffer().sniff(in_file.read())
            except csv.Error:
                # Raised for empty or ambiguous input; fall back to the
                # standard comma / double-quote dialect instead of crashing.
                logging.warning(
                    "Could not detect the CSV dialect of %s; "
                    "assuming ',' as delimiter and '\"' as quote.",
                    input,
                )
                dialect = csv.excel
            # Rewind so the reader below starts from the first row again.
            in_file.seek(0)
            if in_delimiter is None:
                in_delimiter = dialect.delimiter
            if in_quote is None:
                in_quote = dialect.quotechar

        # newline="" keeps the text layer from translating the writer's
        # "\n" terminator (e.g. into "\r\n" on Windows).
        with Path(output).open("w", newline="") as out_file:
            reader = csv.reader(
                in_file, delimiter=in_delimiter, quotechar=in_quote
            )
            writer = csv.writer(
                out_file,
                delimiter=",",
                # Always emit the standard quote character, whatever the
                # input used: the output is the normalised form.
                quotechar='"',
                quoting=csv.QUOTE_MINIMAL,
                lineterminator="\n",
            )
            # The writer escapes embedded quotes itself, so rows are passed
            # through untouched; escaping them by hand corrupts the data.
            writer.writerows(reader)
2021-01-25 22:39:04 +01:00
if __name__ == "__main__":
    # Command-line entry point: parse the two file paths plus the optional
    # input-dialect overrides and hand them straight to `normalise`.
    import argparse

    cli = argparse.ArgumentParser(
        description="Smol prog to normalise CSV files."
    )

    # Required positional arguments: source file, then destination file.
    for positional in ("input", "output"):
        cli.add_argument(positional)

    # Optional overrides; None means "let normalise work it out".
    for flag, target in (
        ("--in-delimiter", "in_delimiter"),
        ("--in-quote", "in_quote"),
    ):
        cli.add_argument(flag, dest=target, default=None)

    # The parsed namespace holds exactly the keyword arguments of normalise.
    normalise(**vars(cli.parse_args()))