2021-01-25 22:39:04 +01:00
|
|
|
import logging
|
|
|
|
|
from pathlib import Path
|
2021-01-25 23:42:24 +01:00
|
|
|
import csv
|
2021-01-25 22:39:04 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
# Configure the root logger so that records at INFO level and above are emitted.
logging.basicConfig(level=logging.INFO)
|
|
|
|
|
|
|
|
|
|
|
2021-01-25 23:50:25 +01:00
|
|
|
def normalise(input, output, in_delimiter=None, in_quote=None):
    """Normalise the `input` CSV file into the `output` file.

    The input is parsed with `in_delimiter` / `in_quote` and rewritten as a
    comma-delimited file with minimal quoting and LF line endings. The output
    keeps `in_quote` as its quote character.

    Args:
        input: Path of the CSV file to read.
        output: Path of the file to write. It is created if missing and
            overwritten otherwise.
        in_delimiter: Field delimiter of the input. When None, it is guessed
            from the file content with `csv.Sniffer`.
        in_quote: Quote character of the input (also used for the output).
            When None, it is guessed from the file content with `csv.Sniffer`.

    Raises:
        csv.Error: If a value has to be guessed and the sniffer cannot
            determine the dialect, or if the input cannot be parsed.
        OSError: If either file cannot be opened.
    """
    source = Path(input)
    target = Path(output)

    # Create the output up front, before the input is read.
    target.touch(exist_ok=True)

    # Only sniff when something actually has to be guessed: the sniffer raises
    # csv.Error on content it cannot classify (e.g. an empty or single-column
    # file), which must not break a call that already names both values.
    if in_delimiter is None or in_quote is None:
        with source.open("r", newline="") as in_file:
            dialect = csv.Sniffer().sniff(in_file.read())
        if in_delimiter is None:
            in_delimiter = dialect.delimiter
        if in_quote is None:
            in_quote = dialect.quotechar

    # newline="" on both files lets the csv module own line-ending handling,
    # so the "\n" line terminator below is written verbatim on every platform.
    with source.open("r", newline="") as in_file, target.open(
        "w", newline=""
    ) as out_file:
        reader = csv.reader(in_file, delimiter=in_delimiter, quotechar=in_quote)
        writer = csv.writer(
            out_file,
            delimiter=",",
            quotechar=in_quote,
            quoting=csv.QUOTE_MINIMAL,
            lineterminator="\n",
        )
        for row in reader:
            # NOTE(review): every double quote inside a field is replaced by
            # three double quotes before writing; when the quote character is
            # '"' the writer escapes them again on top of that. Kept as-is to
            # preserve existing output -- confirm this is the intended format.
            writer.writerow([element.replace('"', '"""') for element in row])
|
2021-01-25 22:39:04 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and run the normalisation.
    import argparse

    cli = argparse.ArgumentParser(description="Smol prog to normalise CSV files.")

    # Required positional arguments: source and destination paths.
    for positional in ("input", "output"):
        cli.add_argument(positional)

    # Optional overrides for the input dialect; None is passed through when
    # the option is omitted.
    for flag, destination in (
        ("--in-delimiter", "in_delimiter"),
        ("--in-quote", "in_quote"),
    ):
        cli.add_argument(flag, dest=destination, default=None)

    # The parsed namespace holds exactly the four keyword arguments that
    # normalise() accepts, so it can be unpacked directly.
    normalise(**vars(cli.parse_args()))
|