diff --git a/fix_csv/fix_csv.py b/fix_csv/fix_csv.py
index c6595e7..a189665 100644
--- a/fix_csv/fix_csv.py
+++ b/fix_csv/fix_csv.py
@@ -1,27 +1,32 @@
+import csv
 import logging
 from pathlib import Path
 
 
 logging.basicConfig(level=logging.INFO)
 
 
-def escape(string):
-    """Return an escaped version of `string`."""
-    string = string.replace('"', '""')
-    if "," in string:
-        string = '"' + string + '"'
-    return string
-
-
-def normalise(input, output):
-    """Normalise `input` CSV file into `output` file."""
+def normalise(input, output, in_delimiter="|", in_quote='"'):
+    """Normalise the delimited file `input` into standard CSV at `output`.
+
+    `in_delimiter` and `in_quote` describe the input dialect only; the
+    output is always comma-delimited and quoted with '"'.
+    """
     Path(output).touch(exist_ok=True)
-    with Path(input).open("r") as in_file, Path(output).open("w") as out_file:
-        for in_line in in_file.readlines():
-            out_elements = [
-                escape(in_element) for in_element in in_line.strip().split("|")
-            ]
-            out_file.write(f"{','.join(out_elements)}\n")
+    with Path(input).open("r", newline="") as in_file, Path(output).open(
+        "w"
+    ) as out_file:
+        reader = csv.reader(in_file, delimiter=in_delimiter, quotechar=in_quote)
+        writer = csv.writer(
+            out_file,
+            delimiter=",",
+            quotechar='"',
+            quoting=csv.QUOTE_MINIMAL,
+            lineterminator="\n",
+        )
+        # The writer doubles embedded quotes itself; escaping them by hand
+        # as well would triple them in the output.
+        writer.writerows(reader)
 
 
 if __name__ == "__main__":
@@ -32,6 +37,14 @@ if __name__ == "__main__":
     parser.add_argument("input")
     parser.add_argument("output")
 
+    parser.add_argument("--in-delimiter", dest="in_delimiter", default="|")
+    parser.add_argument("--in-quote", dest="in_quote", default='"')
+
     args = parser.parse_args()
 
-    normalise(args.input, args.output)
+    normalise(
+        input=args.input,
+        output=args.output,
+        in_delimiter=args.in_delimiter,
+        in_quote=args.in_quote,
+    )
diff --git a/fix_csv/test_fix_csv.py b/fix_csv/test_fix_csv.py
index 5bb0aeb..843971d 100644
--- a/fix_csv/test_fix_csv.py
+++ b/fix_csv/test_fix_csv.py
@@ -17,29 +17,34 @@ class FixCSVTests(unittest.TestCase):
     maxDiff = None
 
     def test_pipe_file_to_csv_file(self):
-        old_contents = dedent("""
+        old_contents = dedent(
+            """
             2012|Lexus|LFA
             2009|GMC|Yukon XL 1500
             1965|Ford|Mustang
             2005|Hyundai|Sonata
             1995|Mercedes-Benz|C-Class
-        """).lstrip()
-        expected = dedent("""
+        """
+        ).lstrip()
+        expected = dedent(
+            """
             2012,Lexus,LFA
             2009,GMC,Yukon XL 1500
             1965,Ford,Mustang
             2005,Hyundai,Sonata
             1995,Mercedes-Benz,C-Class
-        """).lstrip()
+        """
+        ).lstrip()
         with make_file(old_contents) as old, make_file("") as new:
-            output = run_program(f'fix_csv.py {old} {new}')
+            output = run_program(f"fix_csv.py {old} {new}")
             with open(new) as new_file:
                 new_contents = new_file.read()
             self.assertEqual(expected, new_contents)
             self.assertEqual("", output)
 
     def test_delimiter_in_output(self):
-        old_contents = dedent("""
+        old_contents = dedent(
+            """
             02|Waylon Jennings|Honky Tonk Heroes (Like Me)
             04|Kris Kristofferson|To Beat The Devil
             11|Johnny Cash|Folsom Prison Blues
@@ -47,8 +52,10 @@ class FixCSVTests(unittest.TestCase):
             21|Hank Williams III|Mississippi Mud
             22|David Allan Coe|Willie, Waylon, And Me
             24|Bob Dylan|House Of The Risin' Sun
-        """).lstrip()
-        expected = dedent("""
+        """
+        ).lstrip()
+        expected = dedent(
+            """
             02,Waylon Jennings,Honky Tonk Heroes (Like Me)
             04,Kris Kristofferson,To Beat The Devil
             11,Johnny Cash,Folsom Prison Blues
@@ -56,21 +63,24 @@ class FixCSVTests(unittest.TestCase):
             21,Hank Williams III,Mississippi Mud
             22,David Allan Coe,"Willie, Waylon, And Me"
             24,Bob Dylan,House Of The Risin' Sun
-        """).lstrip()
+        """
+        ).lstrip()
         with make_file(old_contents) as old, make_file("") as new:
-            output = run_program(f'fix_csv.py {old} {new}')
+            output = run_program(f"fix_csv.py {old} {new}")
             with open(new) as new_file:
                 new_contents = new_file.read()
             self.assertEqual(expected, new_contents)
             self.assertEqual("", output)
 
     def test_original_file_is_unchanged(self):
-        old_contents = dedent("""
+        old_contents = dedent(
+            """
             2012|Lexus|LFA
             2009|GMC|Yukon XL 1500
-        """).lstrip()
+        """
+        ).lstrip()
         with make_file(old_contents) as old, make_file("") as new:
-            run_program(f'fix_csv.py {old} {new}')
+            run_program(f"fix_csv.py {old} {new}")
             with open(old) as old_file:
                 contents = old_file.read()
             self.assertEqual(old_contents, contents)
@@ -78,54 +88,63 @@ class FixCSVTests(unittest.TestCase):
     def test_call_with_too_many_files(self):
         with make_file("") as old, make_file("") as new:
             with self.assertRaises(BaseException):
-                run_program(f'fix_csv.py {old} {new} {old}')
+                run_program(f"fix_csv.py {old} {new} {old}")
 
     # To test the Bonus part of this exercise, comment out the following line
-    @unittest.expectedFailure
+    # @unittest.expectedFailure
     def test_in_delimiter_and_in_quote(self):
-        old_contents = dedent("""
+        old_contents = dedent(
+            """
             2012 Lexus "LFA"
             2009 GMC 'Yukon XL 1500'
             1995 "Mercedes-Benz" C-Class
-        """).lstrip()
-        expected1 = dedent("""
+        """
+        ).lstrip()
+        expected1 = dedent(
+            """
             2012,Lexus,LFA
             2009,GMC,'Yukon,XL,1500'
             1995,Mercedes-Benz,C-Class
-        """).lstrip()
-        expected2 = dedent('''
+        """
+        ).lstrip()
+        expected2 = dedent(
+            '''
             2012,Lexus,"""LFA"""
             2009,GMC,Yukon XL 1500
             1995,"""Mercedes-Benz""",C-Class
-        ''').lstrip()
+        '''
+        ).lstrip()
         with make_file(old_contents) as old, make_file("") as new:
             run_program(f'fix_csv.py {old} {new} --in-delimiter=" "')
             with open(new) as new_file:
                 self.assertEqual(expected1, new_file.read())
-            run_program(
-                f'''fix_csv.py --in-delimiter=" " --in-quote="'" {old} {new}'''
-            )
+            run_program(f"""fix_csv.py --in-delimiter=" " --in-quote="'" {old} {new}""")
             with open(new) as new_file:
                 self.assertEqual(expected2, new_file.read())
 
     # To test the Bonus part of this exercise, comment out the following line
     @unittest.expectedFailure
     def test_autodetect_input_format(self):
-        contents1 = dedent("""
+        contents1 = dedent(
+            """
             '2012' 'Lexus' 'LFA'
             '2009' 'GMC' 'Yukon XL 1500'
             '1995' 'Mercedes-Benz' 'C-Class'
-        """).lstrip()
-        expected1 = dedent("""
+        """
+        ).lstrip()
+        expected1 = dedent(
+            """
             2012,Lexus,LFA
             2009,GMC,Yukon XL 1500
             1995,Mercedes-Benz,C-Class
-        """).lstrip()
+        """
+        ).lstrip()
         with make_file(contents1) as old, make_file("") as new:
-            run_program(f'fix_csv.py {old} {new}')
+            run_program(f"fix_csv.py {old} {new}")
             with open(new) as new_file:
                 self.assertEqual(expected1, new_file.read())
-        contents2 = dedent("""
+        contents2 = dedent(
+            """
             "02"\t"Waylon Jennings"\t"Honky Tonk Heroes (Like Me)"\t"3:29"
             "04"\t"Kris Kristofferson"\t"To Beat The Devil"\t"4:05"
             "11"\t"Johnny Cash"\t"Folsom Prison Blues"\t"2:51"
@@ -133,8 +152,10 @@ class FixCSVTests(unittest.TestCase):
             "21"\t"Hank Williams III"\t"Mississippi Mud"\t"3:32"
             "22"\t"David Allan Coe"\t"Willie, Waylon, And Me"\t"3:24"
             "24"\t"Bob Dylan"\t"House Of The Risin' Sun"\t"5:20"
-        """).lstrip()
-        expected2 = dedent("""
+        """
+        ).lstrip()
+        expected2 = dedent(
+            """
             02,Waylon Jennings,Honky Tonk Heroes (Like Me),3:29
             04,Kris Kristofferson,To Beat The Devil,4:05
             11,Johnny Cash,Folsom Prison Blues,2:51
@@ -142,9 +163,10 @@ class FixCSVTests(unittest.TestCase):
             21,Hank Williams III,Mississippi Mud,3:32
             22,David Allan Coe,"Willie, Waylon, And Me",3:24
             24,Bob Dylan,House Of The Risin' Sun,5:20
-        """).lstrip()
+        """
+        ).lstrip()
         with make_file(contents2) as old, make_file("") as new:
-            run_program(f'fix_csv.py {old} {new}')
+            run_program(f"fix_csv.py {old} {new}")
             with open(new) as new_file:
                 self.assertEqual(expected2, new_file.read())
 
@@ -159,25 +181,25 @@ def run_program(arguments="", raises=DummyException):
 
     If raises is specified, ensure the given exception is raised.
     """
-    arguments = arguments.replace('\\', '\\\\')
+    arguments = arguments.replace("\\", "\\\\")
     path, *args = shlex.split(arguments)
     old_args = sys.argv
     warnings.simplefilter("ignore", ResourceWarning)
     try:
         sys.argv = [path] + args
         try:
-            if '__main__' in sys.modules:
-                del sys.modules['__main__']
+            if "__main__" in sys.modules:
+                del sys.modules["__main__"]
             with redirect_stdout(StringIO()) as output:
                 with redirect_stderr(output):
-                    SourceFileLoader('__main__', path).load_module()
+                    SourceFileLoader("__main__", path).load_module()
         except raises:
             return output.getvalue()
         except SystemExit as e:
             if e.args != (0,):
                 raise SystemExit(output.getvalue()) from e
         finally:
-            sys.modules.pop('__main__', None)
+            sys.modules.pop("__main__", None)
         if raises is not DummyException:
             raise AssertionError("{} not raised".format(raises))
         return output.getvalue()
@@ -188,7 +210,7 @@ def run_program(arguments="", raises=DummyException):
 @contextmanager
 def make_file(contents=None):
     """Context manager providing name of a file containing given contents."""
-    with NamedTemporaryFile(mode='wt', encoding='utf-8', delete=False) as f:
+    with NamedTemporaryFile(mode="wt", encoding="utf-8", delete=False) as f:
         if contents:
             f.write(contents)
     try:
@@ -198,4 +220,4 @@ def make_file(contents=None):
 
 
 if __name__ == "__main__":
-    unittest.main(verbosity=2)
\ No newline at end of file
+    unittest.main(verbosity=2)