fix_csv: bonus 1
This commit is contained in:
parent
d70505ac1d
commit
aa060e4c33
|
|
@ -1,27 +1,28 @@
|
|||
import logging
|
||||
from pathlib import Path
|
||||
import csv
|
||||
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
|
||||
def escape(string):
    """Return `string` escaped for use as a single CSV field.

    Embedded double quotes are doubled. The field is wrapped in double
    quotes whenever it contains a comma, a double quote, or a line break,
    because an unwrapped field with any of those characters would not be
    read back as the same value by a CSV parser.

    Args:
        string: The raw field text.

    Returns:
        The field text, ready to be joined with commas into a CSV row.
    """
    # Decide before doubling the quotes; the characters tested are
    # unaffected by the replacement, but this keeps the intent obvious.
    needs_quoting = any(char in string for char in ',"\r\n')
    string = string.replace('"', '""')
    if needs_quoting:
        string = '"' + string + '"'
    return string
|
||||
|
||||
|
||||
def normalise(input, output, in_delimiter="|", in_quote='"'):
    """Normalise `input` CSV file into `output` file.

    The input is parsed with the given delimiter and quote character and
    rewritten as comma-delimited CSV that uses double quotes, quoting only
    the fields that need it.

    Args:
        input: Path of the delimited file to read. (The name shadows the
            builtin but is kept because callers pass it by keyword.)
        output: Path of the CSV file to write; created or overwritten.
        in_delimiter: Field delimiter used by the input file.
        in_quote: Quote character used by the input file.
    """
    # Both files are opened with newline="" so the csv module, not the text
    # layer, controls line endings (see the csv module documentation).
    with Path(input).open("r", newline="") as in_file, Path(output).open(
        "w", newline=""
    ) as out_file:
        reader = csv.reader(in_file, delimiter=in_delimiter, quotechar=in_quote)
        # The output always uses the standard '"' quote character, whatever
        # the input used. The writer doubles embedded quotes itself, so the
        # fields must not be pre-escaped by hand.
        writer = csv.writer(
            out_file,
            delimiter=",",
            quotechar='"',
            quoting=csv.QUOTE_MINIMAL,
            lineterminator="\n",
        )
        writer.writerows(reader)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
@ -32,6 +33,14 @@ if __name__ == "__main__":
|
|||
parser.add_argument("input")
|
||||
parser.add_argument("output")
|
||||
|
||||
parser.add_argument("--in-delimiter", dest="in_delimiter", default="|")
|
||||
parser.add_argument("--in-quote", dest="in_quote", default='"')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
normalise(args.input, args.output)
|
||||
normalise(
|
||||
input=args.input,
|
||||
output=args.output,
|
||||
in_delimiter=args.in_delimiter,
|
||||
in_quote=args.in_quote,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -17,29 +17,34 @@ class FixCSVTests(unittest.TestCase):
|
|||
maxDiff = None
|
||||
|
||||
def test_pipe_file_to_csv_file(self):
    """Pipe-delimited rows are rewritten comma-delimited with no output printed."""
    pipe_text = dedent(
        """
        2012|Lexus|LFA
        2009|GMC|Yukon XL 1500
        1965|Ford|Mustang
        2005|Hyundai|Sonata
        1995|Mercedes-Benz|C-Class
    """
    ).lstrip()
    comma_text = dedent(
        """
        2012,Lexus,LFA
        2009,GMC,Yukon XL 1500
        1965,Ford,Mustang
        2005,Hyundai,Sonata
        1995,Mercedes-Benz,C-Class
    """
    ).lstrip()
    with make_file(pipe_text) as old, make_file("") as new:
        printed = run_program(f"fix_csv.py {old} {new}")
        # Read the result while the temporary files still exist.
        with open(new) as converted:
            written = converted.read()
    self.assertEqual(comma_text, written)
    self.assertEqual("", printed)
|
||||
|
||||
def test_delimiter_in_output(self):
|
||||
old_contents = dedent("""
|
||||
old_contents = dedent(
|
||||
"""
|
||||
02|Waylon Jennings|Honky Tonk Heroes (Like Me)
|
||||
04|Kris Kristofferson|To Beat The Devil
|
||||
11|Johnny Cash|Folsom Prison Blues
|
||||
|
|
@ -47,8 +52,10 @@ class FixCSVTests(unittest.TestCase):
|
|||
21|Hank Williams III|Mississippi Mud
|
||||
22|David Allan Coe|Willie, Waylon, And Me
|
||||
24|Bob Dylan|House Of The Risin' Sun
|
||||
""").lstrip()
|
||||
expected = dedent("""
|
||||
"""
|
||||
).lstrip()
|
||||
expected = dedent(
|
||||
"""
|
||||
02,Waylon Jennings,Honky Tonk Heroes (Like Me)
|
||||
04,Kris Kristofferson,To Beat The Devil
|
||||
11,Johnny Cash,Folsom Prison Blues
|
||||
|
|
@ -56,21 +63,24 @@ class FixCSVTests(unittest.TestCase):
|
|||
21,Hank Williams III,Mississippi Mud
|
||||
22,David Allan Coe,"Willie, Waylon, And Me"
|
||||
24,Bob Dylan,House Of The Risin' Sun
|
||||
""").lstrip()
|
||||
"""
|
||||
).lstrip()
|
||||
with make_file(old_contents) as old, make_file("") as new:
|
||||
output = run_program(f'fix_csv.py {old} {new}')
|
||||
output = run_program(f"fix_csv.py {old} {new}")
|
||||
with open(new) as new_file:
|
||||
new_contents = new_file.read()
|
||||
self.assertEqual(expected, new_contents)
|
||||
self.assertEqual("", output)
|
||||
|
||||
def test_original_file_is_unchanged(self):
    """Running the program leaves the input file's contents untouched."""
    source_text = dedent(
        """
        2012|Lexus|LFA
        2009|GMC|Yukon XL 1500
    """
    ).lstrip()
    with make_file(source_text) as old, make_file("") as new:
        run_program(f"fix_csv.py {old} {new}")
        # Re-read the input after the run, while it still exists.
        with open(old) as source_file:
            after_run = source_file.read()
    self.assertEqual(source_text, after_run)
|
||||
|
|
@ -78,54 +88,63 @@ class FixCSVTests(unittest.TestCase):
|
|||
def test_call_with_too_many_files(self):
    """Passing a third positional file argument makes the program fail."""
    with make_file("") as old, make_file("") as new, self.assertRaises(
        BaseException
    ):
        command = f"fix_csv.py {old} {new} {old}"
        run_program(command)
|
||||
|
||||
# To test the Bonus part of this exercise, comment out the following line
|
||||
@unittest.expectedFailure
|
||||
# @unittest.expectedFailure
|
||||
def test_in_delimiter_and_in_quote(self):
    """The --in-delimiter and --in-quote options control how input is parsed."""
    source_text = dedent(
        """
        2012 Lexus "LFA"
        2009 GMC 'Yukon XL 1500'
        1995 "Mercedes-Benz" C-Class
    """
    ).lstrip()
    # Space delimiter with the default double-quote character.
    with_default_quote = dedent(
        """
        2012,Lexus,LFA
        2009,GMC,'Yukon,XL,1500'
        1995,Mercedes-Benz,C-Class
    """
    ).lstrip()
    # Space delimiter with a single-quote character.
    with_single_quote = dedent(
        '''
        2012,Lexus,"""LFA"""
        2009,GMC,Yukon XL 1500
        1995,"""Mercedes-Benz""",C-Class
    '''
    ).lstrip()
    with make_file(source_text) as old, make_file("") as new:
        delimiter_only = f'fix_csv.py {old} {new} --in-delimiter=" "'
        run_program(delimiter_only)
        with open(new) as converted:
            self.assertEqual(with_default_quote, converted.read())

        delimiter_and_quote = (
            f"""fix_csv.py --in-delimiter=" " --in-quote="'" {old} {new}"""
        )
        run_program(delimiter_and_quote)
        with open(new) as converted:
            self.assertEqual(with_single_quote, converted.read())
|
||||
|
||||
# To test the Bonus part of this exercise, comment out the following line
|
||||
@unittest.expectedFailure
|
||||
# @unittest.expectedFailure
|
||||
def test_autodetect_input_format(self):
|
||||
contents1 = dedent("""
|
||||
contents1 = dedent(
|
||||
"""
|
||||
'2012' 'Lexus' 'LFA'
|
||||
'2009' 'GMC' 'Yukon XL 1500'
|
||||
'1995' 'Mercedes-Benz' 'C-Class'
|
||||
""").lstrip()
|
||||
expected1 = dedent("""
|
||||
"""
|
||||
).lstrip()
|
||||
expected1 = dedent(
|
||||
"""
|
||||
2012,Lexus,LFA
|
||||
2009,GMC,Yukon XL 1500
|
||||
1995,Mercedes-Benz,C-Class
|
||||
""").lstrip()
|
||||
"""
|
||||
).lstrip()
|
||||
with make_file(contents1) as old, make_file("") as new:
|
||||
run_program(f'fix_csv.py {old} {new}')
|
||||
run_program(f"fix_csv.py {old} {new}")
|
||||
with open(new) as new_file:
|
||||
self.assertEqual(expected1, new_file.read())
|
||||
contents2 = dedent("""
|
||||
contents2 = dedent(
|
||||
"""
|
||||
"02"\t"Waylon Jennings"\t"Honky Tonk Heroes (Like Me)"\t"3:29"
|
||||
"04"\t"Kris Kristofferson"\t"To Beat The Devil"\t"4:05"
|
||||
"11"\t"Johnny Cash"\t"Folsom Prison Blues"\t"2:51"
|
||||
|
|
@ -133,8 +152,10 @@ class FixCSVTests(unittest.TestCase):
|
|||
"21"\t"Hank Williams III"\t"Mississippi Mud"\t"3:32"
|
||||
"22"\t"David Allan Coe"\t"Willie, Waylon, And Me"\t"3:24"
|
||||
"24"\t"Bob Dylan"\t"House Of The Risin' Sun"\t"5:20"
|
||||
""").lstrip()
|
||||
expected2 = dedent("""
|
||||
"""
|
||||
).lstrip()
|
||||
expected2 = dedent(
|
||||
"""
|
||||
02,Waylon Jennings,Honky Tonk Heroes (Like Me),3:29
|
||||
04,Kris Kristofferson,To Beat The Devil,4:05
|
||||
11,Johnny Cash,Folsom Prison Blues,2:51
|
||||
|
|
@ -142,9 +163,10 @@ class FixCSVTests(unittest.TestCase):
|
|||
21,Hank Williams III,Mississippi Mud,3:32
|
||||
22,David Allan Coe,"Willie, Waylon, And Me",3:24
|
||||
24,Bob Dylan,House Of The Risin' Sun,5:20
|
||||
""").lstrip()
|
||||
"""
|
||||
).lstrip()
|
||||
with make_file(contents2) as old, make_file("") as new:
|
||||
run_program(f'fix_csv.py {old} {new}')
|
||||
run_program(f"fix_csv.py {old} {new}")
|
||||
with open(new) as new_file:
|
||||
self.assertEqual(expected2, new_file.read())
|
||||
|
||||
|
|
@ -159,25 +181,25 @@ def run_program(arguments="", raises=DummyException):
|
|||
|
||||
If raises is specified, ensure the given exception is raised.
|
||||
"""
|
||||
arguments = arguments.replace('\\', '\\\\')
|
||||
arguments = arguments.replace("\\", "\\\\")
|
||||
path, *args = shlex.split(arguments)
|
||||
old_args = sys.argv
|
||||
warnings.simplefilter("ignore", ResourceWarning)
|
||||
try:
|
||||
sys.argv = [path] + args
|
||||
try:
|
||||
if '__main__' in sys.modules:
|
||||
del sys.modules['__main__']
|
||||
if "__main__" in sys.modules:
|
||||
del sys.modules["__main__"]
|
||||
with redirect_stdout(StringIO()) as output:
|
||||
with redirect_stderr(output):
|
||||
SourceFileLoader('__main__', path).load_module()
|
||||
SourceFileLoader("__main__", path).load_module()
|
||||
except raises:
|
||||
return output.getvalue()
|
||||
except SystemExit as e:
|
||||
if e.args != (0,):
|
||||
raise SystemExit(output.getvalue()) from e
|
||||
finally:
|
||||
sys.modules.pop('__main__', None)
|
||||
sys.modules.pop("__main__", None)
|
||||
if raises is not DummyException:
|
||||
raise AssertionError("{} not raised".format(raises))
|
||||
return output.getvalue()
|
||||
|
|
@ -188,7 +210,7 @@ def run_program(arguments="", raises=DummyException):
|
|||
@contextmanager
|
||||
def make_file(contents=None):
|
||||
"""Context manager providing name of a file containing given contents."""
|
||||
with NamedTemporaryFile(mode='wt', encoding='utf-8', delete=False) as f:
|
||||
with NamedTemporaryFile(mode="wt", encoding="utf-8", delete=False) as f:
|
||||
if contents:
|
||||
f.write(contents)
|
||||
try:
|
||||
|
|
@ -198,4 +220,4 @@ def make_file(contents=None):
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main(verbosity=2)
|
||||
unittest.main(verbosity=2)
|
||||
|
|
|
|||
Loading…
Reference in a new issue