2018-09-14 23:42:08 +02:00
|
|
|
try:
    # The ABC aliases were removed from 'collections' in Python 3.10.
    from collections.abc import Iterable
except ImportError:
    from collections import Iterable

from boltons.iterutils import remap
|
2018-09-14 23:42:08 +02:00
|
|
|
|
|
|
|
|
def unique(seq):
    """Return a new sequence made of the unique elements from seq.

    The first occurrence of each element is kept and the original order is
    preserved. Elements are compared with ``==`` (through a list membership
    test), so unhashable elements such as dicts or lists are supported; the
    price is a quadratic number of comparisons.

    The result is built with ``type(seq)(...)``, hence the type of seq must
    accept a list of elements as its single constructor argument (list,
    tuple and set do).
    """
    uniques = []
    for item in seq:
        # A list lookup is used on purpose: a set would reject unhashable items.
        if item not in uniques:
            uniques.append(item)
    return type(seq)(uniques)
|
|
|
|
|
|
|
|
|
|
|
2018-09-16 15:49:12 +02:00
|
|
|
class StrType(str):
    """Helper class to print and compare object types (used in function 'get_structure').

    When instantiating this class, provide the object whose type you want to print.

    __hash__, __eq__ and __ne__ are implemented in order to filter duplicates and yet
    stay compatible with uniqueness detection (to fit in sets for instance): two
    instances built from objects of the same type are equal and share the same hash.

    Note that the string content inherited from str is built by str itself from the
    given object; only the 'value' attribute (the type name) is used for hashing,
    equality and printing.
    """

    def __init__(self, obj):
        # Name of the type of the wrapped object, e.g. 'int' or 'datetime'.
        self.value = type(obj).__name__

    def __hash__(self):
        return hash(self.value)

    def __eq__(self, other):
        # Anything that is not a StrType is never equal, plain strings included.
        if not isinstance(other, StrType):
            return False
        return self.value == other.value

    def __ne__(self, other):
        # str provides its own __ne__, which compares the inherited string
        # content; without this override '==' and '!=' could both be True.
        return not self.__eq__(other)

    def __str__(self):
        return f"<{self.value}>"

    __repr__ = __str__
|
2018-09-14 23:42:08 +02:00
|
|
|
|
|
|
|
|
|
2018-09-17 22:11:46 +02:00
|
|
|
def get_structure(data, folding_keys=(), folding_tag=" (folded)"):
    """Return a kind of map of the could-be nested data.

    Every leaf is replaced by a StrType holding the name of its type.
    Non-empty dicts are mapped value by value; other non-empty containers are
    mapped element by element, then rebuilt with their own type and stripped
    of their duplicates. Empty containers, str, bytes and bytearray are leaves.

    If 'folding_keys' are provided then the corresponding values in the data set will be
    folded, that is to say replaced by their data type (thus transformed into leaves). The
    key will be appended with 'folding_tag'. Only container values are folded: a value
    that already is a leaf is left untouched and its key is not tagged.

    Args:
        data: the (possibly nested) data to describe.
        folding_keys: collection of keys whose container values must be folded.
        folding_tag: text appended to every key whose value has been folded.

    Returns:
        A structure shaped like data in which leaves are StrType instances.
    """

    def is_container(obj):
        # Strings and byte strings are iterable but are treated as leaves.
        return (isinstance(obj, Iterable)
                and not isinstance(obj, (str, bytes, bytearray)))

    if folding_keys:

        def visit_fold(path, key, value):
            # The test must bear on the visited value, not on the whole data
            # set: leaves have nothing to fold and their type may not be
            # constructible without arguments.
            if key in folding_keys and is_container(value):
                # An empty instance of the same type is later reported as a leaf.
                return f"{key}{folding_tag}", type(value)()
            return True

        data = remap(data, visit=visit_fold)

    def extract_structure(node):
        if node and isinstance(node, dict):
            return {key: extract_structure(value) for key, value in node.items()}
        if node and is_container(node):
            # The iterable is not empty, we map each of its elements
            return unique(type(node)([extract_structure(item) for item in node]))
        # It's a leaf (or empty iterable),
        # we return the name of its type
        return StrType(node)

    return extract_structure(data)
|
2018-09-14 23:42:08 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Demonstration: build a nested data set covering the supported cases,
    # then print it next to the structure computed by get_structure.
    from datetime import datetime
    from pprint import pprint

    class MaClasse:
        # Empty user-defined class, used to show how custom types are named.
        pass

    # One sample value for each kind of leaf and simple container.
    some_types = {
        'MaClasse': MaClasse(),
        'datetime': datetime.now(),
        'empty-list': [],
        'empty-tuple': (),
        'float': 3.14,
        'int': 0,
        'list': [False],
        'set': {'a'},
        'str': 'Babylone',
        'tuple': ('jah',),
    }

    # Sequences holding duplicated types, to exercise duplicates removal.
    sequence_types = {
        'lists': ['a', 'b', 'c', datetime.now(), [1, 2, 3], [1, 2, 3]],
        'tuples': ('a', 'b', 'c', datetime.now(), [1, 2, 3], [1, 2, 3]),
        'sets': {'hello', 'world', 42, True},
    }

    # Two dicts sharing the same structure and a third, different one.
    dict_samples = [
        {'same': 3, 'as': b'the other'},
        {'same': 3, 'as': b'the other'},
        {'different': 'from', 'the': 'other'},
    ]

    # Values stored under the keys handed over to get_structure for folding.
    folded_samples = {
        'Fold me :)': [1, 2, 3, 4, 5],
        'Fold me too': "folded, can't see me!",
    }

    nested = {
        'some types': some_types,
        'sequence types': sequence_types,
        '2 same dict 1 other': dict_samples,
        'play me': MaClasse,
        'Two folded keys': folded_samples,
    }

    structure = get_structure(
        nested,
        folding_keys=['Fold me :)', 'Fold me too'],
        folding_tag=' [+]',
    )

    print("The nested data:")
    pprint(nested)

    print("")
    print("Its mapping:")
    pprint(structure)
|
2018-09-14 23:48:51 +02:00
|
|
|
|
|
|
|
|
## Output:
|
|
|
|
|
##
|
|
|
|
|
##{'2 same dict 1 other': [{'as': <bytes>, 'same': <int>},
|
|
|
|
|
## {'different': <str>, 'the': <str>}],
|
2018-09-23 22:53:25 +02:00
|
|
|
## 'Two folded keys': {'Fold me :) [+]': <list>, 'Fold me too [+]': <str>},
|
2018-09-14 23:48:51 +02:00
|
|
|
## 'play me': <type>,
|
|
|
|
|
## 'sequence types': {'lists': [<str>, <datetime>, [<int>]],
|
2018-09-23 22:53:25 +02:00
|
|
|
## 'sets': {<int>, <bool>, <str>},
|
2018-09-14 23:48:51 +02:00
|
|
|
## 'tuples': (<str>, <datetime>, [<int>])},
|
|
|
|
|
## 'some types': {'MaClasse': <MaClasse>,
|
|
|
|
|
## 'datetime': <datetime>,
|
|
|
|
|
## 'empty-list': <list>,
|
|
|
|
|
## 'empty-tuple': <tuple>,
|
|
|
|
|
## 'float': <float>,
|
|
|
|
|
## 'int': <int>,
|
|
|
|
|
## 'list': [<bool>],
|
|
|
|
|
## 'set': {<str>},
|
|
|
|
|
## 'str': <str>,
|
|
|
|
|
## 'tuple': (<str>,)}}
|