"""Build a type-level "map" of an arbitrarily nested data structure."""

from collections.abc import Iterable

try:
    from boltons.iterutils import remap
except ImportError:
    # boltons is only required when keys are folded (see get_structure);
    # everything else works without it.
    remap = None


def unique(seq):
    """Return a new sequence of the same type as *seq* without duplicates.

    The first occurrence of each element is kept. Elements are compared by
    equality rather than by hash, so unhashable items (dicts, lists, ...)
    are supported.
    """
    uniques = []
    for item in seq:
        if item not in uniques:
            uniques.append(item)
    return type(seq)(uniques)


class StrType(str):
    """Helper class to print and compare object types.

    Used by 'get_structure'. When instantiating this class, provide the
    object whose type you want to print. Two instances are equal when they
    were built from objects whose types have the same name.

    __hash__, __eq__ and __ne__ are implemented in order to filter
    duplicates and yet stay compatible with uniqueness detection (to fit
    in sets for instance).
    """

    def __new__(cls, obj):
        # Store the type name as the string payload instead of str(obj):
        # this avoids stringifying arbitrary objects and gives inherited
        # ordering comparisons (used when sorting) a deterministic meaning.
        return super().__new__(cls, type(obj).__name__)

    def __init__(self, obj):
        self.value = type(obj).__name__

    def __hash__(self):
        return hash(self.value)

    def __eq__(self, other):
        if not isinstance(other, StrType):
            return False
        return self.value == other.value

    def __ne__(self, other):
        # str defines its own __ne__, which would otherwise be inherited
        # and disagree with the __eq__ defined above.
        return not self.__eq__(other)

    def __str__(self):
        return f"<{self.value}>"

    __repr__ = __str__


def _is_container(obj):
    """Tell whether *obj* is an iterable other than a string or bytes."""
    return isinstance(obj, Iterable) and not isinstance(obj, (str, bytes))


def _extract_structure(data):
    """Recursively replace every leaf of *data* by a StrType of its type."""
    if data and isinstance(data, dict):
        return {key: _extract_structure(value) for key, value in data.items()}
    if data and _is_container(data):
        # The iterable is not empty: map each of its elements, rebuild a
        # container of the same type and drop duplicated entries.
        return unique(type(data)([_extract_structure(item) for item in data]))
    # It's a leaf (or an empty container): return the name of its type.
    return StrType(data)


def get_structure(data, folding_keys=(), folding_tag=" (folded)"):
    """Return a kind of map of the could-be nested data.

    Every leaf is replaced by a StrType naming its type, and duplicates are
    removed from sequences.

    If 'folding_keys' are provided then the corresponding values in the
    data set will be folded, that is to say replaced by their data type
    (thus transformed into leaves). Only iterable values other than strings
    and bytes are folded. The key will be appended with 'folding_tag'.

    Raises ImportError if 'folding_keys' is given while the 'boltons'
    package is not installed.
    """
    if folding_keys:
        if remap is None:
            raise ImportError(
                "folding keys requires the 'boltons' package"
            )

        def visit_fold(path, key, value):
            # The test is made on the visited value, not on the top-level
            # data, so that only real containers get folded.
            if key in folding_keys and _is_container(value):
                # An empty instance of the same type is later rendered as
                # a leaf by _extract_structure.
                return f"{key}{folding_tag}", type(value)()
            return True

        data = remap(data, visit=visit_fold)

    return _extract_structure(data)


if __name__ == "__main__":
    from pprint import pprint
    from datetime import datetime

    class MaClasse:
        pass

    nested = {
        'some types': {
            'MaClasse': MaClasse(),
            'datetime': datetime.now(),
            'empty-list': [],
            'empty-tuple': (),
            'float': 3.14,
            'int': 0,
            'list': [False,],
            'set': {'a',},
            'str': 'Babylone',
            'tuple': ('jah',),
        },
        'sequence types': {
            'lists': ['a', 'b', 'c', datetime.now(), [1, 2, 3], [1, 2, 3]],
            'tuples': ('a', 'b', 'c', datetime.now(), [1, 2, 3], [1, 2, 3]),
            'sets': {'hello', 'world', 42, True},
        },
        '2 same dict 1 other': [
            {'same': 3, 'as': b'the other'},
            {'same': 3, 'as': b'the other'},
            {'different': 'from', 'the': 'other'},
        ],
        'play me': MaClasse,
        'Two folded keys': {
            'Fold me :)': [1, 2, 3, 4, 5],
            'Fold me too': "folded, can't see me!",
        },
    }

    structure = get_structure(nested, ['Fold me :)', 'Fold me too'], ' [+]')

    print("The nested data:")
    pprint(nested)
    print("")
    print("Its mapping:")
    pprint(structure)

## Illustrative output for the mapping (exact line layout and the order of
## set elements may vary):
##
## {'2 same dict 1 other': [{'as': <bytes>, 'same': <int>},
##                          {'different': <str>, 'the': <str>}],
##  'Two folded keys': {'Fold me :) [+]': <list>, 'Fold me too': <str>},
##  'play me': <type>,
##  'sequence types': {'lists': [<str>, <datetime>, [<int>]],
##                     'sets': {<bool>, <int>, <str>},
##                     'tuples': (<str>, <datetime>, [<int>])},
##  'some types': {'MaClasse': <MaClasse>,
##                 'datetime': <datetime>,
##                 'empty-list': <list>,
##                 'empty-tuple': <tuple>,
##                 'float': <float>,
##                 'int': <int>,
##                 'list': [<bool>],
##                 'set': {<str>},
##                 'str': <str>,
##                 'tuple': (<str>,)}}