53 lines
1.7 KiB
Python
53 lines
1.7 KiB
Python
import unittest
|
|
|
|
|
|
class stackoverflow_question_58827592_test_case(unittest.TestCase):
    """Worked example for Stack Overflow question 58827592."""

    def test_stackoverflow_question_58827592(self):
        """
        Reshape flat CSV taxonomy rows into nested ``name``/``children`` nodes.

        https://stackoverflow.com/questions/58827592/is-there-a-way-to-convert-csv-columns-into-hierarchical-relationships

        NOTE(review): this test contains no assertions; it only verifies
        that the whole transformation runs without raising.
        """
        from benedict import benedict as bdict

        csv_text = """
RecordID,kingdom,phylum,class,order,family,genus,species
1,Animalia,Chordata,Mammalia,Primates,Hominidae,Homo,Homo sapiens
2,Animalia,Chordata,Mammalia,Carnivora,Canidae,Canis,Canis
3,Plantae,nan,Magnoliopsida,Brassicales,Brassicaceae,Arabidopsis,Arabidopsis thaliana
4,Plantae,nan,Magnoliopsida,Fabales,Fabaceae,Phaseoulus,Phaseolus vulgaris
"""
        rows = bdict.from_csv(csv_text)
        tree = bdict()

        # Column names ordered from the broadest rank to the narrowest one.
        ranks = (
            "kingdom",
            "phylum",
            "class",
            "order",
            "family",
            "genus",
            "species",
        )

        # One dotted key per row, built from the row's rank values; benedict
        # is presumably expanding it into nested dicts (keypath access).
        for row in rows["values"]:
            lineage = ".".join(row[rank] for rank in ranks)
            tree[lineage] = bdict()

        # Snapshot the keypaths before any bookkeeping keys are added, and
        # order them deepest-first so children are relocated before parents.
        paths = list(tree.keypaths())
        paths.sort(key=lambda path: len(path.split(".")), reverse=True)

        tree["children"] = []

        def annotate_node(parent, key, value):
            # Only dict values are nodes; anything else is left untouched.
            if not isinstance(value, dict):
                return
            node_fields = {"name": key, "children": []}
            value.update(node_fields)

        tree.traverse(annotate_node)

        # Move every node out of its parent's mapping and into the parent's
        # "children" list (the root-level list for top-level nodes).
        for path in paths:
            parent_parts = path.split(".")[:-1]
            children_path = ".".join([*parent_parts, "children"])
            siblings = tree[children_path]
            siblings.append(tree.pop(path))
|