Added groupby utility method.

This commit is contained in:
Fabio Caccamo 2020-02-06 15:14:36 +01:00
parent b3caf62c0f
commit eb676bcc8f
5 changed files with 112 additions and 0 deletions

View File

@ -5,6 +5,7 @@ from benedict.core.clone import clone
from benedict.core.dump import dump
from benedict.core.filter import filter
from benedict.core.flatten import flatten
from benedict.core.groupby import groupby
from benedict.core.invert import invert
from benedict.core.items_sorted import (
items_sorted_by_keys, items_sorted_by_values, )

17
benedict/core/groupby.py Normal file
View File

@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-
from benedict.utils import type_util
def groupby(items, key):
    """
    Group a list of dicts by the value each dict holds for ``key``.

    Returns a plain dict mapping every distinct ``item.get(key)`` value to
    the list of matching items, in input order. Each stored item is the
    result of ``item.copy()``, so the input dicts are not placed in the
    result themselves. Items that lack ``key`` are grouped under ``None``.

    Raises ValueError when ``items`` is not a list or when any item is not
    a dict, as decided by ``type_util.is_list`` / ``type_util.is_dict``.
    """
    if not type_util.is_list(items):
        raise ValueError('items should be a list of dicts.')
    buckets = {}
    for entry in items:
        if not type_util.is_dict(entry):
            raise ValueError('item should be a dict.')
        # setdefault creates the bucket the first time a group value is seen.
        buckets.setdefault(entry.get(key), []).append(entry.copy())
    return buckets

View File

@ -5,6 +5,7 @@ from benedict.core import clone as _clone
from benedict.core import dump as _dump
from benedict.core import filter as _filter
from benedict.core import flatten as _flatten
from benedict.core import groupby as _groupby
from benedict.core import invert as _invert
from benedict.core import items_sorted_by_keys as _items_sorted_by_keys
from benedict.core import items_sorted_by_values as _items_sorted_by_values
@ -91,6 +92,12 @@ class benedict(IODict, KeypathDict, ParseDict):
"""
return _flatten(self, separator)
def groupby(self, key, by_key):
    """
    Group the list of dicts stored at key by each item's by_key value.

    The value at self[key] is handed to the core groupby utility and the
    resulting mapping (group value -> list of items) is returned wrapped
    in a new benedict.
    """
    items = self[key]
    grouped = _groupby(items, by_key)
    return benedict(grouped)
def invert(self, flat=False):
"""
Return a new inverted dict, where values become keys and keys become values.

View File

@ -0,0 +1,47 @@
# -*- coding: utf-8 -*-
from benedict.core import clone as _clone
from benedict.core import groupby as _groupby
import unittest
class groupby_test_case(unittest.TestCase):
    """Tests for the core ``groupby`` utility."""

    def test_groupby(self):
        """Items are grouped by 'id' and the input list is left unchanged."""
        rows = (
            (1, 'John'),
            (2, 'Frank'),
            (3, 'Tony'),
            (4, 'Jimmy'),
            (3, 'Sam'),
            (1, 'Charles'),
            (3, 'Bob'),
            (4, 'Paul'),
            (1, 'Michael'),
        )
        items = [{'id': item_id, 'name': name} for item_id, name in rows]
        items_snapshot = _clone(items)
        grouped = _groupby(items, 'id')
        # the input must not be altered by grouping
        self.assertEqual(items, items_snapshot)
        self.assertIsInstance(grouped, dict)
        # number of items expected under each distinct id
        expected_sizes = {1: 3, 2: 1, 3: 3, 4: 2}
        self.assertEqual(len(grouped), len(expected_sizes))
        for group_id in expected_sizes:
            self.assertIn(group_id, grouped)
        for group_id in expected_sizes:
            self.assertIsInstance(grouped[group_id], list)
        for group_id, size in expected_sizes.items():
            self.assertEqual(len(grouped[group_id]), size)

    def test_groupby_with_wrong_input(self):
        """A non-list input, or a list holding non-dict items, is rejected."""
        not_a_list = {'id': 1, 'name': 'John'}
        with self.assertRaises(ValueError):
            _groupby(not_a_list, 'id')
        list_of_lists = [
            [{'id': 1, 'name': 'John'}],
            [{'id': 2, 'name': 'Frank'}],
        ]
        with self.assertRaises(ValueError):
            _groupby(list_of_lists, 'id')

View File

@ -772,6 +772,46 @@ b:
self.assertEqual(p, {})
# self.assertTrue(isinstance(p, benedict))
def test_groupby(self):
    """benedict.groupby groups the 'cities' list by 'country_code'."""
    rows = (
        ('IT', 'Torino'),
        ('DE', 'Berlin'),
        ('IT', 'Milano'),
        ('FR', 'Paris'),
        ('IT', 'Venezia'),
        ('IT', 'Roma'),
        ('FR', 'Lyon'),
        ('IT', 'Napoli'),
        ('DE', 'Munich'),
        ('IT', 'Palermo'),
    )
    d = {
        'cities': [
            {'country_code': code, 'name': name} for code, name in rows
        ],
    }
    bd = benedict(d)
    bd_cities = bd['cities']
    g = bd.groupby('cities', 'country_code')
    # country code -> positions in bd_cities expected inside that group
    expected = (
        ('IT', (0, 2, 4, 5, 7, 9)),
        ('FR', (3, 6)),
        ('DE', (1, 8)),
    )
    self.assertEqual(len(g), len(expected))
    for code in ('IT', 'FR', 'DE'):
        self.assertIn(code, g)
    for code, positions in expected:
        self.assertEqual(len(g[code]), len(positions))
        for position in positions:
            self.assertIn(bd_cities[position], g[code])
def test_invert(self):
d = {
'a': 1,