From 847a9dd0947a6360ce41cb480a83a3b732ea7894 Mon Sep 17 00:00:00 2001
From: Bryan Marcus McCann
Date: Wed, 29 Aug 2018 02:44:09 +0000
Subject: [PATCH] bug fix for string interning

---
 text/torchtext/data/example.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/text/torchtext/data/example.py b/text/torchtext/data/example.py
index 400c6cd5..82690a71 100644
--- a/text/torchtext/data/example.py
+++ b/text/torchtext/data/example.py
@@ -5,6 +5,18 @@ import json
 import six
 
 
+def intern_strings(x):
+    if isinstance(x, (list, tuple)):
+        r = []
+        for y in x:
+            if isinstance(y, str):
+                r.append(sys.intern(y))
+            else:
+                r.append(y)
+        return r
+    return x
+
+
 class Example(object):
     """Defines a single training or test example.
 
@@ -27,7 +39,7 @@ class Example(object):
                     vals = [vals]
                 for val in vals:
                     name, field = val
-                    setattr(ex, name, [sys.intern(x) for x in field.preprocess(data[key])])
+                    setattr(ex, name, intern_strings(field.preprocess(data[key])))
         return ex
 
     @classmethod
@@ -59,7 +71,7 @@ class Example(object):
             if field is not None:
                 if isinstance(val, six.string_types):
                     val = val.rstrip('\n')
-                setattr(ex, name, [sys.intern(x) for x in field.preprocess(val)])
+                setattr(ex, name, intern_strings(field.preprocess(val)))
         return ex
 
     @classmethod