From 1f83b7a739502d2d826a0a2a95452a66dbf58273 Mon Sep 17 00:00:00 2001 From: Bryan Marcus McCann Date: Tue, 21 Aug 2018 02:47:14 +0000 Subject: [PATCH] interning strings to fix memory consumption --- dockerfiles/Dockerfile | 1 + text/torchtext/data/example.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/dockerfiles/Dockerfile b/dockerfiles/Dockerfile index 0864b498..5fb4518d 100644 --- a/dockerfiles/Dockerfile +++ b/dockerfiles/Dockerfile @@ -64,5 +64,6 @@ RUN apt-get install --yes \ RUN pip install records RUN pip install babel RUN pip install tabulate +RUN pip install pympler CMD bash diff --git a/text/torchtext/data/example.py b/text/torchtext/data/example.py index 576b46f6..0fc0dd1b 100644 --- a/text/torchtext/data/example.py +++ b/text/torchtext/data/example.py @@ -1,5 +1,6 @@ import csv import json +import sys import six @@ -58,7 +59,7 @@ class Example(object): if field is not None: if isinstance(val, six.string_types): val = val.rstrip('\n') - setattr(ex, name, field.preprocess(val)) + setattr(ex, name, [sys.intern(x) for x in field.preprocess(val)]) return ex @classmethod