Fix naming conflict and formatting

This commit is contained in:
Ines Montani 2020-05-21 14:24:38 +02:00
parent bd6353715a
commit bea863acd2
1 changed file with 10 additions and 14 deletions

View File

@@ -169,19 +169,16 @@ class ChineseTokenizer(DummyTokenizer):
return util.to_bytes(serializers, []) return util.to_bytes(serializers, [])
def from_bytes(self, data, **kwargs): def from_bytes(self, data, **kwargs):
data = {"features_b": b"", "weights_b": b"", "processors_data": None} pkuseg_data = {"features_b": b"", "weights_b": b"", "processors_data": None}
# pkuseg_features_b = b""
# pkuseg_weights_b = b""
# pkuseg_processors_data = None
def deserialize_pkuseg_features(b): def deserialize_pkuseg_features(b):
data["features_b"] = b pkuseg_data["features_b"] = b
def deserialize_pkuseg_weights(b): def deserialize_pkuseg_weights(b):
data["weights_b"] = b pkuseg_data["weights_b"] = b
def deserialize_pkuseg_processors(b): def deserialize_pkuseg_processors(b):
data["processors_data"] = srsly.msgpack_loads(b) pkuseg_data["processors_data"] = srsly.msgpack_loads(b)
deserializers = OrderedDict( deserializers = OrderedDict(
( (
@@ -193,13 +190,13 @@ class ChineseTokenizer(DummyTokenizer):
) )
util.from_bytes(data, deserializers, []) util.from_bytes(data, deserializers, [])
if data["features_b"] and data["weights_b"]: if pkuseg_data["features_b"] and pkuseg_data["weights_b"]:
with tempfile.TemporaryDirectory() as tempdir: with tempfile.TemporaryDirectory() as tempdir:
tempdir = Path(tempdir) tempdir = Path(tempdir)
with open(tempdir / "features.pkl", "wb") as fileh: with open(tempdir / "features.pkl", "wb") as fileh:
fileh.write(data["features_b"]) fileh.write(pkuseg_data["features_b"])
with open(tempdir / "weights.npz", "wb") as fileh: with open(tempdir / "weights.npz", "wb") as fileh:
fileh.write(data["weights_b"]) fileh.write(pkuseg_data["weights_b"])
try: try:
import pkuseg import pkuseg
except ImportError: except ImportError:
@@ -208,10 +205,9 @@ class ChineseTokenizer(DummyTokenizer):
+ _PKUSEG_INSTALL_MSG + _PKUSEG_INSTALL_MSG
) )
self.pkuseg_seg = pkuseg.pkuseg(str(tempdir)) self.pkuseg_seg = pkuseg.pkuseg(str(tempdir))
if data["processors_data"]: if pkuseg_data["processors_data"]:
(user_dict, do_process, common_words, other_words) = data[ processors_data = pkuseg_data["processors_data"]
"processors_data" (user_dict, do_process, common_words, other_words) = processors_data
]
self.pkuseg_seg.preprocesser = pkuseg.Preprocesser(user_dict) self.pkuseg_seg.preprocesser = pkuseg.Preprocesser(user_dict)
self.pkuseg_seg.postprocesser.do_process = do_process self.pkuseg_seg.postprocesser.do_process = do_process
self.pkuseg_seg.postprocesser.common_words = set(common_words) self.pkuseg_seg.postprocesser.common_words = set(common_words)