From d9ea25590e95ca9e70401123a0f1f59de711e2ff Mon Sep 17 00:00:00 2001 From: Lezwon Castelino Date: Wed, 26 Aug 2020 21:52:19 +0530 Subject: [PATCH] fix ONNX model save on GPU (#3145) * added to(device) * added test * fix test on gpu * Update pytorch_lightning/core/lightning.py Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> * Update pytorch_lightning/core/lightning.py Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> * remove multi gpu check Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> * updated message * Update pytorch_lightning/core/lightning.py Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> * updated test * onxx to onnx * Update pytorch_lightning/core/lightning.py Co-authored-by: Rohit Gupta * Update tests/models/test_onnx.py Co-authored-by: Rohit Gupta * add no grad Co-authored-by: Rohit Gupta * add isinstance back * chlog * error is input_sample is not Tensor Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> Co-authored-by: Rohit Gupta Co-authored-by: Jirka Borovec --- CHANGELOG.md | 2 ++ pytorch_lightning/core/lightning.py | 11 +++++--- tests/models/test_onnx.py | 42 +++++++++++++++++++++++------ 3 files changed, 44 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3d84bb7067..0323e81349 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Fixed RMSLE metric ([#3188](https://github.com/PyTorchLightning/pytorch-lightning/pull/3188)) +- Fixed ONNX model save on GPU ([#3145](https://github.com/PyTorchLightning/pytorch-lightning/pull/3145)) + ## [0.9.0] - YYYY-MM-DD ### Added diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index a726cf31fe..f7723e6945 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1716,11 +1716,16 @@ class LightningModule(ABC, DeviceDtypeModuleMixin, GradInformation, ModelIO, Mod elif self.example_input_array is not None: input_data = self.example_input_array else: - raise ValueError('`input_sample` and `example_input_array` tensors are both missing.') - + if input_sample is not None: + raise ValueError(f'Received `input_sample` of type {type(input_sample)}. Expected type is `Tensor`') + else: + raise ValueError('Could not export to ONNX since neither `input_sample` nor' + ' `model.example_input_array` attribute is set.') + input_data = input_data.to(self.device) if 'example_outputs' not in kwargs: self.eval() - kwargs['example_outputs'] = self(input_data) + with torch.no_grad(): + kwargs['example_outputs'] = self(input_data) torch.onnx.export(self, input_data, file_path, **kwargs) diff --git a/tests/models/test_onnx.py b/tests/models/test_onnx.py index 278465941a..450e1bb786 100644 --- a/tests/models/test_onnx.py +++ b/tests/models/test_onnx.py @@ -17,7 +17,21 @@ def test_model_saves_with_input_sample(tmpdir): trainer = Trainer(max_epochs=1) trainer.fit(model) - file_path = os.path.join(tmpdir, "model.onxx") + file_path = os.path.join(tmpdir, "model.onnx") + input_sample = torch.randn((1, 28 * 28)) + model.to_onnx(file_path, input_sample) + assert os.path.isfile(file_path) + assert os.path.getsize(file_path) > 3e+06 + + +@pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine") +def test_model_saves_on_gpu(tmpdir): + """Test that model saves on gpu""" + model = 
EvalModelTemplate() + trainer = Trainer(gpus=1, max_epochs=1) + trainer.fit(model) + + file_path = os.path.join(tmpdir, "model.onnx") input_sample = torch.randn((1, 28 * 28)) model.to_onnx(file_path, input_sample) assert os.path.isfile(file_path) @@ -30,7 +44,7 @@ def test_model_saves_with_example_output(tmpdir): trainer = Trainer(max_epochs=1) trainer.fit(model) - file_path = os.path.join(tmpdir, "model.onxx") + file_path = os.path.join(tmpdir, "model.onnx") input_sample = torch.randn((1, 28 * 28)) model.eval() example_outputs = model.forward(input_sample) @@ -41,7 +55,7 @@ def test_model_saves_with_example_output(tmpdir): def test_model_saves_with_example_input_array(tmpdir): """Test that ONNX model saves with_example_input_array and size is greater than 3 MB""" model = EvalModelTemplate() - file_path = os.path.join(tmpdir, "model.onxx") + file_path = os.path.join(tmpdir, "model.onnx") model.to_onnx(file_path) assert os.path.exists(file_path) is True assert os.path.getsize(file_path) > 3e+06 @@ -66,7 +80,7 @@ def test_model_saves_on_multi_gpu(tmpdir): tpipes.run_model_test(trainer_options, model) - file_path = os.path.join(tmpdir, "model.onxx") + file_path = os.path.join(tmpdir, "model.onnx") model.to_onnx(file_path) assert os.path.exists(file_path) is True @@ -74,7 +88,7 @@ def test_model_saves_on_multi_gpu(tmpdir): def test_verbose_param(tmpdir, capsys): """Test that output is present when verbose parameter is set""" model = EvalModelTemplate() - file_path = os.path.join(tmpdir, "model.onxx") + file_path = os.path.join(tmpdir, "model.onnx") model.to_onnx(file_path, verbose=True) captured = capsys.readouterr() assert "graph(%" in captured.out @@ -84,11 +98,23 @@ def test_error_if_no_input(tmpdir): """Test that an exception is thrown when there is no input tensor""" model = EvalModelTemplate() model.example_input_array = None - file_path = os.path.join(tmpdir, "model.onxx") - with pytest.raises(ValueError, match=r'`input_sample` and `example_input_array` tensors 
are both missing'): +    file_path = os.path.join(tmpdir, "model.onnx") +    with pytest.raises(ValueError, match=r'Could not export to ONNX since neither `input_sample` nor' +                                         r' `model.example_input_array` attribute is set.'): model.to_onnx(file_path) +def test_error_if_input_sample_is_not_tensor(tmpdir): +    """Test that an exception is thrown when the provided input sample is not a tensor""" +    model = EvalModelTemplate() +    model.example_input_array = None +    file_path = os.path.join(tmpdir, "model.onnx") +    input_sample = np.random.randn(1, 28 * 28) +    with pytest.raises(ValueError, match=f'Received `input_sample` of type {type(input_sample)}. Expected type is ' +                                         f'`Tensor`'): +        model.to_onnx(file_path, input_sample) + + def test_if_inference_output_is_valid(tmpdir): """Test that the output inferred from ONNX model is same as from PyTorch""" model = EvalModelTemplate() @@ -99,7 +125,7 @@ def test_if_inference_output_is_valid(tmpdir): with torch.no_grad(): torch_out = model(model.example_input_array) - file_path = os.path.join(tmpdir, "model.onxx") + file_path = os.path.join(tmpdir, "model.onnx") model.to_onnx(file_path, model.example_input_array, export_params=True) ort_session = onnxruntime.InferenceSession(file_path)