# lightning/tests/metrics/test_nlp.py

import pytest
import torch

from pytorch_lightning.metrics.nlp import BLEUScore

# Fixture sentences borrowed from the NLTK ``corpus_bleu`` documentation:
# https://www.nltk.org/api/nltk.translate.html?highlight=bleu%20score#nltk.translate.bleu_score.corpus_bleu
# Each sentence is split on whitespace, so every constant below is a list of word strings.

# First hypothesis, followed by its three reference translations.
HYP1 = "It is a guide to action which ensures that the military always obeys the commands of the party".split()
REF1A = "It is a guide to action that ensures that the military will forever heed Party commands".split()
REF1B = "It is a guiding principle which makes the military forces always being under the command of the Party".split()
REF1C = "It is the practical guide for the army always to heed the directions of the party".split()

# Second hypothesis, followed by its single reference translation.
HYP2 = "he read the book because he was interested in world history".split()
REF2A = "he was interested in world history because he read the book".split()

# Corpus-level inputs: references are grouped per hypothesis
# (REF1A/REF1B/REF1C alongside HYP1, REF2A alongside HYP2).
HYPOTHESES = [HYP1, HYP2]
LIST_OF_REFERENCES = [[REF1A, REF1B, REF1C], [REF2A]]


@pytest.mark.parametrize(
    ["n_gram", "smooth"],
    [
        (1, True),
        (2, False),
        (3, True),
        (4, False),
    ],
)
def test_bleu(smooth, n_gram):
    """Smoke-test ``BLEUScore`` for each parametrized (n_gram, smooth) pair.

    Checks that the metric reports the name ``"bleu"`` and that calling it on
    the module-level ``HYPOTHESES`` / ``LIST_OF_REFERENCES`` fixtures returns a
    ``torch.Tensor``. The numeric value of the score is not verified here.
    """
    metric = BLEUScore(n_gram=n_gram, smooth=smooth)
    assert metric.name == "bleu"

    score = metric(HYPOTHESES, LIST_OF_REFERENCES)
    assert isinstance(score, torch.Tensor)
metrics: add BLEU (#2535) * metrics: added bleu score and test bleu * metrics: fixed type hints in bleu * bleu score moved to metrics/functional/nlp.py * refactor with torch.Tensor * Update test_sequence.py * refactor as Borda requests and nltk==3.2 * locked nltk==3.3 * nltk>=3.3, parametrized smooth argument for test * fix bleu_score example * added class BLEUScore metrics and test * added class BLEUScore metrics and test * update CHANGELOG * refactor with torchtext * torchtext changed to optional import * fix E501 line too long * add else: in optional import * remove pragma: no-cover * constants changed to CAPITALS * remove class in tests * List -> Sequence, conda -> pip, cast with tensor * add torchtext in test.txt * remove torchtext from test.txt * bump torchtext to 0.5.0 * bump torchtext to 0.5.0 * Apply suggestions from code review * ignore bleu score in doctest, renamed to nlp.py * back to implementation with torch * remove --ignore in CI test, proper reference format * apply justus comment Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> 2020-07-22 13:58:24 +00:00			`import pytest`
			`import torch`

			`from pytorch_lightning.metrics.nlp import BLEUScore`

			`# example taken from`
			`# https://www.nltk.org/api/nltk.translate.html?highlight=bleu%20score#nltk.translate.bleu_score.corpus_bleu`
			`HYP1 = "It is a guide to action which ensures that the military always obeys the commands of the party".split()`
			`HYP2 = "he read the book because he was interested in world history".split()`

			`REF1A = "It is a guide to action that ensures that the military will forever heed Party commands".split()`
			`REF1B = "It is a guiding principle which makes the military forces always being under the command of the Party".split()`
			`REF1C = "It is the practical guide for the army always to heed the directions of the party".split()`
			`REF2A = "he was interested in world history because he read the book".split()`

			`LIST_OF_REFERENCES = [[REF1A, REF1B, REF1C], [REF2A]]`
			`HYPOTHESES = [HYP1, HYP2]`


			`@pytest.mark.parametrize(`
			`["n_gram", "smooth"],`
			`[pytest.param(1, True), pytest.param(2, False), pytest.param(3, True), pytest.param(4, False),],`
			`)`
			`def test_bleu(smooth, n_gram):`
			`bleu = BLEUScore(n_gram=n_gram, smooth=smooth)`
			`assert bleu.name == "bleu"`

			`pl_output = bleu(HYPOTHESES, LIST_OF_REFERENCES)`
			`assert isinstance(pl_output, torch.Tensor)`