lightning/pytorch_lightning/metrics/functional/nlp.py

# referenced from
# Library Name: torchtext
# Authors: torchtext authors and @sluks
# Date: 2020-07-18
# Link: https://pytorch.org/text/_modules/torchtext/data/metrics.html#bleu_score
from collections import Counter
from typing import List, Sequence

import torch


def _count_ngram(ngram_input_list: List[str], n_gram: int) -> Counter:
    """
    Tally every contiguous n-gram of order 1 up to ``n_gram`` found in a token list

    Args:
        ngram_input_list: Tokens of a single sentence (a translation or a reference)
        n_gram: Highest n-gram order to count

    Return:
        A collections.Counter keyed by token tuples; the length of a key is its n-gram order
    """
    token_total = len(ngram_input_list)

    # Orders are visited from 1 upwards and start positions left to right, so the
    # keys are inserted in the same sequence a nested loop would produce.
    return Counter(
        tuple(ngram_input_list[start:start + order])
        for order in range(1, n_gram + 1)
        for start in range(token_total - order + 1)
    )


def bleu_score(
        translate_corpus: Sequence[Sequence[str]],
        reference_corpus: Sequence[Sequence[Sequence[str]]],
        n_gram: int = 4,
        smooth: bool = False
) -> torch.Tensor:
    """
    Calculate BLEU score of machine translated text with one or more references

    Args:
        translate_corpus: A sequence of tokenized translations, one token sequence per sentence
        reference_corpus: For every translation, a sequence of tokenized reference sentences
        n_gram: Highest n-gram order that is scored, must be at least 1 (Default 4)
        smooth: Whether or not to apply smoothing – Lin et al. 2004. When enabled, one is added
            to the clipped match count and to the total count of every n-gram order.

    Return:
        Tensor with BLEU Score. The score is ``tensor(0.)`` whenever some n-gram order has no
        clipped match at all; this check runs before smoothing, so ``smooth`` does not change it.

    Raises:
        AssertionError: If both corpora do not contain the same number of entries
        ValueError: If ``n_gram`` is smaller than 1 or a translation comes without any reference

    Example:

        >>> translate_corpus = ['the cat is on the mat'.split()]
        >>> reference_corpus = [['there is a cat on the mat'.split(), 'a cat is on the mat'.split()]]
        >>> bleu_score(translate_corpus, reference_corpus)
        tensor(0.7598)

    """

    assert len(translate_corpus) == len(reference_corpus), (
        "translate_corpus and reference_corpus must have the same number of entries"
    )
    if n_gram < 1:
        raise ValueError("`n_gram` must be at least 1, got {}".format(n_gram))

    # Per-order totals over the whole corpus: index ``k`` holds the counts of (k + 1)-grams.
    numerator = torch.zeros(n_gram)
    denominator = torch.zeros(n_gram)
    # Accumulated translation length (c) and accumulated closest reference length (r).
    c = 0.0
    r = 0.0

    for translation, references in zip(translate_corpus, reference_corpus):
        hyp_len = len(translation)
        c += hyp_len

        ref_len_list = [len(ref) for ref in references]
        if not ref_len_list:
            raise ValueError("every translation needs at least one reference sentence")
        # Reference length closest to the translation length; on ties ``min`` keeps the
        # first candidate, i.e. the reference that is listed first.
        r += min(ref_len_list, key=lambda ref_len: abs(hyp_len - ref_len))

        translation_counter = _count_ngram(translation, n_gram)

        # Union of counters keeps, per n-gram, the maximum count seen in any reference.
        reference_counter = Counter()
        for ref in references:
            reference_counter |= _count_ngram(ref, n_gram)

        # Intersection clips each translation count to that per-reference maximum.
        ngram_counter_clip = translation_counter & reference_counter

        for ngram, count in ngram_counter_clip.items():
            numerator[len(ngram) - 1] += count

        for ngram, count in translation_counter.items():
            denominator[len(ngram) - 1] += count

    # An order without any match would send the geometric mean to zero (log(0)),
    # so the result is short-circuited here.
    if numerator.min() == 0.0:
        return torch.tensor(0.0)

    if smooth:
        precision_scores = (numerator + 1) / (denominator + 1)
    else:
        precision_scores = numerator / denominator

    # Uniformly weighted geometric mean of the per-order precisions, computed in log space.
    log_precision_scores = torch.tensor([1.0 / n_gram] * n_gram) * torch.log(precision_scores)
    geometric_mean = torch.exp(torch.sum(log_precision_scores))

    # Translations that are not longer than their references are penalised.
    # ``c`` is non-zero here because at least one unigram matched.
    trans_len = torch.tensor(c)
    ref_len = torch.tensor(r)
    brevity_penalty = torch.tensor(1.0) if c > r else torch.exp(1 - (ref_len / trans_len))

    return brevity_penalty * geometric_mean
-												metrics: add BLEU (#2535)

* metrics: added bleu score and test bleu

* metrics: fixed type hints in bleu

* bleu score moved to metrics/functional/nlp.py

* refactor with torch.Tensor

* Update test_sequence.py

* refactor as Borda requests and nltk==3.2

* locked nltk==3.3

* nltk>=3.3, parametrized smooth argument for test

* fix bleu_score example

* added class BLEUScore metrics and test

* added class BLEUScore metrics and test

* update CHANGELOG

* refactor with torchtext

* torchtext changed to optional import

* fix E501 line too long

* add else: in optional import

* remove pragma: no-cover

* constants changed to CAPITALS

* remove class in tests

* List -> Sequence, conda -> pip, cast with tensor

* add torchtext in test.txt

* remove torchtext from test.txt

* bump torchtext to 0.5.0

* bump torchtext to 0.5.0

* Apply suggestions from code review

* ignore bleu score in doctest, renamed to nlp.py

* back to implementation with torch

* remove --ignore in CI test, proper reference format

* apply justus comment

Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
											
										
										
											2020-07-22 13:58:24 +00:00
+								# referenced from
 								# Library Name: torchtext
 								# Authors: torchtext authors and @sluks
 								# Date: 2020-07-18
 								# Link: https://pytorch.org/text/_modules/torchtext/data/metrics.html#bleu_score
 								from collections import Counter
-												fix reduction docstring and clean tests (#2885)

* fix reduction docstring

* Update docstring and some cleanup

* miss

* suggestion from code review

Co-authored-by: Ananya Harsh Jha <ananya@pytorchlightning.ai>

Co-authored-by: Ananya Harsh Jha <ananya@pytorchlightning.ai>
											
										
										
											2020-08-09 10:03:24 +00:00
+								from typing import List, Sequence
-												metrics: add BLEU (#2535)

* metrics: added bleu score and test bleu

* metrics: fixed type hints in bleu

* bleu score moved to metrics/functional/nlp.py

* refactor with torch.Tensor

* Update test_sequence.py

* refactor as Borda requests and nltk==3.2

* locked nltk==3.3

* nltk>=3.3, parametrized smooth argument for test

* fix bleu_score example

* added class BLEUScore metrics and test

* added class BLEUScore metrics and test

* update CHANGELOG

* refactor with torchtext

* torchtext changed to optional import

* fix E501 line too long

* add else: in optional import

* remove pragma: no-cover

* constants changed to CAPITALS

* remove class in tests

* List -> Sequence, conda -> pip, cast with tensor

* add torchtext in test.txt

* remove torchtext from test.txt

* bump torchtext to 0.5.0

* bump torchtext to 0.5.0

* Apply suggestions from code review

* ignore bleu score in doctest, renamed to nlp.py

* back to implementation with torch

* remove --ignore in CI test, proper reference format

* apply justus comment

Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
											
										
										
											2020-07-22 13:58:24 +00:00
 								import torch
 								def _count_ngram(ngram_input_list: List[str], n_gram: int) -> Counter:
-												fix reduction docstring and clean tests (#2885)

* fix reduction docstring

* Update docstring and some cleanup

* miss

* suggestion from code review

Co-authored-by: Ananya Harsh Jha <ananya@pytorchlightning.ai>

Co-authored-by: Ananya Harsh Jha <ananya@pytorchlightning.ai>
											
										
										
											2020-08-09 10:03:24 +00:00
+								    """
 								    Counting how many times each word appears in a given text with ngram
-												metrics: add BLEU (#2535)

* metrics: added bleu score and test bleu

* metrics: fixed type hints in bleu

* bleu score moved to metrics/functional/nlp.py

* refactor with torch.Tensor

* Update test_sequence.py

* refactor as Borda requests and nltk==3.2

* locked nltk==3.3

* nltk>=3.3, parametrized smooth argument for test

* fix bleu_score example

* added class BLEUScore metrics and test

* added class BLEUScore metrics and test

* update CHANGELOG

* refactor with torchtext

* torchtext changed to optional import

* fix E501 line too long

* add else: in optional import

* remove pragma: no-cover

* constants changed to CAPITALS

* remove class in tests

* List -> Sequence, conda -> pip, cast with tensor

* add torchtext in test.txt

* remove torchtext from test.txt

* bump torchtext to 0.5.0

* bump torchtext to 0.5.0

* Apply suggestions from code review

* ignore bleu score in doctest, renamed to nlp.py

* back to implementation with torch

* remove --ignore in CI test, proper reference format

* apply justus comment

Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
											
										
										
											2020-07-22 13:58:24 +00:00
 								    Args:
 								        ngram_input_list: A list of translated text or reference texts
 								        n_gram: gram value ranged 1 to 4
 								    Return:
 								        ngram_counter: a collections.Counter object of ngram
 								    """
 								    ngram_counter = Counter()
 								    for i in range(1, n_gram + 1):
 								        for j in range(len(ngram_input_list) - i + 1):
-												fix reduction docstring and clean tests (#2885)

* fix reduction docstring

* Update docstring and some cleanup

* miss

* suggestion from code review

Co-authored-by: Ananya Harsh Jha <ananya@pytorchlightning.ai>

Co-authored-by: Ananya Harsh Jha <ananya@pytorchlightning.ai>
											
										
										
											2020-08-09 10:03:24 +00:00
+								            ngram_key = tuple(ngram_input_list[j:(i + j)])
-												metrics: add BLEU (#2535)

* metrics: added bleu score and test bleu

* metrics: fixed type hints in bleu

* bleu score moved to metrics/functional/nlp.py

* refactor with torch.Tensor

* Update test_sequence.py

* refactor as Borda requests and nltk==3.2

* locked nltk==3.3

* nltk>=3.3, parametrized smooth argument for test

* fix bleu_score example

* added class BLEUScore metrics and test

* added class BLEUScore metrics and test

* update CHANGELOG

* refactor with torchtext

* torchtext changed to optional import

* fix E501 line too long

* add else: in optional import

* remove pragma: no-cover

* constants changed to CAPITALS

* remove class in tests

* List -> Sequence, conda -> pip, cast with tensor

* add torchtext in test.txt

* remove torchtext from test.txt

* bump torchtext to 0.5.0

* bump torchtext to 0.5.0

* Apply suggestions from code review

* ignore bleu score in doctest, renamed to nlp.py

* back to implementation with torch

* remove --ignore in CI test, proper reference format

* apply justus comment

Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
											
										
										
											2020-07-22 13:58:24 +00:00
+								            ngram_counter[ngram_key] += 1
 								    return ngram_counter
 								def bleu_score(
-												fix reduction docstring and clean tests (#2885)

* fix reduction docstring

* Update docstring and some cleanup

* miss

* suggestion from code review

Co-authored-by: Ananya Harsh Jha <ananya@pytorchlightning.ai>

Co-authored-by: Ananya Harsh Jha <ananya@pytorchlightning.ai>
											
										
										
											2020-08-09 10:03:24 +00:00
+								        translate_corpus: Sequence[str],
 								        reference_corpus: Sequence[str],
 								        n_gram: int = 4,
 								        smooth: bool = False
-												metrics: add BLEU (#2535)

* metrics: added bleu score and test bleu

* metrics: fixed type hints in bleu

* bleu score moved to metrics/functional/nlp.py

* refactor with torch.Tensor

* Update test_sequence.py

* refactor as Borda requests and nltk==3.2

* locked nltk==3.3

* nltk>=3.3, parametrized smooth argument for test

* fix bleu_score example

* added class BLEUScore metrics and test

* added class BLEUScore metrics and test

* update CHANGELOG

* refactor with torchtext

* torchtext changed to optional import

* fix E501 line too long

* add else: in optional import

* remove pragma: no-cover

* constants changed to CAPITALS

* remove class in tests

* List -> Sequence, conda -> pip, cast with tensor

* add torchtext in test.txt

* remove torchtext from test.txt

* bump torchtext to 0.5.0

* bump torchtext to 0.5.0

* Apply suggestions from code review

* ignore bleu score in doctest, renamed to nlp.py

* back to implementation with torch

* remove --ignore in CI test, proper reference format

* apply justus comment

Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
											
										
										
											2020-07-22 13:58:24 +00:00
+								) -> torch.Tensor:
-												fix reduction docstring and clean tests (#2885)

* fix reduction docstring

* Update docstring and some cleanup

* miss

* suggestion from code review

Co-authored-by: Ananya Harsh Jha <ananya@pytorchlightning.ai>

Co-authored-by: Ananya Harsh Jha <ananya@pytorchlightning.ai>
											
										
										
											2020-08-09 10:03:24 +00:00
+								    """
 								    Calculate BLEU score of machine translated text with one or more references
-												metrics: add BLEU (#2535)

* metrics: added bleu score and test bleu

* metrics: fixed type hints in bleu

* bleu score moved to metrics/functional/nlp.py

* refactor with torch.Tensor

* Update test_sequence.py

* refactor as Borda requests and nltk==3.2

* locked nltk==3.3

* nltk>=3.3, parametrized smooth argument for test

* fix bleu_score example

* added class BLEUScore metrics and test

* added class BLEUScore metrics and test

* update CHANGELOG

* refactor with torchtext

* torchtext changed to optional import

* fix E501 line too long

* add else: in optional import

* remove pragma: no-cover

* constants changed to CAPITALS

* remove class in tests

* List -> Sequence, conda -> pip, cast with tensor

* add torchtext in test.txt

* remove torchtext from test.txt

* bump torchtext to 0.5.0

* bump torchtext to 0.5.0

* Apply suggestions from code review

* ignore bleu score in doctest, renamed to nlp.py

* back to implementation with torch

* remove --ignore in CI test, proper reference format

* apply justus comment

Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
											
										
										
											2020-07-22 13:58:24 +00:00
 								    Args:
 								        translate_corpus: An iterable of machine translated corpus
 								        reference_corpus: An iterable of iterables of reference corpus
 								        n_gram: Gram value ranged from 1 to 4 (Default 4)
 								        smooth: Whether or not to apply smoothing – Lin et al. 2004
 								    Return:
-												fix reduction docstring and clean tests (#2885)

* fix reduction docstring

* Update docstring and some cleanup

* miss

* suggestion from code review

Co-authored-by: Ananya Harsh Jha <ananya@pytorchlightning.ai>

Co-authored-by: Ananya Harsh Jha <ananya@pytorchlightning.ai>
											
										
										
											2020-08-09 10:03:24 +00:00
+								        Tensor with BLEU Score
-												metrics: add BLEU (#2535)

* metrics: added bleu score and test bleu

* metrics: fixed type hints in bleu

* bleu score moved to metrics/functional/nlp.py

* refactor with torch.Tensor

* Update test_sequence.py

* refactor as Borda requests and nltk==3.2

* locked nltk==3.3

* nltk>=3.3, parametrized smooth argument for test

* fix bleu_score example

* added class BLEUScore metrics and test

* added class BLEUScore metrics and test

* update CHANGELOG

* refactor with torchtext

* torchtext changed to optional import

* fix E501 line too long

* add else: in optional import

* remove pragma: no-cover

* constants changed to CAPITALS

* remove class in tests

* List -> Sequence, conda -> pip, cast with tensor

* add torchtext in test.txt

* remove torchtext from test.txt

* bump torchtext to 0.5.0

* bump torchtext to 0.5.0

* Apply suggestions from code review

* ignore bleu score in doctest, renamed to nlp.py

* back to implementation with torch

* remove --ignore in CI test, proper reference format

* apply justus comment

Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
											
										
										
											2020-07-22 13:58:24 +00:00
 								    Example:
 								        >>> translate_corpus = ['the cat is on the mat'.split()]
 								        >>> reference_corpus = [['there is a cat on the mat'.split(), 'a cat is on the mat'.split()]]
 								        >>> bleu_score(translate_corpus, reference_corpus)
 								        tensor(0.7598)
-												fix reduction docstring and clean tests (#2885)

* fix reduction docstring

* Update docstring and some cleanup

* miss

* suggestion from code review

Co-authored-by: Ananya Harsh Jha <ananya@pytorchlightning.ai>

Co-authored-by: Ananya Harsh Jha <ananya@pytorchlightning.ai>
											
										
										
											2020-08-09 10:03:24 +00:00
-												metrics: add BLEU (#2535)

* metrics: added bleu score and test bleu

* metrics: fixed type hints in bleu

* bleu score moved to metrics/functional/nlp.py

* refactor with torch.Tensor

* Update test_sequence.py

* refactor as Borda requests and nltk==3.2

* locked nltk==3.3

* nltk>=3.3, parametrized smooth argument for test

* fix bleu_score example

* added class BLEUScore metrics and test

* added class BLEUScore metrics and test

* update CHANGELOG

* refactor with torchtext

* torchtext changed to optional import

* fix E501 line too long

* add else: in optional import

* remove pragma: no-cover

* constants changed to CAPITALS

* remove class in tests

* List -> Sequence, conda -> pip, cast with tensor

* add torchtext in test.txt

* remove torchtext from test.txt

* bump torchtext to 0.5.0

* bump torchtext to 0.5.0

* Apply suggestions from code review

* ignore bleu score in doctest, renamed to nlp.py

* back to implementation with torch

* remove --ignore in CI test, proper reference format

* apply justus comment

Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
											
										
										
											2020-07-22 13:58:24 +00:00
+								    """
 								    assert len(translate_corpus) == len(reference_corpus)
 								    numerator = torch.zeros(n_gram)
 								    denominator = torch.zeros(n_gram)
 								    precision_scores = torch.zeros(n_gram)
 								    c = 0.0
 								    r = 0.0
-												fix reduction docstring and clean tests (#2885)

* fix reduction docstring

* Update docstring and some cleanup

* miss

* suggestion from code review

Co-authored-by: Ananya Harsh Jha <ananya@pytorchlightning.ai>

Co-authored-by: Ananya Harsh Jha <ananya@pytorchlightning.ai>
											
										
										
											2020-08-09 10:03:24 +00:00
-												metrics: add BLEU (#2535)

* metrics: added bleu score and test bleu

* metrics: fixed type hints in bleu

* bleu score moved to metrics/functional/nlp.py

* refactor with torch.Tensor

* Update test_sequence.py

* refactor as Borda requests and nltk==3.2

* locked nltk==3.3

* nltk>=3.3, parametrized smooth argument for test

* fix bleu_score example

* added class BLEUScore metrics and test

* added class BLEUScore metrics and test

* update CHANGELOG

* refactor with torchtext

* torchtext changed to optional import

* fix E501 line too long

* add else: in optional import

* remove pragma: no-cover

* constants changed to CAPITALS

* remove class in tests

* List -> Sequence, conda -> pip, cast with tensor

* add torchtext in test.txt

* remove torchtext from test.txt

* bump torchtext to 0.5.0

* bump torchtext to 0.5.0

* Apply suggestions from code review

* ignore bleu score in doctest, renamed to nlp.py

* back to implementation with torch

* remove --ignore in CI test, proper reference format

* apply justus comment

Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
											
										
										
											2020-07-22 13:58:24 +00:00
+								    for (translation, references) in zip(translate_corpus, reference_corpus):
 								        c += len(translation)
 								        ref_len_list = [len(ref) for ref in references]
 								        ref_len_diff = [abs(len(translation) - x) for x in ref_len_list]
 								        r += ref_len_list[ref_len_diff.index(min(ref_len_diff))]
 								        translation_counter = _count_ngram(translation, n_gram)
 								        reference_counter = Counter()
-												fix reduction docstring and clean tests (#2885)

* fix reduction docstring

* Update docstring and some cleanup

* miss

* suggestion from code review

Co-authored-by: Ananya Harsh Jha <ananya@pytorchlightning.ai>

Co-authored-by: Ananya Harsh Jha <ananya@pytorchlightning.ai>
											
										
										
											2020-08-09 10:03:24 +00:00
-												metrics: add BLEU (#2535)

* metrics: added bleu score and test bleu

* metrics: fixed type hints in bleu

* bleu score moved to metrics/functional/nlp.py

* refactor with torch.Tensor

* Update test_sequence.py

* refactor as Borda requests and nltk==3.2

* locked nltk==3.3

* nltk>=3.3, parametrized smooth argument for test

* fix bleu_score example

* added class BLEUScore metrics and test

* added class BLEUScore metrics and test

* update CHANGELOG

* refactor with torchtext

* torchtext changed to optional import

* fix E501 line too long

* add else: in optional import

* remove pragma: no-cover

* constants changed to CAPITALS

* remove class in tests

* List -> Sequence, conda -> pip, cast with tensor

* add torchtext in test.txt

* remove torchtext from test.txt

* bump torchtext to 0.5.0

* bump torchtext to 0.5.0

* Apply suggestions from code review

* ignore bleu score in doctest, renamed to nlp.py

* back to implementation with torch

* remove --ignore in CI test, proper reference format

* apply justus comment

Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
											
										
										
											2020-07-22 13:58:24 +00:00
+								        for ref in references:
 								            reference_counter |= _count_ngram(ref, n_gram)
 								        ngram_counter_clip = translation_counter & reference_counter
-												fix reduction docstring and clean tests (#2885)

* fix reduction docstring

* Update docstring and some cleanup

* miss

* suggestion from code review

Co-authored-by: Ananya Harsh Jha <ananya@pytorchlightning.ai>

Co-authored-by: Ananya Harsh Jha <ananya@pytorchlightning.ai>
											
										
										
											2020-08-09 10:03:24 +00:00
-												metrics: add BLEU (#2535)

* metrics: added bleu score and test bleu

* metrics: fixed type hints in bleu

* bleu score moved to metrics/functional/nlp.py

* refactor with torch.Tensor

* Update test_sequence.py

* refactor as Borda requests and nltk==3.2

* locked nltk==3.3

* nltk>=3.3, parametrized smooth argument for test

* fix bleu_score example

* added class BLEUScore metrics and test

* added class BLEUScore metrics and test

* update CHANGELOG

* refactor with torchtext

* torchtext changed to optional import

* fix E501 line too long

* add else: in optional import

* remove pragma: no-cover

* constants changed to CAPITALS

* remove class in tests

* List -> Sequence, conda -> pip, cast with tensor

* add torchtext in test.txt

* remove torchtext from test.txt

* bump torchtext to 0.5.0

* bump torchtext to 0.5.0

* Apply suggestions from code review

* ignore bleu score in doctest, renamed to nlp.py

* back to implementation with torch

* remove --ignore in CI test, proper reference format

* apply justus comment

Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
											
										
										
											2020-07-22 13:58:24 +00:00
+								        for counter_clip in ngram_counter_clip:
 								            numerator[len(counter_clip) - 1] += ngram_counter_clip[counter_clip]
 								        for counter in translation_counter:
 								            denominator[len(counter) - 1] += translation_counter[counter]
 								    trans_len = torch.tensor(c)
 								    ref_len = torch.tensor(r)
-												fix reduction docstring and clean tests (#2885)

* fix reduction docstring

* Update docstring and some cleanup

* miss

* suggestion from code review

Co-authored-by: Ananya Harsh Jha <ananya@pytorchlightning.ai>

Co-authored-by: Ananya Harsh Jha <ananya@pytorchlightning.ai>
											
										
										
											2020-08-09 10:03:24 +00:00
-												metrics: add BLEU (#2535)

* metrics: added bleu score and test bleu

* metrics: fixed type hints in bleu

* bleu score moved to metrics/functional/nlp.py

* refactor with torch.Tensor

* Update test_sequence.py

* refactor as Borda requests and nltk==3.2

* locked nltk==3.3

* nltk>=3.3, parametrized smooth argument for test

* fix bleu_score example

* added class BLEUScore metrics and test

* added class BLEUScore metrics and test

* update CHANGELOG

* refactor with torchtext

* torchtext changed to optional import

* fix E501 line too long

* add else: in optional import

* remove pragma: no-cover

* constants changed to CAPITALS

* remove class in tests

* List -> Sequence, conda -> pip, cast with tensor

* add torchtext in test.txt

* remove torchtext from test.txt

* bump torchtext to 0.5.0

* bump torchtext to 0.5.0

* Apply suggestions from code review

* ignore bleu score in doctest, renamed to nlp.py

* back to implementation with torch

* remove --ignore in CI test, proper reference format

* apply justus comment

Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
											
										
										
											2020-07-22 13:58:24 +00:00
+								    if min(numerator) == 0.0:
 								        return torch.tensor(0.0)
 								    if smooth:
 								        precision_scores = torch.add(numerator, torch.ones(n_gram)) / torch.add(denominator, torch.ones(n_gram))
 								    else:
 								        precision_scores = numerator / denominator
-												fix reduction docstring and clean tests (#2885)

* fix reduction docstring

* Update docstring and some cleanup

* miss

* suggestion from code review

Co-authored-by: Ananya Harsh Jha <ananya@pytorchlightning.ai>

Co-authored-by: Ananya Harsh Jha <ananya@pytorchlightning.ai>
											
										
										
											2020-08-09 10:03:24 +00:00
-												metrics: add BLEU (#2535)

* metrics: added bleu score and test bleu

* metrics: fixed type hints in bleu

* bleu score moved to metrics/functional/nlp.py

* refactor with torch.Tensor

* Update test_sequence.py

* refactor as Borda requests and nltk==3.2

* locked nltk==3.3

* nltk>=3.3, parametrized smooth argument for test

* fix bleu_score example

* added class BLEUScore metrics and test

* added class BLEUScore metrics and test

* update CHANGELOG

* refactor with torchtext

* torchtext changed to optional import

* fix E501 line too long

* add else: in optional import

* remove pragma: no-cover

* constants changed to CAPITALS

* remove class in tests

* List -> Sequence, conda -> pip, cast with tensor

* add torchtext in test.txt

* remove torchtext from test.txt

* bump torchtext to 0.5.0

* bump torchtext to 0.5.0

* Apply suggestions from code review

* ignore bleu score in doctest, renamed to nlp.py

* back to implementation with torch

* remove --ignore in CI test, proper reference format

* apply justus comment

Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
											
										
										
											2020-07-22 13:58:24 +00:00
+								    log_precision_scores = torch.tensor([1.0 / n_gram] * n_gram) * torch.log(precision_scores)
 								    geometric_mean = torch.exp(torch.sum(log_precision_scores))
 								    brevity_penalty = torch.tensor(1.0) if c > r else torch.exp(1 - (ref_len / trans_len))
 								    bleu = brevity_penalty * geometric_mean
 								    return bleu