lightning/pytorch_lightning/metrics/functional/nlp.py

# referenced from
# Library Name: torchtext
# Authors: torchtext authors and @sluks
# Date: 2020-07-18
# Link: https://pytorch.org/text/_modules/torchtext/data/metrics.html#bleu_score
from collections import Counter
from typing import List, Sequence

import torch


def _count_ngram(ngram_input_list: List[str], n_gram: int) -> Counter:
    """
    Counting how many times each word appears in a given text with ngram

    Args:
        ngram_input_list: A list of translated text or reference texts
        n_gram: gram value ranged 1 to 4

    Return:
        ngram_counter: a collections.Counter object of ngram
    """

    ngram_counter = Counter()

    for i in range(1, n_gram + 1):
        for j in range(len(ngram_input_list) - i + 1):
            ngram_key = tuple(ngram_input_list[j:(i + j)])
            ngram_counter[ngram_key] += 1

    return ngram_counter


def bleu_score(
        translate_corpus: Sequence[str],
        reference_corpus: Sequence[str],
        n_gram: int = 4,
        smooth: bool = False
) -> torch.Tensor:
    """
    Calculate BLEU score of machine translated text with one or more references

    Args:
        translate_corpus: An iterable of machine translated corpus
        reference_corpus: An iterable of iterables of reference corpus
        n_gram: Gram value ranged from 1 to 4 (Default 4)
        smooth: Whether or not to apply smoothing – Lin et al. 2004

    Return:
        Tensor with BLEU Score

    Example:

        >>> translate_corpus = ['the cat is on the mat'.split()]
        >>> reference_corpus = [['there is a cat on the mat'.split(), 'a cat is on the mat'.split()]]
        >>> bleu_score(translate_corpus, reference_corpus)
        tensor(0.7598)

    """

    assert len(translate_corpus) == len(reference_corpus)
    numerator = torch.zeros(n_gram)
    denominator = torch.zeros(n_gram)
    precision_scores = torch.zeros(n_gram)
    c = 0.0
    r = 0.0

    for (translation, references) in zip(translate_corpus, reference_corpus):
        c += len(translation)
        ref_len_list = [len(ref) for ref in references]
        ref_len_diff = [abs(len(translation) - x) for x in ref_len_list]
        r += ref_len_list[ref_len_diff.index(min(ref_len_diff))]
        translation_counter = _count_ngram(translation, n_gram)
        reference_counter = Counter()

        for ref in references:
            reference_counter |= _count_ngram(ref, n_gram)

        ngram_counter_clip = translation_counter & reference_counter

        for counter_clip in ngram_counter_clip:
            numerator[len(counter_clip) - 1] += ngram_counter_clip[counter_clip]

        for counter in translation_counter:
            denominator[len(counter) - 1] += translation_counter[counter]

    trans_len = torch.tensor(c)
    ref_len = torch.tensor(r)

    if min(numerator) == 0.0:
        return torch.tensor(0.0)

    if smooth:
        precision_scores = torch.add(numerator, torch.ones(n_gram)) / torch.add(denominator, torch.ones(n_gram))
    else:
        precision_scores = numerator / denominator

    log_precision_scores = torch.tensor([1.0 / n_gram] * n_gram) * torch.log(precision_scores)
    geometric_mean = torch.exp(torch.sum(log_precision_scores))
    brevity_penalty = torch.tensor(1.0) if c > r else torch.exp(1 - (ref_len / trans_len))
    bleu = brevity_penalty * geometric_mean

    return bleu