# Referenced from:
# Library Name: torchtext
# Authors: torchtext authors and @sluks
# Date: 2020-07-18
# Link: https://pytorch.org/text/_modules/torchtext/data/metrics.html#bleu_score
|
||
from collections import Counter
|
||
from typing import Sequence, List
|
||
|
||
import torch
|
||
|
||
|
||
def _count_ngram(ngram_input_list: List[str], n_gram: int) -> Counter:
    """Count every n-gram of order 1 through ``n_gram`` in a token list.

    Args:
        ngram_input_list: Tokens of one translated or reference sentence
        n_gram: Highest n-gram order to count (orders 1..n_gram are all counted)

    Return:
        A collections.Counter mapping each n-gram (a tuple of tokens) to the
        number of times it occurs in ``ngram_input_list``
    """
    token_total = len(ngram_input_list)
    counts = Counter()

    for order in range(1, n_gram + 1):
        # One sliding window per start position; when the order exceeds the
        # sentence length the range is empty and nothing is added.
        counts.update(
            tuple(ngram_input_list[start : start + order])
            for start in range(token_total - order + 1)
        )

    return counts
|
||
|
||
|
||
def bleu_score(
    translate_corpus: Sequence[Sequence[str]],
    reference_corpus: Sequence[Sequence[Sequence[str]]],
    n_gram: int = 4,
    smooth: bool = False,
) -> torch.Tensor:
    """Calculate BLEU score of machine translated text with one or more references.

    Args:
        translate_corpus: An iterable of machine translated corpus; each item
            is one tokenised sentence
        reference_corpus: An iterable of iterables of reference corpus; item
            ``i`` holds the tokenised references for translation ``i``
        n_gram: Gram value ranged from 1 to 4 (Default 4)
        smooth: Whether or not to apply smoothing – Lin et al. 2004
            (adds one to every n-gram numerator and denominator)

    Return:
        A Tensor with BLEU Score

    Raises:
        AssertionError: If the two corpora differ in length
        ValueError: If ``n_gram`` is smaller than 1 or a translation has no
            references

    Example:

        >>> translate_corpus = ['the cat is on the mat'.split()]
        >>> reference_corpus = [['there is a cat on the mat'.split(), 'a cat is on the mat'.split()]]
        >>> bleu_score(translate_corpus, reference_corpus)
        tensor(0.7598)
    """
    if n_gram < 1:
        raise ValueError(f"n_gram must be a positive integer, got {n_gram}")
    assert len(translate_corpus) == len(
        reference_corpus
    ), "translate_corpus and reference_corpus must have the same length"

    # Accumulate in plain Python lists and convert to tensors once at the end;
    # per-element in-place tensor updates inside the loops are needlessly slow.
    numerator = [0.0] * n_gram
    denominator = [0.0] * n_gram
    c = 0.0  # total length of all translations
    r = 0.0  # total length of the closest-length reference per translation

    for translation, references in zip(translate_corpus, reference_corpus):
        if not references:
            raise ValueError("every translation needs at least one reference sentence")

        translation_len = len(translation)
        c += translation_len
        # Reference whose length is closest to the translation; ``min`` keeps
        # the first of several equally close candidates.
        r += min(
            (len(ref) for ref in references),
            key=lambda ref_len: abs(translation_len - ref_len),
        )

        translation_counter = _count_ngram(translation, n_gram)
        # Union keeps, per n-gram, the maximum count seen in any reference.
        reference_counter = Counter()
        for ref in references:
            reference_counter |= _count_ngram(ref, n_gram)

        # Intersection clips each translation count at the reference maximum.
        ngram_counter_clip = translation_counter & reference_counter

        # The length of an n-gram tuple is its order, which selects the slot.
        for ngram, count in ngram_counter_clip.items():
            numerator[len(ngram) - 1] += count
        for ngram, count in translation_counter.items():
            denominator[len(ngram) - 1] += count

    # Any order without a single clipped match gives a score of zero.
    # NOTE: this early return is also taken when ``smooth`` is True.
    if min(numerator) == 0.0:
        return torch.tensor(0.0)

    numerator_t = torch.tensor(numerator)
    denominator_t = torch.tensor(denominator)
    if smooth:
        precision_scores = (numerator_t + 1.0) / (denominator_t + 1.0)
    else:
        precision_scores = numerator_t / denominator_t

    # Geometric mean of the precisions, computed in log space with uniform weights.
    log_precision_scores = torch.tensor([1.0 / n_gram] * n_gram) * torch.log(precision_scores)
    geometric_mean = torch.exp(torch.sum(log_precision_scores))

    # Penalise translations that are not longer than their references overall.
    if c > r:
        brevity_penalty = torch.tensor(1.0)
    else:
        brevity_penalty = torch.exp(1 - (torch.tensor(r) / torch.tensor(c)))

    return brevity_penalty * geometric_mean
|