In [1]:
import evaluate
In [2]:
# Score a single candidate sentence against a single reference with BLEU.
bleu = evaluate.load("bleu")

# The candidate is the tail end of the reference sentence below, so it is
# noticeably shorter than the reference.
predictions = ["I have socks."]

# Nested list: the outer list lines up with `predictions`, and the inner list
# holds the reference text(s) for that prediction (only one here).
references = [["In my dresser I have socks."]]

results = bleu.compute(references=references, predictions=predictions)

# Bare last expression so the notebook renders the full result dict.
results
Out[2]:
{'bleu': 0.4723665527410147,
 'precisions': [1.0, 1.0, 1.0, 1.0],
 'brevity_penalty': 0.4723665527410147,
 'length_ratio': 0.5714285714285714,
 'translation_length': 4,
 'reference_length': 7}