Source code for textflint.generation_layer.validator.edit_distance

r"""
Levenshtein distance class
=====================================
"""
import editdistance

from .validator import Validator
__all__ = ['EditDistance']


class EditDistance(Validator):
    r"""
    A constraint on edit distance (Levenshtein distance).

    The score is one minus the Levenshtein distance between the two texts
    divided by the length of the reference text, with the distance capped
    at the reference length so the score stays within ``[0, 1]``.

    :param ~textflint.input_layer.dataset origin_dataset:
        the dataset of origin samples
    :param ~textflint.input_layer.dataset trans_dataset:
        the dataset of transformed samples
    :param str|list fields: the name(s) of the origin field(s) to compare

    """

    def __init__(
        self,
        origin_dataset,
        trans_dataset,
        fields
    ):
        super().__init__(
            origin_dataset,
            trans_dataset,
            fields
        )

    def __repr__(self):
        return "EditDistance"

    def validate(self, transformed_text, reference_text):
        r"""
        Calculate the similarity score of a transformed sentence.

        :param str transformed_text: transformed sentence
        :param str reference_text: origin sentence
        :return float: ``1 - min(distance, len(reference_text))
            / len(reference_text)``; 1 means the texts are identical and
            0 means the distance is at least the reference length.

        """
        reference_length = len(reference_text)

        # An empty reference would divide by zero below. Follow the capped
        # formula's behaviour instead: identical (both empty) scores 1, and
        # any difference from an empty reference saturates the score at 0.
        if reference_length == 0:
            return 1.0 if len(transformed_text) == 0 else 0.0

        distance = editdistance.eval(transformed_text, reference_text)

        # Cap the distance so a much longer transformed text cannot push the
        # score below zero.
        return 1 - min(distance, reference_length) / reference_length