Source code for textflint.generation_layer.transformation.NLI.num_word

r"""
Transforms an input by replacing its number word
==========================================================
"""

from ..transformation import Transformation
from ....common.utils.num_word import _get_contradictory_hypothesis

__all__ = ['NumWord']

# Inclusive bounds of the numeric range that NumWord._transform skips: a
# digit token whose integer value falls inside [LOWER_YEAR_NUM, UPPER_YEAR_NUM]
# is never rewritten. Judging by the names, such values are presumably treated
# as calendar years rather than quantities -- TODO confirm.
LOWER_YEAR_NUM = 1000
UPPER_YEAR_NUM = 2020


class NumWord(Transformation):
    r"""
    Transforms an input by replacing its number word.

    Example::

        {
            hypothesis: Mr Zhang has more than 20 students in Fudan university.
            premise: Mr Zhang has 10 students in Fudan university.
            y: contradiction
        }

    """

    def __init__(self):
        super().__init__()

    def __repr__(self):
        return 'NumWord'

    def transform(self, sample, n=1, **kwargs):
        r"""
        Transform data sample to a list of Sample.

        :param ~NLISample sample: Data sample for augmentation
        :param int n: Default is 1. MAX number of unique augmented output.
            Forwarded to ``_transform``, which yields at most one sample.
        :param **kwargs: extra options, passed through to ``_transform``
        :return: list of augmented samples with the ones flagged
            ``is_origin`` filtered out; empty list when nothing was produced

        """
        transform_results = self._transform(sample, n=n, **kwargs)
        if not transform_results:
            return []
        return [data for data in transform_results if not data.is_origin]

    def _transform(self, sample, n=1, **kwargs):
        r"""
        Transform text string, this kind of transformation can only
        produce one sample.

        The first purely numeric token of the premise whose value lies
        outside ``[LOWER_YEAR_NUM, UPPER_YEAR_NUM]`` is handed to
        ``_get_contradictory_hypothesis``; the result is stored together
        with the original premise text and the label ``'contradiction'``.

        :param ~NLISample sample: input data, a NLISample contains
            'hypothesis' field, 'premise' field and 'y' field
        :param int n: number of generated samples, this transformation
            can only generate one sample (the value is not used)
        :return list trans_samples: transformed sample list that only
            contain one sample, or ``None`` when the premise holds no
            replaceable number

        """
        tokens = sample.get_words('premise')
        original_text = sample.get_text('premise')

        for index, token in enumerate(tokens):
            # isdecimal() rather than isdigit(): isdigit() also accepts
            # characters such as superscripts ('²') that int() cannot
            # parse, which would raise ValueError on the next line.
            if not token.isdecimal():
                continue
            number = int(token)
            if LOWER_YEAR_NUM <= number <= UPPER_YEAR_NUM:
                # Values inside the excluded range are left untouched.
                continue
            cont_hyp = _get_contradictory_hypothesis(tokens, index, number)
            break
        else:
            # Loop finished without a break: no usable number was found.
            return None

        # The original premise text becomes the new 'hypothesis' and the
        # generated sentence becomes the new 'premise'.
        # NOTE(review): the class docstring example shows the altered
        # sentence as the hypothesis instead -- confirm the intended order.
        new_sample = sample.replace_fields(
            ['hypothesis', 'premise', 'y'],
            [original_text, cont_hyp, 'contradiction'])

        return [new_sample]