Source code for textflint.generation_layer.transformation.SM.overlap

r"""
Generate samples from templates.
       Implements the approach of
       "Right for the Wrong Reasons: Diagnosing Syntactic Heuristics in Natural Language Inference" (ACL 2019),
       in order to generate samples whose premise contains the sequence of the hypothesis but whose semantics are different.
==========================================================
"""

from ..transformation import Transformation
from ....common.utils.overlap_templates import *
from ....input_layer.component.sample import SMSample

__all__ = ['Overlap']


def no_the(sentence):
    r"""
    Remove every standalone ``the`` (together with its trailing space).

    Only a lowercase ``the`` that begins a word and is directly followed by
    a space is dropped, so words that merely end in "the" (e.g. "bathe")
    are left intact. A trailing ``the`` with no space after it is kept.

    :param str sentence: input sentence
    :return str: the sentence with the matched ``"the "`` spans removed
    """
    # Function-scope import keeps this helper self-contained.
    import re

    # \b anchors the match at a word start; a plain
    # str.replace("the ", "") would also eat the tail of words such as
    # "bathe " or "breathe ".
    return re.sub(r"\bthe ", "", sentence)


def repeaters(sentence):
    r"""
    Tell whether a sentence repeats a word once articles are dropped and
    words are mapped through ``lemma``.

    ``lemma`` is not defined in this file; presumably it is supplied by the
    star import from ``overlap_templates``.

    :param str sentence: sentence to inspect
    :return bool: True if at least two tokens end up identical,
        False otherwise
    """
    # Replace each token by its ``lemma`` entry when one exists.
    normalized = [
        lemma[token] if token in lemma else token
        for token in no_the(sentence).split()
    ]

    # A set collapses duplicates, so a size mismatch means a repeat.
    return len(set(normalized)) != len(normalized)


class Overlap(Transformation):
    r"""
    Generate samples from templates, following
    "Right for the Wrong Reasons: Diagnosing Syntactic Heuristics in
    Natural Language Inference" (ACL 2019).

    The generated pairs are meant to have a premise that contains the
    sequence of the hypothesis while the semantics may differ.

    Example::

        {
            sentence1: I hope Tom can go to school.
            sentence2: Tom go to school.
            y: 0
        }
    """

    def __init__(self):
        super().__init__()

    def __repr__(self):
        return 'Overlap'

    def transform(self, sample, n=1, **kwargs):
        r"""
        Produce template-generated samples.

        :param ~SMSample sample: accepted for interface compatibility; it
            is not read here, the output is built from templates only
        :param int n: Default is 1. Number of samples generated for each
            template
        :param **kwargs: forwarded unchanged to ``_transform``
        :return: list of generated samples, or an empty list when nothing
            was generated
        """
        generated = self._transform(n, **kwargs)
        return generated if generated else []

    def _transform(self, n=1, **kwargs):
        r"""
        Fill every template in ``template_list`` until ``n`` distinct
        samples have been collected for it.

        A candidate is rejected when its sentence pair was already
        produced for the current template or when ``repeaters`` flags its
        first sentence.

        :param int n: number of samples to collect per template
        :param **kwargs: accepted but not used
        :return list trans_samples: the generated ``SMSample`` objects, in
            template order
        """
        trans_list = []

        for template_tuple in template_list:
            # 'entailment' maps to '1', every other label to '0'.
            label = '1' if template_tuple[2] == 'entailment' else '0'
            template = template_tuple[3]
            seen_pairs = set()

            # TODO, random select template
            # NOTE: this loop only ends once n distinct, accepted pairs
            # have been drawn from the template.
            while len(seen_pairs) < n:
                example = template_filler(template)
                pair = tuple(example[:2])

                if pair in seen_pairs or repeaters(example[0]):
                    continue

                seen_pairs.add(pair)
                trans_list.append(SMSample({
                    'sentence1': example[0],
                    'sentence2': example[1],
                    'y': label
                }))

        return trans_list