Source code for textflint.generation_layer.transformation.NLI.swap_ant

r"""
Replacing its words with antonyms provided by WordNet
==========================================================
"""

import re

from nltk.wsd import lesk

from ..transformation import Transformation
from ....common.settings import BLACK_LIST_WORD

__all__ = ['SwapAnt']


class SwapAnt(Transformation):
    r"""
    Transforms an input by replacing its words with antonyms provided by
    WordNet. Download nltk_data before running.

    Implementation follows "Stress Test Evaluation for Natural Language
    Inference" (https://www.aclweb.org/anthology/C18-1198/).
    For the correctness of the transformation, a word is only swapped when
    its best sense (picked by the Lesk algorithm over WordNet) has an
    antonym.

    Only samples labelled ``entailment`` are transformed: the new hypothesis
    is the premise text with one word replaced by an antonym, the premise is
    kept, and the label becomes ``contradiction``.

    example:
    {
        hypothesis: I hate this book.
        premise: This book is my favorite.
        label: contradiction
    }
    """

    def __init__(
            self,
            language="eng"):
        r"""
        :param string language: language of transformation
        """
        super().__init__()
        self.language = language
        # Words that must never be swapped.
        self.blacklist_words = BLACK_LIST_WORD

    def __repr__(self):
        return 'SwapAnt'

    def transform(self, sample, n=1, **kwargs):
        r"""
        Transform data sample to a list of Sample.

        :param ~NLISample sample: Data sample for augmentation
        :param int n: Default is 1. MAX number of unique augmented output.
            Not used by this transformation, which yields at most one
            sample.
        :param **kwargs: forwarded unchanged to ``_transform``
        :return: Augmented data; an empty list when nothing was produced
        """
        transform_results = self._transform(sample, **kwargs)

        if not transform_results:
            return []
        # Drop samples flagged as origin, so an unmodified input is never
        # reported as an augmentation.
        return [data for data in transform_results if not data.is_origin]

    @staticmethod
    def _replace_first_word(text, word, replacement):
        r"""
        Replace the first whole-word occurrence of ``word`` in ``text``.

        A plain ``str.replace`` would also match inside longer words (e.g.
        "man" inside "woman"), so a match that is not surrounded by word
        characters is preferred. If no such match exists, fall back to the
        plain first-substring replacement.

        :param str text: text to edit
        :param str word: token to replace
        :param str replacement: text inserted in place of ``word``
        :return str: ``text`` with at most one occurrence replaced
        """
        pattern = r'(?<!\w)' + re.escape(word) + r'(?!\w)'
        # A callable replacement keeps backslashes in ``replacement``
        # literal.
        new_text, count = re.subn(
            pattern, lambda _match: replacement, text, count=1)
        if count:
            return new_text
        return text.replace(word, replacement, 1)

    def _transform(self, sample, **kwargs):
        r"""
        Transform text string, this kind of transformation
        can only produce one sample.

        :param ~NLISample sample: input data, a NLISample contains
            'hypothesis' field, 'premise' field and 'y' field
        :param **kwargs: unused
        :return: ``None`` when the label is not 'entailment'; otherwise a
            one-element list holding the transformed sample, or the input
            sample itself when no word could be swapped
        """
        label_tag = sample.get_value('y')

        if label_tag != 'entailment':
            return None

        hypothesis_words = set(sample.get_words('hypothesis'))
        premise_tokens = sample.get_words('premise')
        premise_text = sample.get_value('premise')

        for word in premise_tokens:
            # Only words that also occur in the hypothesis are swapped.
            # Checking this first avoids a Lesk lookup for words that can
            # never be used.
            if word in self.blacklist_words or word not in hypothesis_words:
                continue

            # TODO: implement this via the pre_process package
            best_sense = lesk(premise_tokens, word)
            if best_sense is None or best_sense.pos() not in ('s', 'n'):
                continue

            for lemma in best_sense.lemmas():
                for antonym in lemma.antonyms():
                    antonym_name = antonym.name()
                    # Skip multi-word antonyms and the excluded "civilian".
                    if "_" in antonym_name or antonym_name == "civilian":
                        continue
                    new_hypothesis = self._replace_first_word(
                        premise_text, word, antonym_name)
                    # Each match overwrites the previous result, so the
                    # last eligible swap determines the returned sample.
                    sample = sample.replace_fields(
                        ['hypothesis', 'premise', 'y'],
                        [new_hypothesis, premise_text, 'contradiction'])
        return [sample]