Source code for textflint.generation_layer.transformation.UT.reverse_neg

r"""
Transforms an affirmative sentence into a negative sentence, or vice versa
==========================================================
"""

__all__ = ['ReverseNeg']

from ..transformation import Transformation


[docs]class ReverseNeg(Transformation):
    r"""
    Transforms an affirmative sentence into a negative sentence, or vice versa.
    Each sample generate one transformed sample at most.

    """
    def __init__(
        self,
        **kwargs
    ):
        super().__init__()

    def __repr__(self):
        return "ReverseNeg"

    def _transform(self, sample, field='x', n=1, **kwargs):
        r"""
        Transform text string according transform_field.

        :param Sample sample: input data, normally one data component.
        :param str|list field: indicate which field to transform
        :param int n: number of  generated samples
        :return list trans_samples:transformed sample list.
        
        """
        trans_samples = []

        tokens = sample.get_words(field)
        if not tokens:
            return []

        judge_sentence = self._judge_sentence(tokens)

        if judge_sentence == 'remove':
            del_sample = self._get_del_sample(tokens, field, sample)
            if del_sample:
                trans_samples.append(del_sample)

        if judge_sentence == 'add':
            add_sample = self._get_add_sample(field, tokens, sample)
            if add_sample:
                trans_samples.append(add_sample)

        return trans_samples

    @staticmethod
    def _judge_sentence(tokens):
        r"""
        :param tokens: word list
        :return: transformed_text or None
        
        """
        for i in tokens:
            if i in ['not', 'n\'t', 'don', 'didn', 'doesn',
                     'doesn', 'aren', 'isn', 'wasn', 'weren']:
                return 'remove'

        return 'add'

    @staticmethod
    def _check_sentence(tokens):
        """
        Check positive or negative
        
        """
        if len(tokens) < 3:
            return False
        if '?' in tokens:
            return False
        if tokens[0].lower() in ['are', 'is', 'be', 'am', 'was', 'were', 'how',
                                 'why', 'what', 'where', 'who', 'when', 'can',
                                 'do', 'did', 'does', 'could', 'should',
                                 'would', 'will', 'shall', 'thank', 'thanks']:
            return False
        else:
            return True

    def _parse_sentence(self, tokens):
        """
        Dependency Parsing
        """
        sentence = ' '.join(x for x in tokens)
        sentence_tokens = self.processor.sentence_tokenize(sentence)
        root_id_list = []

        parse_tokens = self.processor.get_dep_parser(
            sentence_tokens[0])

        for i, token in enumerate(parse_tokens):
            if len(token) < 4:
                continue
            if token[3] in ['cop', 'ROOT', 'aux']:
                root_id_list.append(i)

        return root_id_list

    def _get_del_sample(self, tokens, field, sample):
        for i, token in enumerate(tokens):
            # do not + verb → verb
            if token in ['do', 'does', 'did'] and len(tokens) > i + 2:
                if tokens[i + 1] in ['not', 'n\'t']:
                    root_id_list = self._parse_sentence(tokens)
                    pos_tag = self.processor.get_pos(tokens[i + 2])[0][1]

                    if pos_tag in [
                            'VB', 'VBP', 'VBZ', 'VBG', 'VBD', 'VBN'] or (
                            i + 2) in root_id_list:
                        del_list = [i, i + 1]
                        del_sample = sample

                        for i, index in enumerate(del_list):
                            del_sample = del_sample.delete_field_at_index(
                                field, index - i)

                        return del_sample

            if token in ['not', 'n\'t', 'don', 'didn', 'doesn', 'doesn',
                         'aren', 'isn', 'wasn', 'weren']:
                return sample.delete_field_at_index(field, i)

        return []

    def _get_add_sample(self, field, tokens, sample):
        root_id_list = self._parse_sentence(tokens)
        if root_id_list:
            check_sentence = self._check_sentence(tokens)
            if check_sentence:
                root_id = root_id_list[0]
                add_sample = self._add_sample(field, tokens, root_id, sample)
                return add_sample
            else:
                return []
        else:
            return []

    def _add_sample(self, field, tokens, root_id, sample):
        if tokens[root_id].lower() in ['is', 'was', 'were', 'am',
                                       'are', '\'s', '\'re', '\'m']:
            add_sample = sample.insert_field_before_index(
                field, root_id + 1, 'not')
            return add_sample

        if tokens[root_id].lower() in ['being']:
            add_sample = sample.insert_field_before_index(
                field, root_id, 'not')
            return add_sample

        if tokens[root_id].lower() in ['do', 'does', 'did', 'can',
                                       'have', 'will', 'could', 'would',
                                       'could', 'should']:
            add_sample = sample.insert_field_before_index(
                field, root_id + 1, 'not')
            return add_sample
        else:
            token_pos = self.processor.get_pos(tokens[root_id])
            trans_sent = []
            if token_pos[0][1] in ['VB', 'VBP', 'VBZ', 'VBG',
                                   'VBD', 'VBN', 'NNS', 'NN']:
                if token_pos[0][1] in ['VB', 'VBP', 'VBG']:
                    neg_word = ['do', 'not']
                if token_pos[0][1] in ['VBD', 'VBN']:
                    neg_word = ['did', 'not']
                else:
                    neg_word = ['does', 'not']
                add_sample = sample

                for i, word in enumerate(neg_word):
                    add_sample = add_sample.insert_field_before_index(
                        field, root_id + i, word)
                return add_sample

            return trans_sent