Source code for textflint.generation_layer.transformation.UT.reverse_neg

r"""
Transforms an affirmative sentence into a negative sentence, or vice versa
==========================================================
"""

__all__ = ['ReverseNeg']

from ..transformation import Transformation


class ReverseNeg(Transformation):
    r"""
    Transforms an affirmative sentence into a negative sentence, or vice versa.

    A sentence that already contains a negation token has that negation
    removed; otherwise a negation is inserted next to the first root /
    copula / auxiliary token reported by the dependency parser.
    Each sample generates one transformed sample at most.

    """

    # Tokens whose presence marks the sentence as already negated.
    _NEGATION_TOKENS = ('not', "n't", 'don', 'didn', 'doesn', 'aren', 'isn',
                        'wasn', 'weren')
    # "do"-support auxiliaries that are dropped together with a following
    # negation ("do not go" -> "go").
    _DO_AUXILIARIES = ('do', 'does', 'did')
    # Leading words for which no negation is inserted (see _check_sentence).
    _SKIPPED_FIRST_WORDS = ('are', 'is', 'be', 'am', 'was', 'were', 'how',
                            'why', 'what', 'where', 'who', 'when', 'can',
                            'do', 'did', 'does', 'could', 'should', 'would',
                            'will', 'shall', 'thank', 'thanks')
    # Dependency labels treated as candidate insertion points.
    _ROOT_LABELS = ('cop', 'ROOT', 'aux')
    # Part-of-speech tags treated as verbs.
    _VERB_TAGS = ('VB', 'VBP', 'VBZ', 'VBG', 'VBD', 'VBN')
    # Copulas and auxiliaries that take "not" directly after them.
    _COPULAS = ('is', 'was', 'were', 'am', 'are', "'s", "'re", "'m")
    _AUXILIARIES = ('do', 'does', 'did', 'can', 'have', 'will', 'could',
                    'would', 'should')

    def __init__(
        self,
        **kwargs
    ):
        super().__init__()

    def __repr__(self):
        return "ReverseNeg"

    def _transform(self, sample, field='x', n=1, **kwargs):
        r"""
        Transform text string according transform_field.

        :param Sample sample: input data, normally one data component.
        :param str|list field: indicate which field to transform
        :param int n: number of generated samples (not used here; at most
            one sample is produced)
        :return list trans_samples: transformed sample list.

        """
        tokens = sample.get_words(field)
        if not tokens:
            return []

        action = self._judge_sentence(tokens)
        if action == 'remove':
            candidate = self._get_del_sample(tokens, field, sample)
        else:
            candidate = self._get_add_sample(field, tokens, sample)

        # The helpers return an empty list when no edit could be made.
        return [candidate] if candidate else []

    @staticmethod
    def _judge_sentence(tokens):
        r"""
        Decide whether a negation should be removed from or added to the
        sentence.

        :param tokens: word list
        :return str: 'remove' if any token is a known negation token,
            otherwise 'add'

        """
        if any(token in ReverseNeg._NEGATION_TOKENS for token in tokens):
            return 'remove'
        return 'add'

    @staticmethod
    def _check_sentence(tokens):
        r"""
        Check whether a negation may be inserted into the sentence.

        :param tokens: word list
        :return bool: False when the sentence has fewer than three tokens,
            contains a '?' token, or starts (case-insensitively) with one
            of the skipped leading words; True otherwise

        """
        if len(tokens) < 3:
            return False
        if '?' in tokens:
            return False
        return tokens[0].lower() not in ReverseNeg._SKIPPED_FIRST_WORDS

    def _parse_sentence(self, tokens):
        r"""
        Run dependency parsing and collect candidate root positions.

        Only the first sentence returned by the sentence tokenizer is parsed.

        :param tokens: word list
        :return list: indices of parsed tokens whose fourth element is
            'cop', 'ROOT' or 'aux'

        """
        sentence = ' '.join(tokens)
        sentence_tokens = self.processor.sentence_tokenize(sentence)
        if not sentence_tokens:
            # Nothing to parse (e.g. whitespace-only tokens).
            return []

        parse_tokens = self.processor.get_dep_parser(sentence_tokens[0])
        return [
            index for index, parsed in enumerate(parse_tokens)
            # Entries with fewer than four fields carry no label to inspect.
            if len(parsed) >= 4 and parsed[3] in self._ROOT_LABELS
        ]

    def _get_del_sample(self, tokens, field, sample):
        r"""
        Remove the first negation found in the sentence.

        :param tokens: word list
        :param str|list field: field to transform
        :param Sample sample: input sample
        :return: the sample with the negation removed, or an empty list
            when no negation token was found

        """
        for i, token in enumerate(tokens):
            # do not + verb -> verb
            if (token in self._DO_AUXILIARIES and len(tokens) > i + 2
                    and tokens[i + 1] in ('not', "n't")):
                root_id_list = self._parse_sentence(tokens)
                pos_tag = self.processor.get_pos(tokens[i + 2])[0][1]
                if pos_tag in self._VERB_TAGS or (i + 2) in root_id_list:
                    # Drop the auxiliary and the negation: after the first
                    # deletion the negation has shifted to index i.
                    del_sample = sample.delete_field_at_index(field, i)
                    return del_sample.delete_field_at_index(field, i)

            if token in self._NEGATION_TOKENS:
                return sample.delete_field_at_index(field, i)

        return []

    def _get_add_sample(self, field, tokens, sample):
        r"""
        Insert a negation at the first candidate root of the sentence.

        :param str|list field: field to transform
        :param tokens: word list
        :param Sample sample: input sample
        :return: the negated sample, or an empty list when the sentence has
            no candidate root or fails _check_sentence

        """
        root_id_list = self._parse_sentence(tokens)
        if not root_id_list:
            return []
        if not self._check_sentence(tokens):
            return []
        return self._add_sample(field, tokens, root_id_list[0], sample)

    def _add_sample(self, field, tokens, root_id, sample):
        r"""
        Insert the negation that fits the token at ``root_id``.

        :param str|list field: field to transform
        :param tokens: word list
        :param int root_id: index of the token to negate
        :param Sample sample: input sample
        :return: the negated sample, or an empty list when the token's
            part-of-speech tag is not handled

        """
        root_word = tokens[root_id].lower()

        # Copulas and auxiliaries: "is" -> "is not", "can" -> "can not".
        if root_word in self._COPULAS or root_word in self._AUXILIARIES:
            return sample.insert_field_before_index(
                field, root_id + 1, 'not')
        # "being" -> "not being"
        if root_word == 'being':
            return sample.insert_field_before_index(field, root_id, 'not')

        pos_tag = self.processor.get_pos(tokens[root_id])[0][1]
        if pos_tag not in self._VERB_TAGS + ('NNS', 'NN'):
            return []

        # The three cases must be mutually exclusive; with two independent
        # ``if`` statements the final ``else`` would overwrite "do not"
        # with "does not" for VB / VBP / VBG.
        if pos_tag in ('VB', 'VBP', 'VBG'):
            neg_word = ['do', 'not']
        elif pos_tag in ('VBD', 'VBN'):
            neg_word = ['did', 'not']
        else:
            neg_word = ['does', 'not']

        add_sample = sample
        for offset, word in enumerate(neg_word):
            add_sample = add_sample.insert_field_before_index(
                field, root_id + offset, word)
        return add_sample