Source code for textflint.generation_layer.transformation.UT.insert_adv

r"""
Insert an adverb before each verb found via POS tags
==========================================================
"""

__all__ = ["InsertAdv"]

import random

from ....common.settings import ADVERB_PATH
from ...transformation import Transformation
from ....common.utils.load import plain_lines_loader
from ....common.utils.list_op import trade_off_sub_words
from ....common.utils.install import download_if_needed


class InsertAdv(Transformation):
    r"""
    Transformation that inserts an adverb in front of verbs.

    Verb positions are detected from the POS tags of the chosen field, and
    the adverbs are drawn at random from ``self.adverb_list``.

    """

    def __init__(
        self,
        **kwargs
    ):
        r"""
        Build the transformation and load the adverb vocabulary.

        :param kwargs: accepted for interface compatibility; not forwarded
            to the parent constructor.

        """
        super().__init__()
        # presumably a local path to the adverb resource, fetched on first
        # use -- confirm against download_if_needed
        adverb_source = download_if_needed(ADVERB_PATH)
        self.adverb_list = plain_lines_loader(adverb_source)

    def __repr__(self):
        return 'InsertAdv'

    def _transform(self, sample, field='x', n=1, **kwargs):
        r"""
        Generate samples with adverbs inserted before the verbs of ``field``.

        :param ~Sample sample: input data, normally one data component.
        :param str field: name of the field to transform
        :param int n: number of generated samples; also the number of
            adverbs drawn per verb position
        :param kwargs: unused
        :return list trans_samples: transformed sample list, empty when the
            field has no verb or no adverb could be drawn.

        """
        verb_positions = self._get_verb_location(sample.get_pos(field))
        if not verb_positions:
            return []

        # One random draw per verb position, in positional order.
        drawn = [
            (position, self._get_random_adverbs(n=n))
            for position in verb_positions
        ]
        # Drop positions for which the draw came back empty.
        usable = [(position, adverbs) for position, adverbs in drawn
                  if adverbs]
        if not usable:
            return []

        positions = [position for position, _ in usable]
        candidates = [adverbs for _, adverbs in usable]

        # NOTE(review): trade_off_sub_words appears to regroup the
        # per-position candidates into per-sample word lists aligned with
        # the returned indices -- confirm against its implementation.
        candidates, positions = trade_off_sub_words(
            candidates, positions, n=n)

        # Each entry of ``candidates`` yields one transformed sample.
        return [
            sample.insert_field_before_indices(field, positions, words)
            for words in candidates
        ]

    @staticmethod
    def _get_verb_location(pos_tags):
        r"""
        Collect the indices of verb tags.

        :param list pos_tags: POS tags, one per token
        :return list: indices whose tag is one of the Penn Treebank verb
            tags ``VB``, ``VBP``, ``VBZ``, ``VBG``, ``VBD``, ``VBN``.

        """
        verb_tags = ('VB', 'VBP', 'VBZ', 'VBG', 'VBD', 'VBN')
        return [idx for idx, tag in enumerate(pos_tags) if tag in verb_tags]

    def _get_random_adverbs(self, n):
        r"""
        Draw adverbs at random, without replacement.

        :param int n: requested number of adverbs
        :return list: ``n`` adverbs, or the whole vocabulary in random
            order when it holds fewer than ``n`` entries.

        """
        # Cap the request at the vocabulary size.
        draw_count = min(n, len(self.adverb_list))
        return random.sample(self.adverb_list, draw_count)