# Source code for textflint.generation_layer.transformation.UT.swap_ant_wordnet

r"""
Swapping words with their antonyms from WordNet
==========================================================
"""

__all__ = ['SwapAntWordNet']

from ...transformation import WordSubstitute


class SwapAntWordNet(WordSubstitute):
    r"""
    Transforms an input by replacing its words with antonyms provided by
    WordNet.

    Download nltk_data before running.
    Only adjectives are replaced (see :meth:`skip_aug`).

    """

    def __init__(
        self,
        trans_min=1,
        trans_max=10,
        trans_p=0.1,
        stop_words=None,
        language="eng",
        **kwargs
    ):
        r"""
        :param int trans_min: forwarded unchanged to ``WordSubstitute``.
        :param int trans_max: forwarded unchanged to ``WordSubstitute``.
        :param float trans_p: forwarded unchanged to ``WordSubstitute``.
        :param list|None stop_words: forwarded unchanged to
            ``WordSubstitute``.
        :param str language: language code passed to ``lemma_names`` when
            antonym lemmas are looked up.
        :param kwargs: accepted for call compatibility; not forwarded to the
            parent constructor.

        """
        super().__init__(
            trans_min=trans_min,
            trans_max=trans_max,
            trans_p=trans_p,
            stop_words=stop_words)
        self.language = language
        # NOTE(review): presumably tells the base class to supply POS tags to
        # ``skip_aug`` / ``_get_candidates`` -- confirm in WordSubstitute.
        self.get_pos = True

    def __repr__(self):
        return 'SwapAntWordNet'

    def _get_candidates(self, word, n=5, pos=None, **kwargs):
        r"""
        Returns up to ``n`` single-word antonyms of ``word``.

        :param str word: the word to find antonyms for.
        :param int n: maximum number of candidates to return.
        :param pos: POS tag of ``word``, passed to the processor together
            with the word.
        :return list: distinct antonym lemmas in first-seen order; empty if
            none are found.

        """
        # POS filtering is left to the processor (original note: "filter
        # different pos in get_wsd function"). A single (word, pos) pair is
        # submitted, so only the first result entry is relevant.
        antonyms = self.processor.get_antonyms([(word, pos)])[0]

        # De-duplicate while keeping first-seen order. Slicing a plain set
        # would make the surviving candidates depend on string hash
        # randomization and therefore differ from run to run.
        seen = set()
        candidates = []
        for antonym in antonyms:
            for ant_word in antonym.lemma_names(lang=self.language):
                # WordNet can suggest phrases that are joined by '_' but we
                # ignore phrases.
                if ant_word == word or "_" in ant_word or ant_word in seen:
                    continue
                seen.add(ant_word)
                candidates.append(ant_word)

        return candidates[:n]

    def skip_aug(self, tokens, mask, pos=None):
        r"""
        Skip non adj word.

        :param list tokens: word list
        :param list mask: mask list
        :param list|None pos: POS tag of each token; when ``None`` no token
            can be identified as an adjective and an empty list is returned.
        :return adj_indices: list of allowed indices.

        """
        if pos is None:
            # Without POS tags nothing can be confirmed to be an adjective.
            return []

        nor_pos = [self.processor.normalize_pos(pos_tag) for pos_tag in pos]

        # Keep only the indices the base-class filter allows whose
        # normalized tag is 'a' (adjective).
        return [
            index
            for index in self.pre_skip_aug(tokens, mask)
            if nor_pos[index] == 'a'
        ]