Source code for textflint.generation_layer.transformation.COREF.random_shuffle

r"""
Coref - Rnd shuffle: Randomly shuffle some sentences.
At least (1-2*trans_p) sentences would be at the right pos, so don't worry
==========================================================
"""

from math import ceil
import random

from ..transformation import Transformation
__all__ = ['RndShuffle']


[docs]class RndShuffle(Transformation): r""" Randomly change the position of trans_p * num_sentences pairs of sentences Attributes: trans_p: proportion of deleted sentences; default 0.2 processor: textflint.common.preprocess.TextProcessor. Example:: ori: { 'sentences': [ ['I', 'came'], ['I', 'saw'], ['I', 'conquered'], ['Anna', 'bel', 'wanna', 'sleep'], ['Anna', 'bel', 'is', 'happy']], 'clusters': [ [[1, 1], [3, 3], [5, 5]], [[7, 8], [11, 12]]]} trans: { 'sentences': [ ['I', 'came'], ['I', 'saw'], ['Anna', 'bel', 'wanna', 'sleep'], ['I', 'conquered'], ['Anna', 'bel', 'is', 'happy']], 'clusters': [ [[1, 1], [3, 3], [9, 9]], [[5, 6], [11, 12]]]} """ def __init__(self, trans_p=0.2, **kwargs): super().__init__() self.trans_p = trans_p def __repr__(self): return 'RndShuffle' def _transform(self, sample, n=5, **kwargs): r""" :param ~textflint.CorefSample sample: a CorefSample :param str|list fields: Not used :param int n: optional; number of generated samples :param list samples_other: optional, list of dict `samples_other` contains some other CorefSamples that also originate from conll-style dicts. :return list: samples_tfed, transformed sample list. """ if sample.num_sentences() <= 1: return [sample] * n num_sentences = sample.num_sentences() samples_tfed = [] for i in range(n): # shuffle: swap for trans_p * num_sentences (at least 1) times tfed_sen_idxs = list(range(num_sentences)) for j in range(ceil(self.trans_p * num_sentences)): # randomly choose two sens ii & jj; then swap ii = int(random.random() * num_sentences) jj = int(random.random() * num_sentences) tmp = tfed_sen_idxs[ii] tfed_sen_idxs[ii] = tfed_sen_idxs[jj] tfed_sen_idxs[jj] = tmp # get the tfed sample and append to list sample_tfed = sample.shuffle_conll(tfed_sen_idxs) samples_tfed.append(sample_tfed) return samples_tfed