Source code for textflint.common.utils.word_op

import random
import string


def get_start_end(word, skip_first=False, skip_last=False):
    """
    Get valid operation range of one word.

    :param str word: target word string
    :param bool skip_first: whether to exclude the first char from the range
    :param bool skip_last: whether to exclude the last char from the range
    :return: (start, end) tuple of inclusive indices; ``end`` is smaller
        than ``start`` when no char is left to operate on

    """
    # len() works on the string itself, so no list copy of the word is needed.
    start = int(skip_first)
    end = len(word) - 1 - int(skip_last)

    return start, end
def get_random_letter(src_char=None):
    """
    Get a random replacement character matching the format of src_char.

    :param char src_char: character whose kind (digit / upper case / other)
        the result should mimic; None means no reference character
    :return: a random digit if src_char is a digit, a random upper case
        letter if src_char is upper case, otherwise (including when
        src_char is None) a random lower case letter

    """
    # Without a reference character fall back to the documented default
    # instead of failing on None.isdigit().
    if src_char is None:
        return random.choice(string.ascii_lowercase)

    if src_char.isdigit():
        return random.choice(string.digits)
    if src_char.isupper():
        return random.choice(string.ascii_uppercase)

    return random.choice(string.ascii_lowercase)
def swap(word, num=1, skip_first=False, skip_last=False):
    """
    Swap randomly chosen characters with their right-hand neighbors.

    :param str word: target word
    :param int num: number of swaps to apply
    :param bool skip_first: whether to leave the first char of word untouched
    :param bool skip_last: whether to leave the last char of word untouched
    :return: perturbed string; the word itself when it has at most one
        char; None when fewer than num swap positions are available

    """
    if len(word) <= 1:
        return word

    letters = list(word)
    first, last = get_start_end(word, skip_first, skip_last)

    # A swap position needs a neighbor on its right, so only
    # last - first positions qualify; too few means no result.
    if last - first < num:
        return None

    for pos in random.sample(list(range(first, last)), num):
        letters[pos], letters[pos + 1] = letters[pos + 1], letters[pos]

    return ''.join(letters)
def insert(word, num=1, skip_first=False, skip_last=False):
    """
    Perturb the word by inserting random characters.

    :param str word: target word
    :param int num: number of characters to insert
    :param bool skip_first: whether to forbid inserting at the beginning
        of word
    :param bool skip_last: whether to forbid inserting at the end of word
    :return: perturbed string; the word itself when it has at most one
        char; None when fewer than num insert positions are available

    """
    if len(word) <= 1:
        return word

    letters = list(word)
    first, last = get_start_end(word, skip_first, skip_last)

    slot_count = last - first + 2
    if slot_count < num:
        return None

    slots = random.sample(list(range(first, last + 2)), num)

    # Work from the right so earlier insertions do not shift the
    # positions still waiting to be processed.
    for slot in sorted(slots, reverse=True):
        # The new char mimics the char currently at the slot (or the last
        # char when the slot is past the end of the word).
        neighbor = letters[min(slot, len(letters) - 1)]
        letters.insert(slot, get_random_letter(neighbor))

    return "".join(letters)
def delete(word, num=1, skip_first=False, skip_last=False):
    """
    Perturb the word by deleting random characters.

    :param str word: target word
    :param int num: number of characters to delete
    :param bool skip_first: whether to keep the char at the beginning of word
    :param bool skip_last: whether to keep the char at the end of word
    :return: perturbed string; the word itself when it has at most one
        char; None when fewer than num deletable positions are available

    """
    if len(word) <= 1:
        return word

    letters = list(word)
    first, last = get_start_end(word, skip_first, skip_last)

    if last - first + 1 < num:
        return None

    doomed = set(random.sample(list(range(first, last + 1)), num))

    # Keep every char whose position was not drawn for deletion.
    return "".join(
        ch for pos, ch in enumerate(letters) if pos not in doomed
    )
def replace(word, num=1, skip_first=False, skip_last=False):
    """
    Perturb the word by substituting random characters.

    :param str word: target word
    :param int num: number of characters to substitute
    :param bool skip_first: whether to keep the char at the beginning of word
    :param bool skip_last: whether to keep the char at the end of word
    :return: perturbed string; the word itself when it has at most one
        char or when fewer than num replaceable positions are available

    """
    # Too short to perturb: hand back the word unchanged (a string, matching
    # the documented return type and the other word operations).
    if len(word) <= 1:
        return word

    chars = list(word)
    start, end = get_start_end(word, skip_first, skip_last)

    # Not enough replaceable positions for num edits: return original word.
    if end - start + 1 < num:
        return word

    for idx in random.sample(list(range(start, end + 1)), num):
        # The substitute keeps the kind (digit / upper / lower) of the
        # char it replaces.
        chars[idx] = get_random_letter(chars[idx])

    return "".join(chars)