# Source code for textflint.generation_layer.transformation.ABSA.add_diff

r"""
Add the difference part of target in ABSA task
==========================================================
"""

import random
import string
from .absa_transformation import ABSATransformation

__all__ = ['AddDiff']


class AddDiff(ABSATransformation):
    r"""
    Add the difference part of aspect to the end of original sentence.
    The difference part is extracted from the training set of SemEval2014
    or user's customization.

    Example::

        Original sentence: "BEST spicy tuna roll, great asian salad.
        (Target: spicy tuna roll)"
        Transformed sentence: "BEST spicy tuna roll, great asian salad,
        but this small place is packed, on a cold day, the seating by the
        entrance way can be pretty drafty and bad service."

    """

    def __init__(self, language="eng"):
        r"""
        :param str language: language of the input text; only ``"eng"``
            is accepted.
        :raises ValueError: if ``language`` is anything other than
            ``"eng"``.
        """
        super().__init__()
        if language != "eng":
            raise ValueError(f"Language {language} is not available.")
        self.language = language
        # Short-hands for the tokenizer pair exposed by ``self.processor``
        # (presumably set up by the parent class -- it is not assigned
        # anywhere in this class).
        self.tokenize = self.processor.tokenize
        self.untokenize = self.processor.inverse_tokenize

    def __repr__(self):
        return "AddDiff"

    def _transform(self, sample, n=1, field='sentence', extra_text=None,
                   **kwargs):
        r"""
        Transform data sample to a list of Sample.

        :param ~textflint.input_layer.component.sample.ABSAsample sample:
            input ABSAsample
        :param int n: the number of transformation, in ABSA-specific
            transformations n=1 (not used by this method)
        :param str field: field name (not used by this method)
        :param dict extra_text: extra text that will be added to the
            original sentence. It is indexed with the keys ``'positive'``,
            ``'negative'`` and ``'neutral'``; each value is a sequence of
            entries where ``entry[0]`` is an aspect term and
            ``entry[1][0]`` is a sentence string. It must be supplied
            whenever at least one term is transformed.
        :return list: list of transformed ABSAsample

        """
        trans_samples = []
        self.trans_term_list = sample.term_list
        self.terms = sample.terms
        self.term_list = sample.term_list
        self.sentence = sample.sentence.text

        # Without an explicit target, every term of the sample is
        # transformed in turn.
        if sample.trans_id is None:
            self.trans_id = list(self.terms)
        else:
            self.trans_id = [sample.trans_id]

        all_term = [self.term_list[idx]['term'] for idx in self.term_list]
        # The appended text carries the opposite polarity of the target
        # term; any polarity other than positive/negative uses the
        # 'neutral' pool.
        opposite = {'positive': 'negative', 'negative': 'positive'}

        for term_id in self.trans_id:
            trans_sample = sample.clone(sample)
            polarity = self.term_list[term_id]['polarity']
            add_text = extra_text[opposite.get(polarity, 'neutral')]
            add_sentence = self._get_add_sentence(add_text, all_term)
            trans_sentence = self._concatenate_sentence(add_sentence)
            trans_sample.update_sentence(trans_sentence)
            trans_sample.trans_id = term_id
            trans_samples.append(trans_sample)

        return trans_samples

    def _get_add_sentence(self, add_text, all_term):
        r"""
        Get the sentence that owns different polarity compared with the
        aspect. Choose 1~3 sentences randomly from add_text and put them
        together.

        Entries whose term already occurs in ``all_term`` are skipped so
        that the appended text does not talk about an aspect of the
        original sentence. If every entry collides, the whole pool is used
        as a best-effort fallback.

        :param list add_text: extra text; each entry provides a term at
            index 0 and a sentence string at ``entry[1][0]``
        :param list all_term: all aspect term
        :return list: extra sentence (token list) that need to be added to
            original sentence
        :raises ValueError: if ``add_text`` is empty

        """
        if not add_text:
            raise ValueError("add_text must contain at least one entry.")

        # Reuse the end mark of the original sentence when it has one,
        # otherwise close the added part with a full stop.
        sentence = self.sentence
        punctuation = '.'
        if sentence and sentence[-1] in string.punctuation:
            punctuation = sentence[-1]

        # Filtering up front replaces a retry loop and always terminates.
        candidates = [entry for entry in add_text
                      if entry[0] not in all_term]
        if not candidates:
            candidates = list(add_text)

        # Never ask for more sentences than the pool can provide.
        add_num = min(random.randint(1, 3), len(candidates))
        chosen = random.sample(candidates, add_num)

        rand_sentences = []
        for entry in chosen:
            tokens = self.tokenize(entry[1][0])
            # Drop the trailing end mark so the pieces can be joined.
            if tokens and tokens[-1] in string.punctuation:
                tokens = tokens[:-1]
            rand_sentences.append(tokens)

        if add_num == 3:
            add_sentence = rand_sentences[0] + [','] + rand_sentences[1] \
                + ['and'] + rand_sentences[2] + [punctuation]
        elif add_num == 2:
            add_sentence = rand_sentences[0] + ['and'] \
                + rand_sentences[1] + [punctuation]
        else:
            add_sentence = rand_sentences[0] + [punctuation]

        return add_sentence

    def _concatenate_sentence(self, add_sentence):
        r"""
        Concatenate the extra part to original sentence.

        :param list add_sentence: extra sentence (token list) that need to
            be added to original sentence
        :return str: transformed sentence

        """
        # Remove the end mark of the original sentence, but keep the last
        # character when the sentence does not end with punctuation.
        sentence = self.sentence
        if sentence and sentence[-1] in string.punctuation:
            sentence = sentence[:-1]

        # ``get_postag`` is inherited; presumably this yields the POS tag
        # of the first added token -- confirm against the parent class.
        opi_tag = self.get_postag(add_sentence, 0, 1)
        trans_sentence = self.untokenize(add_sentence)

        if opi_tag[0] != 'CONJ':
            # NOTE(review): these are substring checks on the raw text, so
            # words such as "butter" also match 'but'.
            if 'but' in sentence or 'although' in sentence:
                return sentence + "; " + trans_sentence
            return sentence + ", but " + trans_sentence

        # The added part already starts with a conjunction: begin a new,
        # capitalised sentence instead of inserting another "but".
        return sentence + ". " + trans_sentence[:1].upper() \
            + trans_sentence[1:]