方面级情感分析 ABSA:针对特定属性的情感判断

FreeGuideOnline 最新 2026-06-23

```bash
pip install transformers datasets torch
```


### 数据格式

假设我们的训练数据为 CSV,包含三列:`sentence`(文本)、`aspect`(方面项)、`sentiment`(情感:positive/negative/neutral)。

### 构建数据集与模型

```python
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
import pandas as pd

# Load the labelled reviews from the CSV file into a DataFrame.
csv_path = 'restaurant_reviews.csv'
df = pd.read_csv(csv_path)
# Build the BERT input: the sentence and the aspect are passed as a text pair,
# so the tokenizer joins them with a [SEP] token.
def create_input(example):
    """Tokenize one row's ``sentence``/``aspect`` pair.

    The encoding is truncated and padded to a fixed length of 128 tokens.
    Relies on the module-level ``tokenizer``.
    """
    sentence, aspect = example['sentence'], example['aspect']
    encoding = tokenizer(
        sentence,
        aspect,
        truncation=True,
        padding='max_length',
        max_length=128,
    )
    return encoding

# Load the pretrained uncased BERT tokenizer, then convert the DataFrame into
# a Dataset and tokenize it one row at a time.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
dataset = Dataset.from_pandas(df).map(create_input, batched=False)

# Label mapping: sentiment name -> class index, plus the inverse lookup.
label2id = {
    'negative': 0,
    'neutral': 1,
    'positive': 2,
}
id2label = dict(zip(label2id.values(), label2id.keys()))

def encode_labels(example):
    """Add an integer ``labels`` field derived from the row's ``sentiment``.

    The row is updated in place and returned. A sentiment string that is not
    a key of ``label2id`` raises ``KeyError``.
    """
    sentiment = example['sentiment']
    example['labels'] = label2id[sentiment]
    return example
# Attach the integer `labels` column to every row.
dataset = dataset.map(encode_labels)

# Split into training/validation sets (80% / 20%). The fixed seed makes the
# shuffle deterministic, so the validation set and its metrics are comparable
# across runs instead of changing every time the script is executed.
dataset = dataset.train_test_split(test_size=0.2, seed=42)

```

### 训练与评估

```python

# Load pretrained BERT with a 3-class sequence-classification head and store
# the label mappings in the model config.
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=3,
    id2label=id2label,
    label2id=label2id,
)

# Newer transformers releases renamed `evaluation_strategy` to `eval_strategy`.
# Because the install command does not pin a version, pick whichever keyword
# the installed TrainingArguments accepts so the script runs on both.
import inspect

_eval_strategy_key = (
    'eval_strategy'
    if 'eval_strategy' in inspect.signature(TrainingArguments.__init__).parameters
    else 'evaluation_strategy'
)

# Evaluate and save a checkpoint at the end of every epoch.
training_args = TrainingArguments(
    output_dir='./results',
    save_strategy='epoch',
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    logging_dir='./logs',
    **{_eval_strategy_key: 'epoch'},
)

# Train on the 'train' split and evaluate on the held-out 'test' split.
train_split, eval_split = dataset['train'], dataset['test']
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_split,
    eval_dataset=eval_split,
)

# Run the fine-tuning loop.
trainer.train()

```

### 预测新样本

```python

def predict(sentence, aspect):
    """Return the predicted sentiment label for ``aspect`` within ``sentence``.

    Args:
        sentence: The review text.
        aspect: The aspect term whose sentiment should be judged.

    Returns:
        The label string looked up in ``id2label`` for the highest-scoring class.

    Relies on the module-level ``tokenizer``, ``model`` and ``id2label``.
    """
    import torch  # local import: the file's visible import block does not import torch

    # Switch off dropout so repeated calls give the same prediction.
    model.eval()
    inputs = tokenizer(sentence, aspect, return_tensors='pt', truncation=True, padding=True)
    # Trainer may have moved the model to an accelerator; the inputs must be
    # on the same device or the forward pass raises a device-mismatch error.
    inputs = inputs.to(model.device)
    # Inference only: skip gradient tracking.
    with torch.no_grad():
        outputs = model(**inputs)
    pred = outputs.logits.argmax(-1).item()
    return id2label[pred]

# The same sentence is queried for two different aspects.
review = "The pasta was delicious but the waiter was rude."
print(predict(review, "pasta"))
# Expected output: positive
print(predict(review, "waiter"))
# Expected output: negative