方面级情感分析 ABSA:针对特定属性的情感判断
FreeGuideOnline
最新
2026-06-23
```bash
pip install transformers datasets torch
```
### 数据格式
假设我们的训练数据为 CSV,包含三列:`sentence`(文本)、`aspect`(方面项)、`sentiment`(情感:positive/negative/neutral)。
### 构建数据集与模型
```python
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
import pandas as pd
# Load the labelled reviews; the CSV provides `sentence`, `aspect` and
# `sentiment` columns.
df = pd.read_csv('restaurant_reviews.csv')

# Loaded before create_input() so the helper's dependency is visible up front.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


def create_input(example):
    """Tokenize one dataset row as a BERT sentence pair.

    The sentence and the aspect term are passed as the two text segments,
    so the tokenizer joins them with a [SEP] token.

    Args:
        example: A single row with 'sentence' and 'aspect' entries.

    Returns:
        The tokenizer output, truncated/padded to exactly 128 tokens.
    """
    return tokenizer(
        example['sentence'],
        example['aspect'],
        truncation=True,
        padding='max_length',
        max_length=128,
    )


dataset = Dataset.from_pandas(df)
# batched=False: create_input receives one row (a dict) per call.
dataset = dataset.map(create_input, batched=False)
# Mapping between sentiment strings and integer class ids (and back).
label2id = {'negative': 0, 'neutral': 1, 'positive': 2}
id2label = {idx: name for name, idx in label2id.items()}


def encode_labels(example):
    """Add the integer class id for a row's sentiment under 'labels'.

    Args:
        example: A single row with a 'sentiment' entry that is one of the
            keys of ``label2id``.

    Returns:
        The same row object, with 'labels' set to the class id.

    Raises:
        KeyError: If the sentiment string is not a key of ``label2id``.
    """
    example['labels'] = label2id[example['sentiment']]
    return example
# Attach the integer labels, then hold out 20% of the rows for validation.
dataset = dataset.map(encode_labels).train_test_split(test_size=0.2)
```
### 训练与评估
```python
# Three-way classifier head on top of BERT; the label maps are stored in the
# model config so predictions can be translated back to sentiment names.
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=3,
    id2label=id2label,
    label2id=label2id,
)

training_args = TrainingArguments(
    output_dir='./results',
    # `evaluation_strategy` was renamed to `eval_strategy` (deprecated in
    # transformers 4.41 and later removed); the old keyword raises a
    # TypeError on current releases.
    eval_strategy='epoch',
    save_strategy='epoch',
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    logging_dir='./logs',
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset['train'],
    eval_dataset=dataset['test'],
)

trainer.train()
```
### 预测新样本
```python
def predict(sentence, aspect):
    """Predict the sentiment expressed towards one aspect of a sentence.

    Args:
        sentence: The review text.
        aspect: The aspect term to judge.

    Returns:
        The sentiment name looked up in ``id2label`` for the top-scoring class.
    """
    import torch  # local import: the file's import block does not provide torch

    inputs = tokenizer(sentence, aspect, return_tensors='pt', truncation=True, padding=True)
    # Trainer may have moved the model to an accelerator; the inputs must be
    # on the same device or the forward pass fails.
    inputs = inputs.to(model.device)

    model.eval()  # disable dropout for deterministic inference
    with torch.no_grad():  # no gradients needed for prediction
        outputs = model(**inputs)

    pred = outputs.logits.argmax(-1).item()
    return id2label[pred]
# Same sentence, two aspects. The article expects 'positive' for "pasta"
# and 'negative' for "waiter" once the model has been trained.
review = "The pasta was delicious but the waiter was rude."
for aspect in ("pasta", "waiter"):
    print(predict(review, aspect))