政治敏感检测:识别涉及政治风险的文本内容
FreeGuideOnline
最新
2026-06-29
法轮功 六四事件 台独
### 核心代码实现
```python
import re
class SimplePoliticalDetector:
    """Keyword-based detector that reports which listed terms occur in a text.

    Keywords are read from a plain-text file, one per line.  Before matching,
    known obfuscated spellings (split-character or traditional-script forms)
    are rewritten to a canonical form using ``variant_map``.  Matching is a
    plain substring test on the canonicalized text.
    """

    def __init__(self, sensitive_file: str) -> None:
        """Load the keyword list and build the variant table.

        Args:
            sensitive_file: Path to a UTF-8 text file with one keyword per
                line.  Blank lines are ignored, surrounding whitespace is
                stripped, and a leading UTF-8 BOM is tolerated.

        Raises:
            OSError: If the file cannot be opened.
            UnicodeDecodeError: If the file is not valid UTF-8.
        """
        # 'utf-8-sig' drops a leading BOM; with plain 'utf-8' the BOM would
        # stay attached to the first keyword, which could then never match.
        with open(sensitive_file, 'r', encoding='utf-8-sig') as f:
            stripped = (line.strip() for line in f)
            # dict.fromkeys removes duplicate lines while keeping file order.
            self.keywords: list[str] = list(
                dict.fromkeys(kw for kw in stripped if kw)
            )
        # Simplified variant -> canonical mapping table.
        self.variant_map: dict[str, str] = self._build_variant_map()

    def _build_variant_map(self) -> dict[str, str]:
        """Return the variant-spelling -> canonical-spelling table.

        Only a few illustrative split-character / script-variant mappings are
        listed.  Identity entries (a string mapped to itself) are omitted
        because replacing a string with itself has no effect.
        """
        return {
            '氵去': '法',
            '车仑': '轮',
            '工力': '功',
            '獨': '独',
        }

    def normalize_text(self, text: str) -> str:
        """Replace every known variant spelling in *text* with its canonical form.

        Replacements are applied one after another in ``variant_map`` order.
        """
        for var, norm in self.variant_map.items():
            text = text.replace(var, norm)
        return text

    def check(self, text: str) -> list[str]:
        """Return the keywords found in *text*, in keyword-list order.

        Both the text and each keyword are canonicalized before comparison,
        so a keyword that is itself written in a variant form still matches.
        Each keyword is reported at most once, in its original spelling.

        Args:
            text: The text to scan.  ``None`` or an empty string yields ``[]``.

        Returns:
            A list of matching keywords (possibly empty).
        """
        if not text:
            return []
        norm_text = self.normalize_text(text)
        # Normalizing the keyword as well is required: the text has already
        # been rewritten, so a keyword containing a variant form would
        # otherwise never be found.  Empty keywords are skipped because the
        # empty string is a substring of everything.
        matches = (
            kw
            for kw in self.keywords
            if kw and self.normalize_text(kw) in norm_text
        )
        return list(dict.fromkeys(matches))
# Usage example: build a detector from the keyword file, then classify each
# sample and print either the matched keywords or a "safe" line.
detector = SimplePoliticalDetector('sensitive_words.txt')
samples = [
    "坚决反对台独分裂行径",
    "有人在讨论氵去车仑功的练法",
    "今天天气真好",
]
for s in samples:
    result = detector.check(s)
    # A non-empty hit list means at least one keyword was found.
    print(
        f"【风险】文本:{s} -> 命中敏感词:{result}"
        if result
        else f"【安全】文本:{s}"
    )