政治敏感检测:识别涉及政治风险的文本内容

FreeGuideOnline 最新 2026-06-29

法轮功 六四事件 台独


### 核心代码实现
```python
import re

class SimplePoliticalDetector:
    """Keyword-based detector that reports which listed terms occur in a text.

    Text is first normalized through a small table of known obfuscations
    (characters split into components, traditional-script forms) and then
    scanned for every keyword with a plain substring test.

    Attributes are plain data and may be inspected or replaced by callers:
    ``keywords`` is the ordered list of unique keywords loaded from the
    file, ``variant_map`` maps an obfuscated form to its canonical form.
    """

    def __init__(self, sensitive_file: str):
        """Load the keyword list and build the variant table.

        Args:
            sensitive_file: Path to a UTF-8 text file holding one keyword
                per line. Blank lines and surrounding whitespace are ignored.

        Raises:
            OSError: If the file cannot be opened.
            UnicodeDecodeError: If the file is not valid UTF-8.
        """
        # 'utf-8-sig' reads plain UTF-8 unchanged but drops a leading BOM;
        # with plain 'utf-8' the BOM would stick to the first keyword and
        # that keyword would silently never match.
        with open(sensitive_file, 'r', encoding='utf-8-sig') as f:
            stripped = (line.strip() for line in f)
            # dict.fromkeys removes repeated keywords while keeping file
            # order, so a repeated line cannot produce repeated hits.
            self.keywords = list(dict.fromkeys(kw for kw in stripped if kw))
        self.variant_map = self._build_variant_map()  # simplified variant table

    def _build_variant_map(self) -> dict:
        """Return the obfuscated-form -> canonical-form replacement table.

        The table is only an illustrative sample of split-character and
        traditional-script variants. Every entry must actually change the
        text; a form that maps to itself would be a no-op.
        """
        return {
            '氵去': '法',
            '车仑': '轮',
            '工力': '功',
            '獨': '独',
        }

    def normalize_text(self, text: str) -> str:
        """Return ``text`` with every known variant replaced by its canonical form.

        Replacements are applied one table entry at a time, in the table's
        iteration order.
        """
        for var, norm in self.variant_map.items():
            text = text.replace(var, norm)
        return text

    def check(self, text: str) -> list:
        """Return the keywords found in ``text`` after normalization.

        Args:
            text: The text to scan. Empty or ``None`` input yields no hits.

        Returns:
            The matching keywords exactly as they appear in ``self.keywords``
            and in that order; an empty list when nothing matches.
        """
        if not text:
            return []
        norm_text = self.normalize_text(text)
        # Keywords go through the same normalization as the text. Without
        # this, a keyword written with a variant form could never match,
        # because the text has already been rewritten to the canonical form.
        # Normalizing here rather than caching keeps the result correct when
        # callers change ``keywords`` or ``variant_map`` after construction.
        return [kw for kw in self.keywords if self.normalize_text(kw) in norm_text]

# Usage example: load the word list, then report each sample as risky or safe.
detector = SimplePoliticalDetector('sensitive_words.txt')
samples = [
    "坚决反对台独分裂行径",
    "有人在讨论氵去车仑功的练法",
    "今天天气真好",
]
for s in samples:
    result = detector.check(s)
    # An empty hit list is falsy, so it selects the "safe" message.
    print(f"【风险】文本:{s} -> 命中敏感词:{result}" if result else f"【安全】文本:{s}")