Equalized Odds:要求不同群组的真正率(TPR)与假正率(FPR)相等的公平性指标
python import numpy as np import pandas as pd from sklearn.model_selection import train_test_split from sklearn.linear_model import LogisticRegression from fairlearn.reductions import ExponentiatedGradient, EqualizedOdds
模拟数据集:二分类任务,属性 A 为敏感特征
# Seed NumPy's global RNG so the synthetic data set is reproducible.
np.random.seed(42)
n = 1000
# Two standard-normal features per sample.
X = np.random.randn(n, 2)
# Sensitive attribute: one Bernoulli(0.5) draw per sample, values in {0, 1}.
A = np.random.binomial(1, 0.5, n)
生成真实标签:标签依赖敏感属性 A,因此原始数据本身可能存在偏差
# Ground-truth labels: positive when the sum of both features plus a 0.5
# bonus for samples with A == 1 exceeds 0.5, so the label depends on A.
Y = np.greater(X[:, 0] + X[:, 1] + 0.5 * A, 0.5).astype(int)
划分训练/测试
# Hold out 30% of the samples for evaluation; features, labels and the
# sensitive attribute are split together so their rows stay aligned.
(
    X_train, X_test,
    Y_train, Y_test,
    A_train, A_test,
) = train_test_split(X, Y, A, test_size=0.3, random_state=0)
选用指数梯度(Exponentiated Gradient)约简方法,并传入公平性约束 EqualizedOdds
# Base learner that the reduction wraps.
estimator = LogisticRegression(solver='liblinear')
# Exponentiated-gradient reduction configured with the EqualizedOdds constraint.
mitigator = ExponentiatedGradient(
    estimator,
    constraints=EqualizedOdds(),
)

# The sensitive attribute is supplied separately via `sensitive_features`;
# it is not one of the two feature columns in X_train.
mitigator.fit(X_train, Y_train, sensitive_features=A_train)
# Prediction is called with the test features only.
# NOTE(review): ExponentiatedGradient is documented as a randomized
# classifier; presumably predict() draws from the global NumPy RNG seeded
# above -- confirm against the fairlearn documentation.
Y_pred = mitigator.predict(X_test)
评估每个群组的 TPR 与 FPR
def compute_fairness_metrics(y_true, y_pred, group):
    """Return the true-positive and false-positive rate within one group.

    Args:
        y_true: Array-like of ground-truth labels; 1 is positive, 0 negative.
        y_pred: Array-like of predicted labels, aligned with ``y_true``.
        group: Array-like membership indicator aligned with ``y_true``;
            entries equal to 1 (or ``True``) select the samples to evaluate.

    Returns:
        A ``(tpr, fpr)`` tuple. ``tpr`` is the share of the group's positive
        samples predicted as 1, ``fpr`` the share of the group's negative
        samples predicted as 1. A rate is NaN when the group contains no
        sample of the corresponding class.
    """
    # Coerce to arrays so plain Python lists compare element-wise; on a list,
    # ``== 1`` collapses to a single False and silently breaks the counts.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    in_group = np.asarray(group) == 1

    predicted_positive = y_pred == 1
    group_positives = in_group & (y_true == 1)
    group_negatives = in_group & (y_true == 0)

    def _rate(selected):
        # Share of the selected samples that were predicted positive.
        total = np.sum(selected)
        if total == 0:
            # Undefined rate: return NaN explicitly instead of dividing 0/0,
            # which would emit a RuntimeWarning.
            return np.float64("nan")
        return np.sum(selected & predicted_positive) / total

    return _rate(group_positives), _rate(group_negatives)
# Print TPR and FPR separately for each value of the sensitive attribute so
# the two groups' rates can be compared side by side.
for g in [0, 1]:
    # `A_test == g` is the boolean membership mask for group g.
    tpr, fpr = compute_fairness_metrics(Y_test, Y_pred, A_test == g)
    print(f"组别 A={g}: TPR={tpr:.3f}, FPR={fpr:.3f}")