出行需求预测:网约车与共享单车的高精度需求预估
import lightgbm as lgb
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import TimeSeriesSplit
假设 df 已包含所有特征列、用于排序的 'timestamp' 列以及目标列 'demand'
按照时间顺序排序
# Order rows chronologically so that the positional train/valid/test split
# performed later in the script is a time-based split.  A stable sort keeps
# rows with equal timestamps in their original relative order, so the split
# boundaries do not depend on how an unstable sort happens to arrange ties.
df = df.sort_values('timestamp', kind='stable')
分离特征与目标
# The target column and the 'timestamp' / 'grid_id' columns are excluded
# from the model inputs; every remaining column is treated as a feature.
_non_feature_cols = ('demand', 'timestamp', 'grid_id')
features = [name for name in df.columns if name not in _non_feature_cols]

# Feature matrix and regression target.
X = df[features]
y = df['demand']
按时间顺序划分训练集、验证集和测试集
# Positional split with no shuffling: the first 70% of rows train the model,
# the next 15% are used for validation, and the final 15% are held out for
# testing.
# NOTE(review): this is only a chronological split if df is already sorted
# by time; rows sharing a timestamp may also land on both sides of a
# boundary -- confirm that is acceptable.
n_rows = len(df)
train_cutoff = int(n_rows * 0.7)
valid_cutoff = int(n_rows * 0.85)

train_rows = slice(None, train_cutoff)
valid_rows = slice(train_cutoff, valid_cutoff)
test_rows = slice(valid_cutoff, None)

X_train, y_train = X.iloc[train_rows], y.iloc[train_rows]
X_valid, y_valid = X.iloc[valid_rows], y.iloc[valid_rows]
X_test, y_test = X.iloc[test_rows], y.iloc[test_rows]
训练 LightGBM 回归模型
# Hyper-parameters for the LightGBM regressor.
lgbm_params = {
    'objective': 'regression',
    'n_estimators': 500,
    'learning_rate': 0.05,
    'max_depth': 7,
    'num_leaves': 63,
    'random_state': 42,
}
model = lgb.LGBMRegressor(**lgbm_params)

# The validation split is monitored during training: stop once the
# validation metric has not improved for 50 rounds, and log the metric
# every 100 rounds.
fit_callbacks = [
    lgb.early_stopping(stopping_rounds=50),
    lgb.log_evaluation(period=100),
]
model.fit(
    X_train,
    y_train,
    eval_set=[(X_valid, y_valid)],
    callbacks=fit_callbacks,
)
预测与评估
# Predict on the held-out test split and report the mean absolute error
# to two decimal places.
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print(f'Test MAE: {mae:.2f}')
特征重要性
# Pair each feature name with the importance score exposed by the fitted
# model, rank from most to least important, and show the top ten.
importance_table = pd.DataFrame(
    {
        'feature': features,
        'importance': model.feature_importances_,
    }
)
importance = importance_table.sort_values(by='importance', ascending=False)
print(importance.head(10))