diff --git a/Graph/goal_position_probability.png b/Graph/goal_position_probability.png index 799c14cfe330cef5884bd9fa555ec599bc148238..3253caacb96f5d7d4176f72af745956ba65f7600 100644 Binary files a/Graph/goal_position_probability.png and b/Graph/goal_position_probability.png differ diff --git a/Graph/goal_prob_by_manpowersituation.png b/Graph/goal_prob_by_manpowersituation.png index 3d6b660a746e16ab1661d0a06d7bd28239f2971d..5c0a24d4cad22ed3113ebfef4faacf29b8980c94 100644 Binary files a/Graph/goal_prob_by_manpowersituation.png and b/Graph/goal_prob_by_manpowersituation.png differ diff --git a/Graph/goal_prob_by_scorediff.png b/Graph/goal_prob_by_scorediff.png index 61c0247c44a2e4cc5a06edb1f4277a198bfbc465..fc1dd4fb06aa0dc1586d8480df05b1e2dbac9e3b 100644 Binary files a/Graph/goal_prob_by_scorediff.png and b/Graph/goal_prob_by_scorediff.png differ diff --git a/Graph/xgb_feature_importance.png b/Graph/xgb_feature_importance.png index 4257a513083d759dd69a9e0f6de52382fa8edb7a..1452e4846d0ac47a20f222532905ee0ad8a54862 100644 Binary files a/Graph/xgb_feature_importance.png and b/Graph/xgb_feature_importance.png differ diff --git a/Scripts/main_XGB.py b/Scripts/main_XGB.py index a2df3592ff6fa59a33eb9ae8e16554cf5344e964..e264e59ee7d26ab8ef258cbe9af5870fb3ba7441 100644 --- a/Scripts/main_XGB.py +++ b/Scripts/main_XGB.py @@ -61,15 +61,15 @@ print(classification_report(y_test, y_pred)) print("=== 混淆矩阵 ===") print(confusion_matrix(y_test, y_pred)) -# === 9. 特征重要性可视化并保存 === + plt.figure(figsize=(12, 8)) xgb.plot_importance(model, height=0.6, max_num_features=20, importance_type='gain') -plt.title("XGBoost 特征重要性(按信息增益)") +plt.title("XGBoost Feature Importance (by Gain)") # 英文标题 plt.tight_layout() -# 👉 保存图像到模型目录 + importance_plot_path = os.path.join("../Graph", "xgb_feature_importance.png") plt.savefig(importance_plot_path, dpi=300, bbox_inches='tight') -print(f"📊 特征重要性图已保存到: {importance_plot_path}") +print(f"📊 Feature importance plot saved to: {importance_plot_path}") plt.show() diff --git a/Scripts/main_XGB2.py b/Scripts/main_XGB2.py index e64783ebbeb616dd8deda17246cae2d7a3681363..5f3d0e5c27e675cedefdd6a6094c75826b1e80bf 100644 --- a/Scripts/main_XGB2.py +++ b/Scripts/main_XGB2.py @@ -70,37 +70,38 @@ df_test['scoredifferential'] = df.loc[df_test.index, 'scoredifferential'] # 10. 进球位置与进球概率关系图 plt.figure(figsize=(10, 6)) plt.scatter(df_test['xadjcoord'], df_test['yadjcoord'], c=df_test['goal_prob'], cmap='coolwarm', alpha=0.5) -plt.colorbar(label="进球概率") -plt.title("进球位置与进球概率的关系") +plt.colorbar(label="Goal Probability") +plt.title("Relationship Between Goal Location and Goal Probability") plt.xlabel("xadjcoord") plt.ylabel("yadjcoord") plt.axhline(0, color='gray', linestyle='--') plt.axvline(0, color='gray', linestyle='--') - -# ✅ 保存图像 plt.tight_layout() plt.savefig("../Graph/goal_position_probability.png", dpi=300, bbox_inches='tight') plt.show() + + # 11. 局势 vs 进球概率 plt.figure(figsize=(12, 6)) sns.boxplot(x=df_test['manpowersituation'], y=df_test['goal_prob']) -plt.title("不同局势下的进球概率") -plt.xlabel("比赛局势") -plt.ylabel("进球概率") +plt.title("Goal Probability Across Different Manpower Situations") +plt.xlabel("Manpower Situation") +plt.ylabel("Goal Probability") plt.xticks(rotation=45) plt.tight_layout() -plt.savefig("../Graph/goal_prob_by_manpowersituation.png", dpi=300, bbox_inches='tight') # ✅ 保存 +plt.savefig("../Graph/goal_prob_by_manpowersituation.png", dpi=300, bbox_inches='tight') plt.show() # 12. 比分差 vs 进球概率 plt.figure(figsize=(10, 6)) sns.lineplot(x=df_test['scoredifferential'], y=df_test['goal_prob'], marker='o') -plt.title("比分差与进球概率的关系") -plt.xlabel("比分差(本队 - 对方)") -plt.ylabel("进球概率") +plt.title("Goal Probability vs Score Differential") +plt.xlabel("Score Differential (Team - Opponent)") +plt.ylabel("Goal Probability") plt.tight_layout() -plt.savefig("../Graph/goal_prob_by_scorediff.png", dpi=300, bbox_inches='tight') # ✅ 保存 +plt.savefig("../Graph/goal_prob_by_scorediff.png", dpi=300, bbox_inches='tight') plt.show() + diff --git a/Scripts/process_1.py b/Scripts/process_1.py index 929c1163c750f180ddc4a80896c79dca5df6610c..84c1e8de038cbb27de6b38bf14a0f8c9c90609a4 100644 --- a/Scripts/process_1.py +++ b/Scripts/process_1.py @@ -10,41 +10,29 @@ df = pd.read_csv("Linhac24-25_Sportlogiq.csv") print(df.info()) print(df.head()) -# ============================= -# 1. 各类事件统计 -# ============================= + event_counts = df['eventname'].value_counts() print("事件类型统计:\n", event_counts) -# ============================= -# 2. 分析控球球队的控球次数 -# ============================= + team_possession_counts = df['teaminpossession'].value_counts() print("控球队出现次数:\n", team_possession_counts) -# ============================= -# 3. xG(expected goals)分析 -# ============================= + df['xg_allattempts'] = pd.to_numeric(df['xg_allattempts'], errors='coerce') xg_by_team = df.groupby('teamid')['xg_allattempts'].sum() print("每支球队的总xG:\n", xg_by_team) -# ============================= -# 4. 球员事件参与统计 -# ============================= + player_actions = df['playerid'].value_counts().head(10) print("参与最多事件的前10位球员:\n", player_actions) -# ============================= -# 5. 成功 vs 失败 事件比例 -# ============================= + success_rate = df['outcome'].value_counts(normalize=True) print("事件成功与失败比例:\n", success_rate) -# ============================= -# 6. 替你做一个简单总结 -# ============================= + print("\n简单总结:") print(f"总事件数: {len(df)}") print(f"总xG: {df['xg_allattempts'].sum():.2f}") diff --git a/Scripts/process_2.py b/Scripts/process_2.py index 8a2faba621b2f8353452ff18634ddd779cf7fdea..c5018e18c8967cbf8e777aed901baa9446631f54 100644 --- a/Scripts/process_2.py +++ b/Scripts/process_2.py @@ -3,22 +3,18 @@ import seaborn as sns import matplotlib.pyplot as plt import matplotlib -# 设置支持中文的字体 matplotlib.rcParams['font.family'] = 'Microsoft YaHei' # 设置中文字体为微软雅黑 matplotlib.rcParams['axes.unicode_minus'] = False # 正确显示负号 -# 加载预处理后的数据 + df = pd.read_csv("Linhac24-25_Sportlogiq.csv") -# 设置图形风格 sns.set(style="whitegrid") -# 1. 数据基本信息 print("📌 数据基本信息:") print(df.info()) print("\n📈 描述性统计:") print(df.describe()) -# 2. 类别分布(事件类型) print("\n📊 不同事件类型分布:") print(df['eventname'].value_counts()) @@ -29,7 +25,7 @@ plt.title("事件类型分布") plt.tight_layout() plt.show() -# 3. xG 分布分析 + plt.figure(figsize=(8, 4)) sns.histplot(df['xg_allattempts'], bins=30, kde=True) plt.title("xG 分布") @@ -37,7 +33,7 @@ plt.xlabel("xG 值") plt.tight_layout() plt.show() -# 4. 相关性热力图(数值型特征) + corr = df[['compiledgametime', 'xadjcoord', 'yadjcoord', 'xg_allattempts']].corr() plt.figure(figsize=(6, 4)) sns.heatmap(corr, annot=True, cmap='coolwarm') @@ -45,7 +41,7 @@ plt.title("数值型字段相关性") plt.tight_layout() plt.show() -# 5. 时间分布:每分钟事件数量 + df['event_minute'] = df['compiledgametime'] // 60 minute_event_count = df.groupby('event_minute').size()