# -*- coding: utf-8 -*-
"""
Created on Fri Oct  3 11:49:45 2025

@author: Moritz Romeike
"""

# ------------------------------------------------------------------------
# Programmcode 04 (Python): Identifikation & Visualisierung von Ausreißern in Risk_Score
# ------------------------------------------------------------------------
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

# Skript-Ordner (in Spyder als .py ausführen)
base_path = Path(__file__).resolve().parent

# Dateien
imputed_path = base_path / "Kap_2.2_Risk_Management_Data_Imputed.xlsx"
original_path = base_path / "Kap_2.2_Risk_Management_Data.xlsx"

# Falls Imputation noch nicht gelaufen ist, nimm Original
data_path = imputed_path if imputed_path.exists() else original_path

# Daten einlesen + sicherstellen, dass Risk_Score numerisch ist
data = pd.read_excel(data_path)
data["Risk_Score"] = pd.to_numeric(data["Risk_Score"], errors="coerce")

print("Struktur der eingelesenen Daten:")
print(data.info())
print("\nErste Zeilen der Daten:")
print(data.head())

# IQR-Methode
q1 = data["Risk_Score"].quantile(0.25)
q3 = data["Risk_Score"].quantile(0.75)
iqr = q3 - q1
lower = q1 - 1.5 * iqr
upper = q3 + 1.5 * iqr

outlier_mask = (data["Risk_Score"] < lower) | (data["Risk_Score"] > upper)
outliers = data.loc[outlier_mask].copy()

print(f"\nIQR-Grenzen: lower={lower:.4f}, upper={upper:.4f}")
print(f"Anzahl Ausreißer: {outliers.shape[0]}")
if not outliers.empty:
    print("\nAusreißer (Project_ID, Risk_Score):")
    print(outliers[["Project_ID", "Risk_Score"]].to_string(index=False))

# Boxplot + Ausreißerpunkten + Labels
fig, ax = plt.subplots(figsize=(6, 6))
ax.boxplot(data["Risk_Score"].dropna(), vert=True)

# Punkte auf x=1 setzen (ein Boxplot)
xpos = np.full(outliers.shape[0], 1.0)
ax.scatter(xpos, outliers["Risk_Score"])

# Labels leicht versetzt
for x, y, pid in zip(xpos, outliers["Risk_Score"], outliers["Project_ID"]):
    ax.annotate(str(pid), (x, y), xytext=(0.05, 0.5),
                textcoords="offset points", ha="left", va="bottom", fontsize=8)

ax.set_title("Boxplot of Risk_Score with Outliers")
ax.set_xlabel("")
ax.set_ylabel("Risk_Score")
plt.tight_layout()

# Ausgaben speichern
img_path = base_path / "Risk_Score_Boxplot_Outliers.png"
csv_path = base_path / "Risk_Score_Outliers.csv"
plt.savefig(img_path, dpi=150)
outliers.to_csv(csv_path, index=False)

print(f"\nGrafik gespeichert unter: {img_path}")
print(f"Ausreißer-Tabelle gespeichert unter: {csv_path}")
# ------------------------------------------------------------------------
