# -*- coding: utf-8 -*-
"""
Created on Sat Oct  4 07:36:58 2025

@author: Moritz Romeike
"""

# ------------------------------------------------------------------------
# Programmcode 22 (Python): Scatterplot mit Spearman-Rangkorrelation
# ------------------------------------------------------------------------
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Optional: Spearman mit p-Wert, falls SciPy installiert ist
try:
    from scipy.stats import spearmanr
    HAS_SCIPY = True
except ImportError:
    HAS_SCIPY = False

# === Load data (the Excel file must exist; the path is relative) ===
col_prod = "Produktion (Stück)"
col_score = "Qualitätsscore"

df_spearman = pd.read_excel("Kap_4.8.2_diodenproduktion_kendall.xlsx")

# Coerce both columns to numbers; cells that cannot be parsed become NaN.
x = pd.to_numeric(df_spearman[col_prod], errors="coerce")
y = pd.to_numeric(df_spearman[col_score], errors="coerce")

# Keep only the rows in which both values are present.
mask = x.notna() & y.notna()
x = x[mask]
y = y[mask]

# === Compute the Spearman rank correlation ===
if not HAS_SCIPY:
    # Fallback without SciPy: Pearson correlation of the (average) ranks,
    # taken from the off-diagonal entry of the 2x2 correlation matrix.
    # No p-value is available on this path.
    ranks_prod = x.rank().to_numpy()
    ranks_score = y.rank().to_numpy()
    corr_matrix = np.corrcoef(ranks_prod, ranks_score)
    spearman_corr = float(corr_matrix[0, 1])
    spearman_p = None
else:
    # SciPy returns the coefficient together with its p-value.
    spearman_corr, spearman_p = spearmanr(x, y)

# === Scatter plot with a LOESS-like trend line (moving average) ===
plt.figure(figsize=(7, 5))
plt.scatter(x, y, alpha=0.7, color="blue", label="Daten")

# Simple smoothing: a centred rolling mean over the points sorted by x
# serves as a stand-in for LOESS.
# Sorting is done on plain NumPy arrays so that `order` is a positional
# index array; the stable sort keeps tied x values in their original order.
x_values, y_values = x.to_numpy(), y.to_numpy()
order = np.argsort(x_values, kind="stable")
x_sorted, y_sorted = x_values[order], y_values[order]
window = max(5, len(x) // 10)
# min_periods=1 lets the window shrink at both ends instead of yielding NaN,
# so the trend line spans the full x range and is still drawn when there
# are fewer points than the window size.
y_smooth = (
    pd.Series(y_sorted)
    .rolling(window=window, center=True, min_periods=1)
    .mean()
)
plt.plot(x_sorted, y_smooth, color="red", linewidth=2, label=f"Trend (MA {window})")

# Annotation with the correlation coefficient (and the p-value, if known).
# It is anchored in axes-fraction coordinates (top-left corner) rather than
# at data-dependent offsets, so it stays visible whatever the data scale is.
annotation_text = f"Spearman: {spearman_corr:.3f}"
if spearman_p is not None:
    annotation_text += f"\np={spearman_p:.4f}"
plt.annotate(
    annotation_text,
    xy=(0.03, 0.97),
    xycoords="axes fraction",
    ha="left",
    va="top",
    fontsize=10,
    color="black",
)

plt.title("Scatterplot: Produktion vs. Qualitätsscore")
plt.xlabel(col_prod)
plt.ylabel(col_score)
plt.legend()
plt.grid(True, linestyle="--", alpha=0.6)
plt.tight_layout()
plt.show()

# === Console output ===
# The p-value line is only added when a p-value was computed.
report_lines = [f"Spearman-Rangkorrelation: {spearman_corr:.3f}"]
if spearman_p is not None:
    report_lines.append(f"p-Wert: {spearman_p:.5f}")
print("\n".join(report_lines))
# ------------------------------------------------------------------------
