# -*- coding: utf-8 -*-
"""
Created on Sat Oct  4 07:38:30 2025

@author: Moritz Romeike
"""

# ------------------------------------------------------------------------
# Programmcode 23 (Python): Scatterplot mit Kendall’schem Rangkorrelationskoeffizient
# ------------------------------------------------------------------------
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Optional: use SciPy's kendalltau (which also yields a p-value) when SciPy is installed.
# HAS_SCIPY records whether the import succeeded so later code can pick a fallback.
try:
    from scipy.stats import kendalltau
    HAS_SCIPY = True
except ImportError:
    HAS_SCIPY = False

# === Load the Excel workbook ===
df_kendall = pd.read_excel("Kap_4.8.2_diodenproduktion_kendall.xlsx")

# Column headers of the two variables that are compared.
col_prod = "Produktion (Stück)"
col_score = "Qualitätsscore"

# Coerce both columns to numeric values; cells that cannot be parsed become NaN.
x = pd.to_numeric(df_kendall[col_prod], errors="coerce")
y = pd.to_numeric(df_kendall[col_score], errors="coerce")

# Keep only the rows in which both values are present.
mask = x.notna() & y.notna()
x = x[mask]
y = y[mask]

# === Compute the Kendall rank correlation ===
# The p-value stays None unless SciPy is available to compute it.
kendall_p = None
if not HAS_SCIPY:
    # Fallback: pandas' corr() returns only the coefficient.
    kendall_corr = x.corr(y, method="kendall")
else:
    kendall_corr, kendall_p = kendalltau(x, y)

# === Scatter plot with trend line (moving average as a stand-in for LOESS) ===
plt.figure(figsize=(7, 5))
plt.scatter(x, y, alpha=0.7, color="blue", label="Daten")

# Simple smoothing: centred rolling mean of y over the points ordered by x.
# A stable sort keeps tied x values in their original row order, so the
# smoothed curve does not depend on how the sort breaks ties.
x_values, y_values = x.to_numpy(), y.to_numpy()
order = np.argsort(x_values, kind="stable")
x_sorted, y_sorted = x_values[order], y_values[order]
window = max(5, len(x) // 10)
# rolling() yields NaN wherever a full window is not available, i.e. near
# both ends of the sorted data (and everywhere if there are fewer points
# than the window size).
y_smooth = pd.Series(y_sorted).rolling(window=window, center=True).mean()
plt.plot(x_sorted, y_smooth, color="red", linewidth=2, label=f"Trend (MA {window})")

# Annotation with the Kendall coefficient (and the p-value when available).
# It is anchored in axes-fraction coordinates (top-left corner) so that it
# stays inside the plot for any data range; fixed offsets in data units
# would move it off the axes when the x or y range is narrow.
kendall_label = f"Kendall: {kendall_corr:.3f}"
if kendall_p is not None:
    kendall_label += f"\np={kendall_p:.4f}"
plt.annotate(
    kendall_label,
    xy=(0.02, 0.98),
    xycoords="axes fraction",
    ha="left",
    va="top",
    fontsize=10,
    color="black",
)

plt.title("Scatterplot: Produktion vs. Qualitätsscore (Kendall)")
plt.xlabel(col_prod)
plt.ylabel(col_score)
plt.legend()
plt.grid(True, linestyle="--", alpha=0.6)
plt.tight_layout()
plt.show()

# === Console output ===
# Collect the report lines first, then emit them in a single print call.
report_lines = [f"Kendall-Rangkorrelation: {kendall_corr:.3f}"]
if kendall_p is not None:
    report_lines.append(f"p-Wert: {kendall_p:.5f}")
print("\n".join(report_lines))
# ------------------------------------------------------------------------
