# -*- coding: utf-8 -*-
"""
Created on Sat Oct  4 10:04:03 2025

@author: Moritz Romeike
"""

# ------------------------------------------------------------------------
# Programmcode 35 (Python, Ergänzung): CDF und Histogramm in einer Grafik
# GBM-Simulation für Aluminiumpreise mit Histogramm+CDF (Dual-Achse)
# ------------------------------------------------------------------------
import os
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ----------------------------- Settings ---------------------------------
FILE_XLSX = "Kap_9.4_Aluminium_Price_20251110.xlsx"
SEED      = 42
T         = 90         # Vorhersagehorizont (Tage)
N_SIM     = 100_000    # Anzahl Simulationen (Verteilung)
N_PLOT    = 200        # Pfade für Visualisierung
DT        = 1.0        # Zeitschritt (Tage)

# ------------------------------ Load data --------------------------------
base = Path(os.getcwd())

def load_prices():
    xlsx = base / FILE_XLSX
    if xlsx.exists():
        try:
            df = pd.read_excel(xlsx, engine="openpyxl")
            df.columns = ["Datum", "Preis"]
            return df
        except Exception as e:
            print(f"⚠️ Excel nicht lesbar ({e}). Nutze CSV-Fallback …")
    csvp = base / FILE_CSV
    if csvp.exists():
        df = pd.read_csv(csvp)
        df.columns = ["Datum", "Preis"]
        return df
    raise FileNotFoundError(f"Weder Excel noch CSV gefunden:\n- {xlsx}\n- {csvp}")

df = load_prices()
df["Datum"] = pd.to_datetime(df["Datum"], errors="coerce")
df = df.sort_values("Datum").reset_index(drop=True)

# ------------------- Log-Renditen, mu, sigma, S0 -------------------------
df["Renditen"] = np.log(df["Preis"] / df["Preis"].shift(1))
df = df.dropna(subset=["Renditen"]).reset_index(drop=True)

mu    = float(df["Renditen"].mean())   # täglicher Drift
sigma = float(df["Renditen"].std())    # tägliche Volatilität
S0    = float(df["Preis"].iloc[-1])    # letzter Preis

print(f"S0={S0:.4f}, mu(täglich)={mu:.6f}, sigma(täglich)={sigma:.6f}")

# ----------------------- GBM-Simulation (MCS) --------------------
# S_{t+1} = S_t * exp((mu - 0.5*sigma^2)*DT + sigma*sqrt(DT)*Z)
rng = np.random.default_rng(SEED)

# Für Verteilung (N_SIM Pfade, nur Endwerte wichtig)
S = np.empty((T + 1, N_SIM), dtype=float)
S[0, :] = S0
for i in range(1, T + 1):
    Z = rng.standard_normal(N_SIM)
    S[i, :] = S[i - 1, :] * np.exp((mu - 0.5 * sigma**2) * DT + sigma * np.sqrt(DT) * Z)

final_prices = S[-1, :]
q_probs = [0.01, 0.05, 0.10, 0.90, 0.95, 0.99]
quantiles = np.quantile(final_prices, q_probs)
mean_price = float(final_prices.mean())

# Monte-Carlo-Fehler und 95%-Konfidenzintervall für den Mittelwert
mc_error = float(final_prices.std(ddof=1) / np.sqrt(N_SIM))
CI_Lower = mean_price - 1.96 * mc_error
CI_Upper = mean_price + 1.96 * mc_error

# ---------------------- Pfad-Plot (nur 200 Pfade) ------------------------
S_plot = np.empty((T + 1, N_PLOT), dtype=float)
S_plot[0, :] = S0
for i in range(1, T + 1):
    Z = rng.standard_normal(N_PLOT)
    S_plot[i, :] = S_plot[i - 1, :] * np.exp((mu - 0.5 * sigma**2) * DT + sigma * np.sqrt(DT) * Z)

tage = np.arange(T + 1)
mean_path = S_plot.mean(axis=1)

plt.figure()
for j in range(N_PLOT):
    plt.plot(tage, S_plot[:, j], alpha=0.2, linewidth=0.5)
plt.plot(tage, mean_path, linewidth=2.0)
plt.title("Simulierte Aluminiumpreise (GBM) | 200 Pfade")
plt.xlabel("Tage")
plt.ylabel("Preis (EUR pro Tonne)")
plt.tight_layout()
plt.show()

# ---------------- Histogramm & CDF in EINER Grafik -----------------------
# Histogramm (linke y-Achse), CDF (rechte y-Achse) mit Twin-Axis
fig, ax1 = plt.subplots()

# Histogramm
counts, bins, patches = ax1.hist(final_prices, bins=50, alpha=0.6, edgecolor="black")
ax1.set_xlabel("Preis (EUR pro Tonne)")
ax1.set_ylabel("Häufigkeit")
ax1.set_title("Histogramm & CDF der simulierten Aluminiumpreise am Tag 90")

# Skalierung für CDF (rechte Achse 0..1)
ax2 = ax1.twinx()
ax2.set_ylabel("Kumulative Wahrscheinlichkeit")

# ECDF
x_sorted = np.sort(final_prices)
y_ecdf   = np.arange(1, len(x_sorted) + 1) / len(x_sorted)
ax2.step(x_sorted, y_ecdf, where="post")

# Vertikale Linien (Mean, CI, Quantile)
ax1.axvline(mean_price, linestyle="--", linewidth=1.2)
ax1.axvline(CI_Lower, linestyle="--", linewidth=1.0)
ax1.axvline(CI_Upper, linestyle="--", linewidth=1.0)
for q in quantiles:
    ax1.axvline(q, linestyle=":", linewidth=1.0)

# Annotationen (dezente Positionen)
ymax = counts.max() if len(counts) else 0
ax1.text(mean_price, ymax*0.95, f"Mean: {mean_price:.2f}", rotation=90, va="top")
ax1.text(CI_Lower,  ymax*0.90, f"CI Low: {CI_Lower:.2f}", rotation=90, va="top")
ax1.text(CI_Upper,  ymax*0.85, f"CI Up:  {CI_Upper:.2f}", rotation=90, va="top")
labels = ["P1","P5","P10","P90","P95","P99"]
for q, lab, y in zip(quantiles, labels, [0.88,0.86,0.84,0.82,0.80,0.78]):
    ax1.text(q, ymax*y, f"{lab}: {q:.2f}", rotation=90, va="top")

fig.tight_layout()
plt.show()

# ------------------------- Konsolen-Output -------------------------------
print("\nZusammenfassung Tag 90")
print(f"  Mittelwert         : {mean_price:.4f}")
print(f"  MC-Fehler (StdErr) : {mc_error:.6f}")
print(f"  95%-CI (Mean)      : [{CI_Lower:.4f}, {CI_Upper:.4f}]")
for lab, q in zip(labels, quantiles):
    print(f"  {lab:<3}: {q:.4f}")
# ------------------------------------------------------------------------