# -*- coding: utf-8 -*-
"""
Created on Sat Oct  4 10:01:39 2025

@author: Moritz Romeike
"""

# ------------------------------------------------------------------------
# Programmcode 35 (Python): Simulation zukünftiger Aluminiumpreise mit GBM
# ------------------------------------------------------------------------
import os
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ----------------------------- Einstellungen -----------------------------
FILE_XLSX = "Kap_9.4_Aluminium_Price_20251110.xlsx"
SEED      = 42
T         = 90       # Tage für die Vorhersage
N_SIM     = 10_000   # Anzahl Simulationen (für Verteilung)
N_PLOT    = 200      # Anzahl Pfade für Visualisierung
DT        = 1.0      # Zeitschritt in Tagen

# ----------------------------- Daten einlesen -----------------------------
base = Path(os.getcwd())

def load_prices():
    # 1) Excel
    xlsx_path = base / FILE_XLSX
    if xlsx_path.exists():
        try:
            df = pd.read_excel(xlsx_path, engine="openpyxl")
            df.columns = ["Datum", "Preis"]
            return df
        except Exception as e:
            print(f"⚠️ Konnte Excel nicht lesen ({e}). Versuche CSV-Fallback...")
    # 2) CSV (Komma-Separator, Dezimalpunkt; ggf. anpassen)
    csv_path = base / FILE_CSV
    if csv_path.exists():
        df = pd.read_csv(csv_path)
        df.columns = ["Datum", "Preis"]
        return df
    raise FileNotFoundError(
        f"Weder Excel noch CSV gefunden/lesbar:\n- {xlsx_path}\n- {csv_path}"
    )

aluminium_data = load_prices()

# Datum parsen, sortieren (wie tidyverse::arrange)
aluminium_data["Datum"] = pd.to_datetime(aluminium_data["Datum"], errors="coerce")
aluminium_data = aluminium_data.sort_values("Datum").reset_index(drop=True)

# -------------------- Logarithmische Renditen (täglich) -------------------
# Rendite_t = ln(Preis_t / Preis_{t-1})
aluminium_data["Renditen"] = np.log(aluminium_data["Preis"] / aluminium_data["Preis"].shift(1))
aluminium_data = aluminium_data.dropna(subset=["Renditen"]).reset_index(drop=True)

# Drift (mu) und Volatilität (sigma) aus den Tagesrenditen
mu    = aluminium_data["Renditen"].mean()
sigma = aluminium_data["Renditen"].std()

# Startpreis (letzter bekannter Preis)
S0 = float(aluminium_data["Preis"].iloc[-1])

print(f"Startpreis S0: {S0:.4f}")
print(f"mu (täglich): {mu:.6f}")
print(f"sigma (täglich): {sigma:.6f}")

# ---------------------- GBM-Simulation (MCS) ----------------------
# Diskrete GBM: S_{t+1} = S_t * exp((mu - 0.5*sigma^2)*dt + sigma*sqrt(dt)*Z)
rng = np.random.default_rng(SEED)

# Simulation für Verteilung (N_SIM Pfade, nur letzte Werte wichtig)
S = np.empty((T + 1, N_SIM), dtype=float)
S[0, :] = S0
for i in range(1, T + 1):
    Z = rng.standard_normal(N_SIM)
    S[i, :] = S[i - 1, :] * np.exp((mu - 0.5 * sigma**2) * DT + sigma * np.sqrt(DT) * Z)

final_prices = S[-1, :]
quantiles = np.quantile(final_prices, [0.01, 0.05, 0.10, 0.90, 0.95, 0.99])
mean_price = final_prices.mean()

# ------------------- Daten für Visualisierung (N_PLOT) --------------------
S_plot = np.empty((T + 1, N_PLOT), dtype=float)
S_plot[0, :] = S0
for i in range(1, T + 1):
    Z = rng.standard_normal(N_PLOT)
    S_plot[i, :] = S_plot[i - 1, :] * np.exp((mu - 0.5 * sigma**2) * DT + sigma * np.sqrt(DT) * Z)

tage = np.arange(T + 1)
mean_path = S_plot.mean(axis=1)

# --------------------------- Grafik 1: Stochastische Pfade (200 Pfade angezeigt)---
plt.figure()
for j in range(N_PLOT):
    plt.plot(tage, S_plot[:, j], alpha=0.2, linewidth=0.6)
plt.plot(tage, mean_path, linewidth=2.0)  # Mittelwert-Pfad
plt.title("Simulierte Aluminiumpreise (GBM) | 200 Pfade")
plt.xlabel("Tage")
plt.ylabel("Preis (EUR pro Tonne)")
plt.tight_layout()
plt.show()

# -------------------- Grafik 2: Histogramm Tag 90 -------------------------
plt.figure()
plt.hist(final_prices, bins=50, alpha=0.6, edgecolor="black")
plt.axvline(mean_price, linestyle="--", linewidth=1.2)
for q in quantiles:
    plt.axvline(q, linestyle=":")
plt.title("Histogramm der simulierten Aluminiumpreise am Tag 90")
plt.xlabel("Preis (EUR pro Tonne)")
plt.ylabel("Häufigkeit")
# kleine Annotationen:
ymax = plt.gca().get_ylim()[1]
plt.text(mean_price, 0.95*ymax, f"Mean: {mean_price:.2f}", rotation=90, va="top")
labels = ["P1", "P5", "P10", "P90", "P95", "P99"]
for q, lab in zip(quantiles, labels):
    plt.text(q, 0.9*ymax, f"{lab}: {q:.2f}", rotation=90, va="top")
plt.tight_layout()
plt.show()

# ------------- Grafik 3: Kumulative Verteilungsfunktion (CDF) -------------
x_sorted = np.sort(final_prices)
y_ecdf   = np.arange(1, len(x_sorted) + 1) / len(x_sorted)

plt.figure()
plt.step(x_sorted, y_ecdf, where="post")
plt.axvline(mean_price, linestyle="--", linewidth=1.2)
for q in quantiles:
    plt.axvline(q, linestyle=":")
plt.title("CDF der simulierten Aluminiumpreise am Tag 90")
plt.xlabel("Preis (EUR pro Tonne)")
plt.ylabel("Wahrscheinlichkeit")
# Anmerkungen
plt.text(mean_price, 0.1, f"Mean: {mean_price:.2f}", rotation=90, va="bottom")
for q, lab, y in zip(quantiles, labels, [0.15, 0.20, 0.25, 0.75, 0.80, 0.85]):
    plt.text(q, y, f"{lab}: {q:.2f}", rotation=90, va="bottom")
plt.tight_layout()
plt.show()

# -------------------------- Konsolen-Ausgabe ------------------------------
print("\nZusammenfassung Tag 90")
print(f"  Mittelwert: {mean_price:.4f}")
for lab, q in zip(labels, quantiles):
    print(f"  {lab}: {q:.4f}")
# ------------------------------------------------------------------------
