# -*- coding: utf-8 -*-
"""
Created on Sat Oct  4 10:06:57 2025

@author: Moritz Romeike
"""

# ------------------------------------------------------------------------
# Programmcode 36 (Python): GBM-Simulation mit Jump-Prozess für Aluminiumpreise
# Replikation des R-Codes mit: Daten laden, Renditen, GBM+Jump, Plots, Quantile
# ------------------------------------------------------------------------
import os
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ----------------------------- Parameter ---------------------------------
FILE_XLSX   = "Kap_9.4_Aluminium_Price_20251110.xlsx"
SEED        = 42
T           = 90            # Vorhersagehorizont (Tage)
N_SIM       = 10_000        # Anzahl Simulationen
N_PLOT      = 200           # Pfade für Visualisierung
DT          = 1.0           # Zeitschritt (Tage)
JUMP_INTENS = 0.15          # +15% Sprung
# ------------------------------------------------------------------------

# ----------------------------- Daten laden -------------------------------
base = Path(os.getcwd())

def load_prices():
    xlsx = base / FILE_XLSX
    if xlsx.exists():
        try:
            df = pd.read_excel(xlsx, engine="openpyxl")
            df.columns = ["Datum", "Preis"]
            return df
        except Exception as e:
            print(f"⚠️ Excel nicht lesbar ({e}). CSV-Fallback …")


aluminium_data = load_prices()

# Datum parsen und sortieren
aluminium_data["Datum"] = pd.to_datetime(aluminium_data["Datum"], errors="coerce")
aluminium_data = aluminium_data.sort_values("Datum").reset_index(drop=True)

# Tägliche log-Renditen
aluminium_data["Renditen"] = np.log(aluminium_data["Preis"] / aluminium_data["Preis"].shift(1))
aluminium_data = aluminium_data.dropna(subset=["Renditen"]).reset_index(drop=True)

# Drift und Volatilität (täglich)
mu    = float(aluminium_data["Renditen"].mean())
sigma = float(aluminium_data["Renditen"].std())

# Startpreis (letzter bekannter Preis)
S0 = float(aluminium_data["Preis"].iloc[-1])

print(f"S0={S0:.4f}, mu(täglich)={mu:.6f}, sigma(täglich)={sigma:.6f}")

# ----------------------- GBM + Jump-Diffusion ----------------------------
# Diskret: S_{t+1} = S_t * exp((mu - 0.5*sigma^2)*DT + sigma*sqrt(DT)*Z) * Jump
rng = np.random.default_rng(SEED)

# Simulationsmatrix
S = np.empty((T + 1, N_SIM), dtype=float)
S[0, :] = S0

# Zufälliger Sprungtag je Pfad (zwischen Tag 2..T analog R)
jump_days = rng.integers(low=2, high=T + 1, size=N_SIM)  # Python-Index: 1..T -> wir nutzen 2..T inkl.

for i in range(1, T + 1):
    Z = rng.standard_normal(N_SIM)
    drift = (mu - 0.5 * sigma**2) * DT
    diffu = sigma * np.sqrt(DT) * Z
    S[i, :] = S[i - 1, :] * np.exp(drift + diffu)

    # Jump anwenden, wenn Pfad seinen Sprungtag erreicht
    mask_jump = (i == jump_days)
    if mask_jump.any():
        S[i, mask_jump] *= (1.0 + JUMP_INTENS)

# ---------------------- Visualisierungsdaten (200 Pfade) -----------------
sim_idx = np.arange(min(N_PLOT, N_SIM))
S_plot = S[:, sim_idx]
tage = np.arange(T + 1)
mean_path = S_plot.mean(axis=1)

# ----------------------------- Grafik 1: Pfade ---------------------------
plt.figure()
for j in sim_idx:
    plt.plot(tage, S_plot[:, j], alpha=0.2, linewidth=0.5)
plt.plot(tage, mean_path, linewidth=2.0)
plt.title("Simulierte Aluminiumpreise mit Jump-Prozess (GBM) | 200 Pfade")
plt.xlabel("Tage")
plt.ylabel("Preis (EUR pro Tonne)")
plt.tight_layout()
plt.show()

# ------------------ Quantile & Endverteilung (Tag 90) --------------------
final_prices = S[-1, :]
quantiles = np.quantile(final_prices, [0.01, 0.05, 0.10, 0.90, 0.95, 0.99])
mean_price = float(final_prices.mean())

# --------------------------- Grafik 2: Histogramm ------------------------
plt.figure()
counts, bins, _ = plt.hist(final_prices, bins=50, alpha=0.6, edgecolor="black")
plt.axvline(mean_price, color="red", linestyle="--", linewidth=1.2)
plt.title("Histogramm der simulierten Aluminiumpreise (Tag 90)")
plt.xlabel("Preis (EUR pro Tonne)")
plt.ylabel("Häufigkeit")
plt.tight_layout()
plt.show()

# ----------------------------- Grafik 3: CDF -----------------------------
# ECDF ohne Zusatzpakete
x_sorted = np.sort(final_prices)
y_ecdf   = np.arange(1, len(x_sorted) + 1) / len(x_sorted)

plt.figure()
plt.step(x_sorted, y_ecdf, where="post")
plt.axvline(mean_price, color="red", linestyle="--", linewidth=1.2)
plt.title("Kumulative Verteilungsfunktion (CDF) der simulierten Preise (Tag 90)")
plt.xlabel("Preis (EUR pro Tonne)")
plt.ylabel("Wahrscheinlichkeit")
plt.tight_layout()
plt.show()

# --------------------------- Konsolen-Output -----------------------------
print("\nQuantile (Tag 90):")
for p, q in zip([1,5,10,90,95,99], quantiles):
    print(f"  P{p:>2}: {q:.4f}")
print(f"  Mean: {mean_price:.4f}")
# ------------------------------------------------------------------------
