# -*- coding: utf-8 -*-
"""
Created on Fri Oct  3 11:27:11 2025

@author: Moritz Romeike
"""

# ------------------------------------------------------------------------
# Programmcode 01 (Python): Generierung fiktiver Daten für Data-Analytics-Beispiel
# ------------------------------------------------------------------------
import numpy as np
import pandas as pd
from pathlib import Path

# Reproducibility: fix the global NumPy seed at 42 so every run draws the same values.
np.random.seed(42)

# Number of records (rows) to generate.
num_records = 100

# --- Field generation -----------------------------------------------------
# NOTE: the order of the np.random calls below determines the generated data;
# reordering them changes every value.

# Consecutive project IDs 1..num_records.
project_ids = np.arange(num_records) + 1

# Risk category per record, drawn with replacement from a fixed pool.
risk_categories_pool = ["Operational", "Financial", "Strategic", "Compliance"]
risk_categories = np.random.choice(risk_categories_pool, num_records)

# Risk description per record, drawn with replacement and independently of the category.
risk_descriptions_pool = [
    "System failure",
    "Market volatility",
    "Change in leadership",
    "New regulations",
    "Supply chain disruption",
    "Currency fluctuations",
    "Mergers and acquisitions",
    "Tax policy changes",
    "Data breach",
    "Interest rate changes",
    "Competitive pressure",
    "Legal issues",
    "Workplace accidents",
    "Credit risk",
    "Product innovation",
    "Environmental regulations",
    "Human error",
    "Liquidity risk",
    "Reputation risk",
    "Health and safety compliance",
]
risk_descriptions = np.random.choice(risk_descriptions_pool, num_records)

# Likelihood: uniform draws between 0.1 and 0.9 (counterpart of R's runif(0.1, 0.9)).
likelihood = np.random.uniform(low=0.1, high=0.9, size=num_records)

# Impact: integers 1..10 stored as nullable Int64 so that <NA> can be assigned
# later (stands in for R's NA in integer columns).
impact = pd.Series(
    np.random.choice(np.arange(1, 11), num_records),
    dtype="Int64",
)

# Risk score = likelihood x impact, computed before outliers and NAs are injected.
risk_score = impact.astype(float) * likelihood

# Inject outliers into the last five rows (R rows 96..100 -> Python positions 95..99):
# the score is multiplied by 10 and the impact is shifted up by 10.
outlier_idx = list(range(95, 100))
risk_score.iloc[outlier_idx] = 10 * risk_score.iloc[outlier_idx]
impact.iloc[outlier_idx] = 10 + impact.iloc[outlier_idx]

# Inject missing values at fixed positions.
risk_score.iloc[list(range(4, 45, 10))] = np.nan  # positions 4, 14, ..., 44 (R: 5,15,25,35,45)
impact.iloc[list(range(9, 50, 10))] = pd.NA       # positions 9, 19, ..., 49 (R: 10,20,30,40,50)

# Assemble the final table; the dictionary's insertion order defines the column order.
columns = {
    "Project_ID": project_ids,
    "Risk_Category": risk_categories,
    "Risk_Description": risk_descriptions,
    "Likelihood": likelihood,
    "Impact": impact,
    "Risk_Score": risk_score,
}
data = pd.DataFrame(columns)

# Write the table to an Excel workbook (relative path, so it lands in the
# current working directory); the row index is not exported.
output_file = "Kap_2.2_Risk_Management_Data.xlsx"
data.to_excel(output_file, index=False)

# Preview the first five rows.
print(data.head(5))
# ------------------------------------------------------------------------
