# -*- coding: utf-8 -*-
"""
Created on Sat Oct  4 09:21:49 2025

@author: Moritz Romeike
"""
# --------------------------------------------------------------------------------
# Programmcode 31 (Python): Vergleich der hierarchischen Verfahren anhand der Dendrogramme
# --------------------------------------------------------------------------------

# Python-Pakete und Bibliotheken laden -------------------------------------------

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from scipy.cluster.hierarchy import linkage, dendrogram
import matplotlib.pyplot as plt

# Load the prepared invoice data ---------------------------------------------------
# The CSV uses ';' as field separator and ',' as decimal mark.
data_supp_invoice = pd.read_csv(
    "daten_kmeans_clustering.csv",
    sep=";",
    decimal=",",
)

# Standardize the data with the z-transformation -----------------------------------
# Only numeric columns take part in the standardization; all other columns are
# left out of the matrix that is passed on to the clustering step.
numeric_cols = data_supp_invoice.select_dtypes(include=[np.number]).columns
numeric_frame = data_supp_invoice[numeric_cols]

# NOTE: StandardScaler centers each column and divides by the population
# standard deviation (ddof=0), so values can differ slightly from R's scale().
scaler = StandardScaler()
scaler.fit(numeric_frame)
invoices_stand = scaler.transform(numeric_frame)

# Hierarchical clustering on the Euclidean distances between all object pairs.
# For every linkage method one cluster solution is computed and its dendrogram
# is shown in a separate figure.


def _cluster_and_plot(observations, method, title):
    """Cluster *observations* hierarchically and show the dendrogram.

    Parameters
    ----------
    observations : array-like
        Observation matrix handed to ``scipy.cluster.hierarchy.linkage``.
    method : str
        Linkage method name understood by SciPy (e.g. ``"single"``).
    title : str
        Title of the dendrogram figure.

    Returns
    -------
    The linkage matrix returned by ``linkage``.
    """
    linkage_matrix = linkage(observations, method=method, metric="euclidean")
    plt.figure()
    dendrogram(linkage_matrix)
    plt.title(title)
    plt.xlabel("Objekte")
    plt.ylabel("Distanz")
    plt.show()
    return linkage_matrix


# single
cluster_singleL = _cluster_and_plot(
    invoices_stand, "single", "Dendrogramm – single linkage"
)

# complete
cluster_completeL = _cluster_and_plot(
    invoices_stand, "complete", "Dendrogramm – complete linkage"
)

# average
cluster_averageL = _cluster_and_plot(
    invoices_stand, "average", "Dendrogramm – average linkage"
)

# centroid
# SciPy offers 'centroid' as a linkage method; on raw observations it requires
# the Euclidean metric. Centroid linkage is not monotone, so the dendrogram
# may contain inversions (a merge drawn below one of its child merges).
cluster_centroidL = _cluster_and_plot(
    invoices_stand, "centroid", "Dendrogramm – centroid linkage"
)

# ward
# NOTE: SciPy's method="ward" applies Ward's minimum-variance criterion to
# unsquared Euclidean distances. That corresponds to R's
# hclust(..., method = "ward.D2"), NOT to "ward.D"; merge heights can therefore
# differ from an R run that used "ward.D" on dist().
cluster_ward = _cluster_and_plot(
    invoices_stand, "ward", "Dendrogramm – Ward (ward)"
)
# --------------------------------------------------------------------------------

