# -*- coding: utf-8 -*-
"""
Created on Sat Oct  4 09:20:39 2025

@author: Moritz Romeike
"""
# --------------------------------------------------------------------------------
# Programmcode 30 (Python): Complete Linkage & Dendrogramm
# ohne SciPy – reine numpy/pandas/matplotlib
# --------------------------------------------------------------------------------
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

# --- Daten laden (read.csv2-ähnlich: sep=';') -----------------------------------
csv_path = "daten_kmeans_clustering.csv"
df = pd.read_csv(csv_path, sep=';', dtype=str, encoding='utf-8', engine='python')

# Trim und numerisch konvertieren (Dezimalkomma -> Punkt)
df.columns = df.columns.str.strip()
for c in df.columns:
    df[c] = df[c].astype(str).str.strip()

num_cols = ["LieferantenID", "Zeit", "Rechnungsbetrag"]
for c in num_cols:
    df[c] = pd.to_numeric(
        df[c].str.replace(".", "", regex=False).str.replace(",", ".", regex=False),
        errors="coerce"
    )

# --- Beispiel 6 Rechnungen ------------------------------------------------------
data_example = df.loc[:5, num_cols].to_numpy(dtype=float)  # 6 Zeilen
labels = [f"Obj{i+1}" for i in range(data_example.shape[0])]  # einfache Labels wie Reihenfolge

# --- z-Standardisierung ---------------------------------------------------------
X = data_example
mu = X.mean(axis=0, keepdims=True)
sd = X.std(axis=0, ddof=1, keepdims=True)
sd[sd == 0] = 1.0
Z = (X - mu) / sd

# --- euklidische Distanzmatrix --------------------------------------------------
def euclidean_distance_matrix(A: np.ndarray) -> np.ndarray:
    G = A @ A.T
    sq = np.diag(G)
    D2 = sq[:, None] + sq[None, :] - 2*G
    D2[D2 < 0] = 0.0
    return np.sqrt(D2)

D = euclidean_distance_matrix(Z)

# --- Hierarchisches Clustering: Complete Linkage (ohne SciPy) -------------------
# Gibt Linkage-Matrix wie SciPy: [idxA, idxB, height, sizeMerged]
# Leaf-Indizes: 0..n-1; neue Cluster bekommen IDs n, n+1, ...
def hclust_complete(D: np.ndarray):
    n = D.shape[0]
    # Start: jeder Punkt eigener Cluster
    clusters = {i: [i] for i in range(n)}
    active = list(clusters.keys())
    next_id = n
    linkage = []

    # Hilfsfunktion: Distanz zweier Cluster = max Paar-Distanz
    def dist_complete(ca, cb):
        return np.max(D[np.ix_(ca, cb)])

    while len(active) > 1:
        best = None
        # finde Paar mit minimaler Complete-Linkage-Distanz
        for i in range(len(active)):
            for j in range(i+1, len(active)):
                a, b = active[i], active[j]
                da = dist_complete(clusters[a], clusters[b])
                if (best is None) or (da < best[0]):
                    best = (da, a, b)
        height, a, b = best
        # merge
        new_members = clusters[a] + clusters[b]
        linkage.append([a, b, float(height), len(new_members)])
        # update Strukturen
        del clusters[a]; del clusters[b]
        active = [x for x in active if x not in (a, b)]
        clusters[next_id] = new_members
        active.append(next_id)
        next_id += 1

    return np.array(linkage, dtype=float)

linkage = hclust_complete(D)

# --- Fusionshöhen ausgeben (wie get_branches_heights) ---------------------------
merge_heights = linkage[:, 2]
print("Fusionshöhen (Complete Linkage):")
print(np.round(merge_heights, 6).tolist())

# --- Dendrogramm zeichnen (horizontal) ------------------------------------------
# Aus der Linkage einen Binärbaum bauen und plotten.

class Node:
    def __init__(self, idx, left=None, right=None, height=0.0, size=1, leaf=True):
        self.idx = idx
        self.left = left
        self.right = right
        self.height = height
        self.size = size
        self.leaf = leaf
        self.x = None  # y-Koordinate im horizontalen Plot (Reihenfolge)
        self.y = None  # x-Koordinate = Höhe

def build_tree(linkage, n_leaves):
    nodes = {i: Node(i, height=0.0, size=1, leaf=True) for i in range(n_leaves)}
    next_id = n_leaves
    for row in linkage:
        a, b, h, sz = int(row[0]), int(row[1]), float(row[2]), int(row[3])
        left = nodes[a]
        right = nodes[b]
        parent = Node(next_id, left=left, right=right, height=h, size=sz, leaf=False)
        nodes[next_id] = parent
        next_id += 1
    # root ist letzter erstellter
    return nodes[next_id - 1], nodes

def assign_leaf_positions(root: Node, nodes: dict, leaf_order):
    # leaf_order ist Liste der Leaf-IDs in gewünschter Reihenfolge (hier: 0..n-1)
    # Wir setzen x (vertikale Position im Horizontal-Plot) entsprechend der Reihenfolge
    pos = {leaf_id: i for i, leaf_id in enumerate(leaf_order)}
    def dfs(node):
        if node.leaf:
            node.x = pos[node.idx]
            node.y = node.height
        else:
            dfs(node.left)
            dfs(node.right)
            node.x = (node.left.x + node.right.x) / 2.0
            node.y = node.height
    dfs(root)

def plot_dendrogram_horizontal(root: Node, labels):
    fig, ax = plt.subplots(figsize=(8, 4))
    def draw(node):
        if node.leaf:
            # Blatt: nur Label später
            return
        # zeichne vertikale Linien der Kinder bis Eltern-Höhe
        for child in [node.left, node.right]:
            ax.plot([child.y, node.y], [child.x, child.x], '-', color='k')  # horizontal (Höhe)
            ax.plot([node.y, node.y], [node.left.x, node.right.x], '-', color='k')  # vertikal Verbindung
            draw(child)
    draw(root)
    # Labels an Blätter
    def collect_leaves(node, res):
        if node.leaf:
            res.append(node)
        else:
            collect_leaves(node.left, res)
            collect_leaves(node.right, res)
    leaves = []
    collect_leaves(root, leaves)
    for leaf in leaves:
        ax.text(leaf.y, leaf.x, f" {labels[leaf.idx]}", va='center', ha='left')
        ax.plot([leaf.y, leaf.y], [leaf.x, leaf.x], 'o', color='k')  # Punkt am Blatt

    ax.set_xlabel("Höhe / Distanz")
    ax.set_ylabel("Objekte")
    ax.set_title("Dendrogramm (Complete Linkage) – horizontal")
    ax.invert_yaxis()  # damit erstes Objekt oben steht
    plt.tight_layout()
    plt.show()

# Baum bauen und zeichnen
n = Z.shape[0]
root, nodes = build_tree(linkage, n)
assign_leaf_positions(root, nodes, leaf_order=list(range(n)))
plot_dendrogram_horizontal(root, labels)

# Hinweis: Die „Höhe“ entspricht hier den Complete-Linkage-Distanzen der Fusionen.
# --------------------------------------------------------------------------------
