#!/usr/bin/env python3
# qet_conductors_resolve_links_fix10_sorted_by_page_no_sanitize.py
# Variante ohne sanitize: Debug- und Duplicates-Output optional per GUI-Checkbox.

import xml.etree.ElementTree as ET
from pathlib import Path
import tkinter as tk
from tkinter import filedialog, messagebox
import csv, json, re
from collections import defaultdict, deque

# Default hop limit used by resolve_endpoint() when it walks away from the
# starting element (passed as its max_link_depth default).
MAX_LINK_DEPTH = 2
# Matches text that starts with an optional '-' immediately followed by a digit.
LABEL_BMK_RE = re.compile(r'^-?\d')
# Matches text that is nothing but a number: digits, optionally ".digits".
NUMERIC_LABEL_RE = re.compile(r'^\d+(\.\d+)?$')

def clean_tag(tag):
    """Return the tag name without a leading ``{namespace}`` part, stripped.

    A falsy tag (``None`` or ``''``) yields an empty string.
    """
    if not tag:
        return ''
    # Everything after the last '}' is the local name; without a '}' the
    # whole tag is returned.
    _, _, local_name = tag.rpartition('}')
    return local_name.strip()

def normalize_uuid(u):
    """Return *u* trimmed, without surrounding curly braces, in lower case.

    A falsy value (``None`` or ``''``) yields an empty string.
    """
    return u.strip().strip('{}').lower() if u else ''

def build_parent_map(root):
    """Return a dict mapping every descendant element of *root* to its parent.

    *root* itself has no entry because it has no parent inside the tree.
    """
    return {child: owner for owner in root.iter() for child in owner}

def climb_to_diagram(elem, parent_map):
    """Walk upwards from *elem* and return the nearest ``diagram`` element.

    *elem* itself is returned when its tag is ``diagram`` (namespace and case
    ignored). ``None`` is returned when no such ancestor is reachable through
    *parent_map*, or when *elem* is ``None``.
    """
    node = elem
    while node is not None and clean_tag(node.tag).lower() != 'diagram':
        node = parent_map.get(node)
    return node

def find_diagram_folio(diagram_elem):
    """Return a page identifier for a diagram element, or ``''``.

    Lookup order: the first non-empty attribute among ``folio``, ``page``,
    ``name``, ``title``; then, for each direct child tagged ``folio`` or
    ``page``, its stripped text or its ``value`` attribute.
    """
    if diagram_elem is None:
        return ''

    attrs = diagram_elem.attrib
    from_attribute = next(
        (attrs[key] for key in ('folio','page','name','title') if attrs.get(key)),
        None,
    )
    if from_attribute is not None:
        return from_attribute

    for child in diagram_elem:
        if clean_tag(child.tag).lower() not in ('folio','page'):
            continue
        text = (child.text or '').strip()
        if text:
            return text
        value = child.attrib.get('value')
        if value:
            return value
    return ''

def get_exact_child_or_attr(elem, exact_names):
    """Read a value from a child element or attribute named in *exact_names*.

    Names are compared case-insensitively. The first direct child whose tag
    matches decides the result: its stripped text, else the first non-empty
    attribute among ``value``, ``name``, ``label``, ``ref``, ``id``, else
    ``''``. Attributes of *elem* are only consulted when no child matches.
    """
    wanted = [name.lower() for name in exact_names]

    for child in elem:
        if clean_tag(child.tag).lower() not in wanted:
            continue
        text = (child.text or '').strip()
        if text:
            return text
        # A matching child without any value ends the search with ''.
        return next(
            (child.attrib[key] for key in ('value','name','label','ref','id') if child.attrib.get(key)),
            '',
        )

    return next((value for key, value in elem.attrib.items() if key.lower() in wanted), '')

# ---------------- Elements ----------------
def collect_elements_info(root, parent_map):
    """Collect per-element metadata for every ``element`` node below *root*.

    Returns a dict keyed by the normalized element uuid (``uuid`` attribute,
    falling back to ``id``); elements without one are skipped. Each value is a
    dict with the keys ``folio``, ``links``, ``label``, ``name`` and
    ``is_placeholder``.
    """
    placeholder_tokens = ('naechste', 'vorherige', 'next', 'previous')
    info_by_uuid = {}

    for element in root.findall('.//element'):
        attrs = element.attrib
        uuid = normalize_uuid(attrs.get('uuid') or attrs.get('id') or '')
        if not uuid:
            continue

        folio = find_diagram_folio(climb_to_diagram(element, parent_map))

        # Explicit links to other elements (<links_uuids><link_uuid .../>).
        links = []
        links_parent = element.find('links_uuids')
        if links_parent is not None:
            raw_links = (
                node.attrib.get('uuid') or node.attrib.get('id') or ''
                for node in links_parent.findall('link_uuid')
            )
            links = [normalize_uuid(raw) for raw in raw_links if raw]

        # First non-empty "label" / "name" entry of <elementInformations> wins.
        label = name = ''
        infos = element.find('elementInformations')
        if infos is not None:
            for entry in infos.findall('elementInformation'):
                kind = entry.attrib.get('name','').lower()
                text = (entry.text or '').strip()
                if not text:
                    continue
                if kind == 'label' and not label:
                    label = text
                elif kind == 'name' and not name:
                    name = text

        # The element type marks placeholders via one of these substrings.
        type_value = (attrs.get('type') or '').lower()
        is_placeholder = any(token in type_value for token in placeholder_tokens)

        # Dynamic texts are only consulted when no label was found and the
        # element is not a placeholder.
        dynamic = element.find('dynamic_texts')
        if dynamic is not None and not (label or is_placeholder):
            for dyn_text in dynamic.findall('.//dynamic_elmt_text'):
                info_node = dyn_text.find('info_name')
                text_node = dyn_text.find('text')
                kind = (info_node.text or '').lower() if info_node is not None else ''
                text = (text_node.text or '').strip() if text_node is not None else ''
                if not text:
                    continue
                if kind == 'label' and not label:
                    label = text
                elif kind == 'name' and not name:
                    name = text

        info_by_uuid[uuid] = {
            'folio': folio or '',
            'links': links,
            'label': label,
            'name': name,
            'is_placeholder': is_placeholder,
        }
    return info_by_uuid

def build_canonical_map(elements_info):
    """Map every element uuid to a representative uuid of its link group.

    Starting from each not-yet-visited key of *elements_info*, all uuids
    reachable through the ``links`` lists are gathered into one group (linked
    uuids that are not keys of *elements_info* are included too). Every member
    is mapped to the lexicographically smallest uuid of that group.
    """
    canonical = {}
    seen = set()
    for start in list(elements_info):
        if start in seen:
            continue
        component = set()
        pending = [start]
        while pending:
            current = pending.pop()
            if current in component:
                continue
            component.add(current)
            pending.extend(
                target
                for target in elements_info.get(current, {}).get('links', [])
                if target and target not in component
            )
        seen |= component
        representative = min(component)
        canonical.update(dict.fromkeys(component, representative))
    return canonical

# ---------------- Conductors ----------------
def collect_conductors(root, parent_map):
    """Return one dict per child of every ``conductors`` element below *root*.

    Each dict carries a running 1-based ``index``, the normalized endpoint
    uuids, the label/linked/name texts of both ends plus their combined
    ``*_final`` value, terminal names, colour, tension, section, function,
    ``displaytext``, the page (``folio``) and a copy of the raw attributes
    under ``raw_attrib``.
    """
    def lookup(node, attrs, names, *attr_keys):
        # Child/attribute lookup first, then the listed raw attributes in order.
        found = get_exact_child_or_attr(node, names)
        if found:
            return found
        for key in attr_keys:
            if attrs.get(key):
                return attrs[key]
        return ''

    folio_by_diagram = {id(d): find_diagram_folio(d) for d in root.findall('.//diagram')}
    collected = []

    for group in root.findall('.//conductors'):
        for cond in group:
            attrs = dict(cond.attrib)

            def side_texts(n):
                label = lookup(cond, attrs, [f'element{n}_label'], f'element{n}_label')
                linked = lookup(cond, attrs, [f'element{n}_linked', f'element{n}_link'],
                                f'element{n}_linked', f'element{n}_link')
                name = lookup(cond, attrs, [f'element{n}_name'], f'element{n}_name')
                return label, linked, name, (label or linked or name or '')

            e1_label, e1_linked, e1_name, e1_final = side_texts(1)
            e2_label, e2_linked, e2_name, e2_final = side_texts(2)

            color = attrs.get('conductor_color','') or ''
            tension = lookup(cond, attrs, ['tension_protocol','tension','protocol'], 'tension_protocol')
            section = lookup(cond, attrs, ['conductor_section'], 'conductor_section')
            function = lookup(cond, attrs, ['function','funktion'], 'function')
            term1 = lookup(cond, attrs, ['terminalname1'], 'terminalname1')
            term2 = lookup(cond, attrs, ['terminalname2'], 'terminalname2')

            # Page of the enclosing diagram; conductor attributes are the fallback.
            diagram = climb_to_diagram(group, parent_map)
            folio = folio_by_diagram.get(id(diagram)) if diagram is not None else ''
            if not folio:
                folio = attrs.get('folio','') or attrs.get('page','') or ''

            collected.append({
                # every child appends exactly once, so this is the running index
                'index': len(collected) + 1,
                'el1_uuid': normalize_uuid(attrs.get('element1') or attrs.get('element1id') or ''),
                'el2_uuid': normalize_uuid(attrs.get('element2') or attrs.get('element2id') or ''),
                'element1_label': e1_label,
                'element1_linked': e1_linked,
                'element1_name': e1_name,
                'element1_final': e1_final,
                'element2_label': e2_label,
                'element2_linked': e2_linked,
                'element2_name': e2_name,
                'element2_final': e2_final,
                'terminalname1': term1,
                'terminalname2': term2,
                'conductor_color': color,
                'tension_protocol': tension,
                'conductor_section': section,
                'function': function,
                'displaytext': attrs.get('displaytext',''),
                'folio': folio or '',
                # raw attributes are kept for later debug/score usage
                'raw_attrib': attrs,
            })
    return collected

def build_el_to_conductors(conductors):
    """Index conductors by the uuids of their endpoints.

    Returns a ``defaultdict(list)`` mapping each non-empty ``el1_uuid`` /
    ``el2_uuid`` to the conductor dicts attached to it. A conductor whose two
    ends are the same element is listed twice under that uuid.
    """
    by_element = defaultdict(list)
    for conductor in conductors:
        for side in ('el1_uuid', 'el2_uuid'):
            endpoint = conductor[side]
            if endpoint:
                by_element[endpoint].append(conductor)
    return by_element

# ---------------- Scoring & Resolve ----------------
def score_candidate(cond, other_uuid, elements_info, source_folio, candidate_label_hint=None):
    """Return an integer score for reaching *other_uuid* via conductor *cond*.

    Known elements (present in *elements_info*) score for a usable label,
    a name and for sitting on a different page than *source_folio*. Unknown
    elements only score through *candidate_label_hint*. The conductor itself
    adds small bonuses. A conductor whose raw ``num`` attribute is
    ``'Brücke'`` always scores -9999.
    """
    total = 0
    if other_uuid not in elements_info:
        if candidate_label_hint:
            total += 150
            if LABEL_BMK_RE.search(candidate_label_hint):
                total += 200
    else:
        info = elements_info[other_uuid]
        label_usable = (
            info.get('label')
            and not info.get('is_placeholder')
            and not NUMERIC_LABEL_RE.match(info.get('label','').strip())
        )
        if label_usable:
            total += 1300 if LABEL_BMK_RE.search(info.get('label','')) else 1000
        if info.get('name'):
            total += 200
        other_folio = info.get('folio')
        if other_folio and source_folio and other_folio != source_folio:
            total += 80

    # Conductor-level bonuses: (condition, points).
    bonuses = (
        (cond.get('displaytext') == '1', 80),
        (cond.get('conductor_section'), 10),
        (cond.get('tension_protocol'), 5),
        (cond.get('terminalname1') or cond.get('terminalname2'), 6),
    )
    total += sum(points for hit, points in bonuses if hit)

    # "num" == "Brücke" overrides everything with a very low score; such
    # conductors are filtered later.
    if cond.get('raw_attrib', {}).get('num', '') == 'Brücke':
        total = -9999
    return total

def resolve_endpoint(cond_side_uuid, elements_info, el_to_cons, cond, cond_side_tuple, source_folio, skip_conductor_index=None, max_link_depth=MAX_LINK_DEPTH):
    """
    Resolve which element (and which label) a conductor end should be reported as.

    Returns (chosen_uuid, chosen_label, candidates_debug).
    Decision details are recorded in candidates_debug for later inspection (if requested).

    Parameters:
        cond_side_uuid: uuid of the element at this end of the conductor.
        elements_info: dict as produced by collect_elements_info().
        el_to_cons: mapping uuid -> list of conductor dicts touching that element.
        cond, cond_side_tuple: accepted but not read inside this function.
        source_folio: page of the conductor; forwarded to score_candidate().
        skip_conductor_index: conductors with this 'index' are ignored
            (a falsy value disables the skipping).
        max_link_depth: nodes farther away than this many hops are not
            examined; None disables the limit.

    (None, None, debug) is returned when cond_side_uuid is empty/unknown or
    when nothing suitable is found.
    """
    candidates_debug = []
    if not cond_side_uuid or cond_side_uuid not in elements_info:
        return None, None, candidates_debug

    # quick direct case: no explicit links and not placeholder -> element itself
    if not elements_info[cond_side_uuid]['links'] and not elements_info[cond_side_uuid].get('is_placeholder'):
        info = elements_info[cond_side_uuid]
        chosen_label = info.get('label') or info.get('name')
        # a purely numeric label is not reported as a label
        if chosen_label and NUMERIC_LABEL_RE.match(chosen_label.strip()):
            chosen_label = None
        candidates_debug.append({'other_uuid': cond_side_uuid, 'best_score': None, 'causes': [], '_decision': {'reason': 'direct_element', 'chosen_label': chosen_label}})
        return cond_side_uuid, chosen_label, candidates_debug

    # Breadth-first walk starting at the conductor's own element.
    visited = {cond_side_uuid}           # nodes already queued
    q = deque([(cond_side_uuid, 0)])     # (uuid, hop distance)
    found_candidates = {}                # other uuid -> best score seen
    found_causes = defaultdict(list)     # other uuid -> [(conductor, score, label hint)]
    visited_vs = set()                   # nodes actually examined (used by the fallback below)

    while q:
        node, dist = q.popleft()
        if max_link_depth is not None and dist > max_link_depth:
            continue
        visited_vs.add(node)

        for c in el_to_cons.get(node, []):
            if skip_conductor_index and c.get('index') == skip_conductor_index:
                continue
            # the end of c that is not `node`
            other = c['el1_uuid'] if c['el2_uuid'] == node else c['el2_uuid']

            # textual hint carried by the conductor for its far side
            if c['el1_uuid'] == node:
                label_hint = c.get('element2_linked') or c.get('element2_label') or c.get('element2_name') or None
            else:
                label_hint = c.get('element1_linked') or c.get('element1_label') or c.get('element1_name') or None

            # no far element (or a self-loop): only usable if the conductor has a hint
            if not other or other == node:
                if label_hint:
                    sc = score_candidate(c, other, elements_info, source_folio, candidate_label_hint=label_hint)
                    found_candidates[other] = max(found_candidates.get(other, -1), sc)
                    found_causes[other].append((c, sc, label_hint))
                continue

            # A known element qualifies with a usable label, a name, or no
            # links of its own; anything else qualifies only through a hint.
            other_ok = False
            if other in elements_info:
                info = elements_info[other]
                if ((info.get('label') and not info.get('is_placeholder') and not NUMERIC_LABEL_RE.match(info.get('label','').strip()))
                    or info.get('name') or (not info.get('links'))):
                    other_ok = True
            if not other_ok and label_hint:
                other_ok = True

            if other_ok:
                sc = score_candidate(c, other, elements_info, source_folio, candidate_label_hint=label_hint)
                # Default of -1: a negative score never enters found_candidates,
                # but the cause is still recorded for the debug output.
                prev = found_candidates.get(other, -1)
                if sc > prev:
                    found_candidates[other] = sc
                found_causes[other].append((c, sc, label_hint))

        # stop at the first examined node that yields any candidate
        if found_candidates:
            break

        # otherwise continue through explicit links ...
        for linked in elements_info.get(node, {}).get('links', []):
            if linked and linked not in visited:
                visited.add(linked)
                q.append((linked, dist+1))

        # ... and through the far ends of attached conductors
        for c in el_to_cons.get(node, []):
            if skip_conductor_index and c.get('index') == skip_conductor_index:
                continue
            other = c['el1_uuid'] if c['el2_uuid'] == node else c['el2_uuid']
            if other and other not in visited:
                visited.add(other)
                q.append((other, dist+1))

    # one debug entry per candidate with all conductors that led to it
    for other, causes in found_causes.items():
        best = max(c[1] for c in causes)
        causes_info = []
        for c, sc, hint in causes:
            linked_hint = None
            if c.get('element1_linked'):
                linked_hint = c.get('element1_linked')
            if c.get('element2_linked'):
                linked_hint = c.get('element2_linked') or linked_hint
            causes_info.append({'conductor_raw': c.get('raw_attrib'), 'score': sc, 'label_hint': hint, 'linked_hint': linked_hint})
        candidates_debug.append({'other_uuid': other, 'best_score': best, 'causes': causes_info})

    if found_candidates:
        # highest score wins; ties are broken by the smaller uuid
        chosen = sorted(found_candidates.items(), key=lambda kv: (-kv[1], kv[0]))[0][0]
        chosen_score = found_candidates.get(chosen, 0)
        chosen_label = None
        if chosen and chosen in elements_info:
            info = elements_info[chosen]
            if info.get('label') and not info.get('is_placeholder') and not NUMERIC_LABEL_RE.match(info.get('label','').strip()):
                chosen_label = info.get('label')
            elif info.get('name'):
                chosen_label = info.get('name')

        # Alternative labels from the conductors that led to `chosen`: the raw
        # element*_linked attributes (at the cause score) and the hint (score - 1).
        linked_candidates = []
        for c, sc, hint in found_causes.get(chosen, []):
            raw = c.get('raw_attrib', {})
            for lk_key in ('element1_linked','element2_linked'):
                lk = raw.get(lk_key)
                if lk and isinstance(lk, str) and lk.strip():
                    linked_candidates.append((lk.strip(), sc))
            if hint and isinstance(hint, str) and hint.strip():
                linked_candidates.append((hint.strip(), sc-1))

        chosen_label_after_link_pref = chosen_label

        best_link_label = None
        best_link_score = None
        if linked_candidates:
            # prefer alternatives matching LABEL_BMK_RE, then the highest score
            bmks = [(lk, sc) for (lk, sc) in linked_candidates if LABEL_BMK_RE.search(lk)]
            if bmks:
                best_link_label, best_link_score = sorted(bmks, key=lambda x: -x[1])[0]
            else:
                best_link_label, best_link_score = sorted(linked_candidates, key=lambda x: -x[1])[0]

            # element labels containing one of these words are treated as generic
            chosen_is_generic = False
            if chosen_label:
                low = chosen_label.lower()
                if any(token in low for token in ('wechsler','digitaler','klemme','blindanschluss','nächste','vorherige','folie','blind')):
                    chosen_is_generic = True

            # The alternative replaces the element label when there is none, when
            # it matches LABEL_BMK_RE and scores within 200 of the chosen
            # candidate, or when the element label is generic.
            if (not chosen_label) or (best_link_label and LABEL_BMK_RE.search(best_link_label) and best_link_score >= chosen_score - 200) or chosen_is_generic:
                chosen_label_after_link_pref = best_link_label

        # last resort: first non-numeric hint among the causes
        if not chosen_label_after_link_pref:
            for c, sc, hint in found_causes.get(chosen, []):
                if hint and not NUMERIC_LABEL_RE.match(str(hint).strip()):
                    chosen_label_after_link_pref = hint
                    break

        decision_summary = {
            'chosen_uuid': chosen,
            'chosen_score': chosen_score,
            'chosen_label_initial': chosen_label,
            'chosen_label_final': chosen_label_after_link_pref,
            'best_link_label': best_link_label,
            'best_link_score': best_link_score
        }
        candidates_debug.append({'_decision': decision_summary})

        return chosen, chosen_label_after_link_pref, candidates_debug

    # No candidate at all: fall back to the first examined node (in uuid order)
    # that has a usable label, or otherwise a name.
    labelled_vs = [v for v in visited_vs if v in elements_info and (elements_info[v].get('label') or elements_info[v].get('name'))]
    for v in sorted(set(labelled_vs)):
        info = elements_info[v]
        if info.get('label') and not info.get('is_placeholder') and not NUMERIC_LABEL_RE.match(info.get('label','').strip()):
            candidates_debug.append({'_decision': {'chosen_uuid': v, 'reason': 'fallback_label'}})
            return v, info.get('label'), candidates_debug
        if info.get('name'):
            candidates_debug.append({'_decision': {'chosen_uuid': v, 'reason': 'fallback_name'}})
            return v, info.get('name'), candidates_debug

    candidates_debug.append({'_decision': {'chosen_uuid': None, 'reason': 'none_found'}})
    return None, None, candidates_debug

def extract_terminal_from_candidates(candidates, chosen_uuid):
    """Return the terminal name belonging to *chosen_uuid*, or ``''``.

    Looks at the debug entries whose ``other_uuid`` equals *chosen_uuid*,
    takes the best-scored cause of each and reads the terminal name from its
    raw conductor attributes: the terminal on the side that is the chosen
    element, otherwise terminal 2, otherwise terminal 1.
    """
    if not (candidates and chosen_uuid):
        return ''

    for entry in candidates:
        if entry.get('other_uuid') != chosen_uuid:
            continue
        causes = entry.get('causes', [])
        if not causes:
            continue

        # first cause with the highest score
        top = min(causes, key=lambda cause: -cause.get('score', 0))
        raw = top.get('conductor_raw', {}) or {}
        sides = (
            (normalize_uuid(raw.get('element1') or ''),
             raw.get('terminalname1') or raw.get('terminal1') or ''),
            (normalize_uuid(raw.get('element2') or ''),
             raw.get('terminalname2') or raw.get('terminal2') or ''),
        )
        target = normalize_uuid(chosen_uuid)
        for side_uuid, terminal in sides:
            if side_uuid and side_uuid == target:
                return terminal or ''

        terminal_1, terminal_2 = sides[0][1], sides[1][1]
        if terminal_2:
            return terminal_2
        if terminal_1:
            return terminal_1
    return ''

def label_from_uuid_or_conductor_fallback(chosen_uuid, chosen_label, cond_tuple):
    """Return *chosen_label*, or the first non-empty text of *cond_tuple*.

    *cond_tuple* is ``(label, linked, name)`` taken from the conductor and is
    only unpacked when *chosen_label* is empty. ``''`` is returned when
    everything is empty. *chosen_uuid* is not used.
    """
    if chosen_label:
        return chosen_label
    label, linked, name = cond_tuple
    return label or linked or name or ''

# ---------------- CSV writers ----------------
def write_output_csv(rows, out_path: Path):
    """Write the result rows to *out_path* as a UTF-8 CSV file.

    Each column is filled from one row key; missing keys become empty cells.
    """
    # (column title, key inside the row dict), in output order
    columns = (
        ('Seite', 'folio'),
        ('Bauteil 1', 'element1_label'),
        ('Terminalname 1', 'terminalname1'),
        ('Bauteil 2', 'element2_label'),
        ('Terminalname 2', 'terminalname2'),
        ('Spannung / Protokoll', 'tension_protocol'),
        ('Aderfarbe', 'conductor_color'),
        ('Aderquerschnitt', 'conductor_section'),
        ('Funktion', 'function'),
    )
    with out_path.open('w', newline='', encoding='utf-8') as handle:
        writer = csv.DictWriter(handle, fieldnames=[title for title, _ in columns])
        writer.writeheader()
        for row in rows:
            writer.writerow({title: row.get(key, '') for title, key in columns})

def write_debug_csv(debug_rows, out_path: Path):
    """Write the per-endpoint debug records to *out_path* as UTF-8 CSV.

    The ``candidates`` list of each record is stored as a JSON string
    (non-ASCII characters are written unescaped).
    """
    header = ['conductor_index','side','source_uuid','chosen_uuid','chosen_label','candidates_json']
    with out_path.open('w', newline='', encoding='utf-8') as handle:
        writer = csv.writer(handle)
        writer.writerow(header)
        for record in debug_rows:
            writer.writerow([
                record.get('index'),
                record.get('side'),
                record.get('source_uuid',''),
                record.get('chosen_uuid',''),
                record.get('chosen_label',''),
                json.dumps(record.get('candidates',[]), ensure_ascii=False),
            ])

def write_duplicates_csv(dups, out_path: Path):
    """Write the dropped duplicate rows to *out_path* as UTF-8 CSV.

    Every entry of *dups* must provide ``duplicate_row``,
    ``original_row_index`` and ``duplicate_row_index``.
    """
    # (column title, key inside the duplicate row), in output order
    row_columns = (
        ('Seite', 'folio'),
        ('Bauteil1', 'element1_label'),
        ('Terminal1', 'terminalname1'),
        ('Bauteil2', 'element2_label'),
        ('Terminal2', 'terminalname2'),
        ('tension', 'tension_protocol'),
        ('color', 'conductor_color'),
        ('section', 'conductor_section'),
        ('function', 'function'),
    )
    header = ['original_row_index', 'duplicate_row_index'] + [title for title, _ in row_columns]
    with out_path.open('w', newline='', encoding='utf-8') as handle:
        writer = csv.DictWriter(handle, fieldnames=header)
        writer.writeheader()
        for entry in dups:
            row = entry['duplicate_row']
            record = {
                'original_row_index': entry['original_row_index'],
                'duplicate_row_index': entry['duplicate_row_index'],
            }
            record.update((title, row.get(key, '')) for title, key in row_columns)
            writer.writerow(record)

# ---------------- Signatures ----------------
def make_sorted_endpoint(a_id, a_term, b_id, b_term):
    """Return both ``(id, terminal)`` pairs as a tuple in ascending order.

    ``None`` values count as ``''`` and all texts are stripped, so the result
    does not depend on which end is passed first.
    """
    first = ((a_id or '').strip(), (a_term or '').strip())
    second = ((b_id or '').strip(), (b_term or '').strip())
    return (first, second) if first <= second else (second, first)

def build_signatures_for_row(r, candidates_a, candidates_b):
    """Return the set of order-independent signatures describing row *r*.

    Two rows sharing at least one signature are treated as the same
    connection by the caller. Signatures are built from:

    * the endpoint identifiers of the row (label in lower case, else the
      canonical uuid, else the normalized raw uuid), with both, none and
      each single terminal name;
    * the normalized raw uuids of the row, with and without terminals;
    * every conductor recorded as a cause in *candidates_a* /
      *candidates_b* (the debug lists returned by resolve_endpoint()),
      again with and without terminals.

    Parameters:
        r: row dict with ``element*_label``, ``terminalname*`` and the helper
           keys ``_el*_canon`` / ``_el*_uuid``.
        candidates_a, candidates_b: lists of debug entries; entries without a
           ``causes`` key are ignored.
    """
    sigs = set()

    def endpoint_id(label, canon, rawuuid):
        if label:
            return label.lower()
        if canon:
            return canon
        # Fixed: this used the undefined name `rawwuid` and raised NameError
        # whenever an endpoint had neither a label nor a canonical id.
        return normalize_uuid(rawuuid or '')

    id1 = endpoint_id(r.get('element1_label',''), r.get('_el1_canon',''), r.get('_el1_uuid',''))
    id2 = endpoint_id(r.get('element2_label',''), r.get('_el2_canon',''), r.get('_el2_uuid',''))
    t1 = (r.get('terminalname1') or '').strip()
    t2 = (r.get('terminalname2') or '').strip()
    sigs.add(make_sorted_endpoint(id1, t1, id2, t2))
    sigs.add(make_sorted_endpoint(id1, '', id2, ''))
    sigs.add(make_sorted_endpoint(id1, t1, id2, ''))
    sigs.add(make_sorted_endpoint(id1, '', id2, t2))

    raw1 = normalize_uuid(r.get('_el1_uuid','') or '')
    raw2 = normalize_uuid(r.get('_el2_uuid','') or '')
    if raw1 or raw2:
        sigs.add(make_sorted_endpoint(raw1, t1, raw2, t2))
        sigs.add(make_sorted_endpoint(raw1, '', raw2, ''))

    def add_from_candidates(cands):
        # Conductors that led to a resolved endpoint also identify this row.
        for entry in cands:
            for cause in entry.get('causes', []):
                raw = cause.get('conductor_raw', {}) or {}
                el1 = normalize_uuid(raw.get('element1') or '')
                el2 = normalize_uuid(raw.get('element2') or '')
                tn1 = raw.get('terminalname1') or raw.get('terminal1') or ''
                tn2 = raw.get('terminalname2') or raw.get('terminal2') or ''
                if el1 or el2:
                    sigs.add(make_sorted_endpoint(el1, tn1, el2, tn2))
                    sigs.add(make_sorted_endpoint(el1, '', el2, ''))

    add_from_candidates(candidates_a)
    add_from_candidates(candidates_b)
    return sigs

# ---------------- Helpers: folio key & endpoint sort id ----------------
def folio_sort_key(folio):
    """Return a sort key that orders pages numerically where possible.

    Pages containing a digit run sort first, by the integer value of the
    first run: ``(0, number)``. All others sort afterwards by their
    lower-cased text: ``(1, text)``. ``None`` yields ``(1, "")``.
    """
    if folio is None:
        return (1, "")
    text = str(folio)
    match = re.search(r'\d+', text)
    if match:
        try:
            return (0, int(match.group(0)))
        except ValueError:
            # int() can reject extremely long digit runs; only that failure
            # falls back to text ordering (previously a bare except).
            pass
    return (1, text.lower())

def endpoint_identifier_for_sort(label, canon, rawuuid):
    """Return the identifier used to order the two ends of a row.

    Preference: *label* in lower case, then *canon* unchanged, then the
    normalized *rawuuid* (``''`` when that is empty too).
    """
    if label:
        return label.lower()
    if canon:
        return canon
    # Fixed: this used the undefined name `rawwuid` and raised NameError
    # whenever both label and canonical id were empty.
    return normalize_uuid(rawuuid or '')

# ---------------- Main processing ----------------
def process_file(fp: str, write_debug=False):
    """Convert the conductors of one project file into a merged CSV.

    Steps: parse the XML, collect elements and conductors, resolve both ends
    of every conductor to a label, drop duplicate connections, put the
    smaller endpoint first, sort by page and write the result next to the
    input file (suffix ``.conductors.merged.csv``).

    Parameters:
        fp: path of the input file.
        write_debug: when true, a debug CSV and a duplicates CSV are written
            as well.

    Returns:
        (out, dbg, dupf) -- paths of the main CSV, the debug CSV and the
        duplicates CSV; the latter two are None unless write_debug is true.
    """
    qet_path = Path(fp)
    tree = ET.parse(qet_path); root = tree.getroot()
    parent_map = build_parent_map(root)
    elements_info = collect_elements_info(root, parent_map)
    canonical_map = build_canonical_map(elements_info)
    conductors = collect_conductors(root, parent_map)
    el_to_cons = build_el_to_conductors(conductors)

    rows = []
    debug_rows = []
    per_cond_candidates = {}   # conductor index -> (candidates_a, candidates_b)

    for cond in conductors:
        # skip explicit "Brücke" conductors (do not include in CSV)
        if cond.get('raw_attrib', {}).get('num', '') == 'Brücke':
            # don't process further (skip)
            continue

        # resolve side A: a label stored on the conductor itself is used as is
        if cond.get('element1_label'):
            final_a_label = cond.get('element1_label')
            chosen_a_uuid = None
            chosen_a_label = final_a_label
            candidates_a = []
        else:
            chosen_a_uuid, chosen_a_label, candidates_a = resolve_endpoint(
                cond['el1_uuid'], elements_info, el_to_cons, cond,
                (cond.get('element1_label'), cond.get('element1_linked'), cond.get('element1_name')),
                cond.get('folio',''),
                skip_conductor_index=cond['index'], max_link_depth=MAX_LINK_DEPTH)
            final_a_label = label_from_uuid_or_conductor_fallback(chosen_a_uuid, chosen_a_label,
                                                                 (cond.get('element1_label'), cond.get('element1_linked'), cond.get('element1_name')))

        # resolve side B (same procedure as side A)
        if cond.get('element2_label'):
            final_b_label = cond.get('element2_label')
            chosen_b_uuid = None
            chosen_b_label = final_b_label
            candidates_b = []
        else:
            chosen_b_uuid, chosen_b_label, candidates_b = resolve_endpoint(
                cond['el2_uuid'], elements_info, el_to_cons, cond,
                (cond.get('element2_label'), cond.get('element2_linked'), cond.get('element2_name')),
                cond.get('folio',''),
                skip_conductor_index=cond['index'], max_link_depth=MAX_LINK_DEPTH)
            final_b_label = label_from_uuid_or_conductor_fallback(chosen_b_uuid, chosen_b_label,
                                                                 (cond.get('element2_label'), cond.get('element2_linked'), cond.get('element2_name')))

        # terminal names: the conductor's own value first, otherwise taken
        # from the conductor that led to the resolved element
        term1 = cond.get('terminalname1','') or ''
        term2 = cond.get('terminalname2','') or ''
        if not term1 and chosen_a_uuid and candidates_a:
            term1 = extract_terminal_from_candidates(candidates_a, chosen_a_uuid) or term1
        if not term2 and chosen_b_uuid and candidates_b:
            term2 = extract_terminal_from_candidates(candidates_b, chosen_b_uuid) or term2

        # canonical id of each raw endpoint (representative of its link group)
        el1_canon = canonical_map.get(cond.get('el1_uuid','')) or normalize_uuid(cond.get('el1_uuid',''))
        el2_canon = canonical_map.get(cond.get('el2_uuid','')) or normalize_uuid(cond.get('el2_uuid',''))

        # page of each resolved endpoint, falling back to the conductor's page
        sideA_folio = ''
        sideB_folio = ''
        if chosen_a_uuid and chosen_a_uuid in elements_info:
            sideA_folio = elements_info[chosen_a_uuid].get('folio','')
        if chosen_b_uuid and chosen_b_uuid in elements_info:
            sideB_folio = elements_info[chosen_b_uuid].get('folio','')
        if not sideA_folio:
            sideA_folio = cond.get('folio','')
        if not sideB_folio:
            sideB_folio = cond.get('folio','')

        # keys starting with '_' are helper fields, stripped before writing
        row = {
            'folio': cond.get('folio',''),
            'element1_label': final_a_label or cond.get('element1_final','') or '',
            'terminalname1': term1,
            'element2_label': final_b_label or cond.get('element2_final','') or '',
            'terminalname2': term2,
            'tension_protocol': cond.get('tension_protocol',''),
            'conductor_color': cond.get('conductor_color',''),
            'conductor_section': cond.get('conductor_section',''),
            'function': cond.get('function',''),
            '_el1_uuid': cond.get('el1_uuid',''),
            '_el2_uuid': cond.get('el2_uuid',''),
            '_el1_canon': el1_canon,
            '_el2_canon': el2_canon,
            '_cond_index': cond['index'],
            '_chosen_a_uuid': chosen_a_uuid or '',
            '_chosen_b_uuid': chosen_b_uuid or '',
            '_sideA_folio': sideA_folio or '',
            '_sideB_folio': sideB_folio or ''
        }

        rows.append(row)
        per_cond_candidates[cond['index']] = (candidates_a, candidates_b)

        if write_debug:
            debug_rows.append({
                'index': cond['index'], 'side': 'A', 'source_uuid': cond.get('el1_uuid',''),
                'chosen_uuid': chosen_a_uuid or '', 'chosen_label': chosen_a_label or '',
                'candidates': candidates_a
            })
            debug_rows.append({
                'index': cond['index'], 'side': 'B', 'source_uuid': cond.get('el2_uuid',''),
                'chosen_uuid': chosen_b_uuid or '', 'chosen_label': chosen_b_label or '',
                'candidates': candidates_b
            })

    # Dedupe: a row is a duplicate as soon as one of its signatures was
    # already registered by an earlier, kept row
    seen_signatures = {}   # signature -> index (into rows) of the row that registered it
    filtered_rows = []
    duplicates = []
    for idx, r in enumerate(rows):
        cond_index = r.get('_cond_index')
        cands_a, cands_b = per_cond_candidates.get(cond_index, ([],[]))
        sigs = build_signatures_for_row(r, cands_a, cands_b)
        matched_original = None
        for s in sigs:
            if s in seen_signatures:
                matched_original = seen_signatures[s]
                break
        if matched_original is not None:
            duplicates.append({'original_row_index': matched_original, 'duplicate_row_index': idx, 'duplicate_row': r.copy()})
            continue
        for s in sigs:
            seen_signatures[s] = idx
        filtered_rows.append(r.copy())

    # Canonicalize order (so smaller endpoint is element1) and set folio to endpoint's folio
    canon_filtered = []
    for r in filtered_rows:
        id1 = endpoint_identifier_for_sort(r.get('element1_label',''), r.get('_el1_canon',''), r.get('_el1_uuid',''))
        id2 = endpoint_identifier_for_sort(r.get('element2_label',''), r.get('_el2_canon',''), r.get('_el2_uuid',''))
        if id2 and (not id1 or id2 < id1):
            # swap labels and terminals
            r['element1_label'], r['element2_label'] = r.get('element2_label',''), r.get('element1_label','')
            r['terminalname1'], r['terminalname2'] = r.get('terminalname2',''), r.get('terminalname1','')
            # swap helper uuids and canons
            r['_el1_uuid'], r['_el2_uuid'] = r.get('_el2_uuid',''), r.get('_el1_uuid','')
            r['_el1_canon'], r['_el2_canon'] = r.get('_el2_canon',''), r.get('_el1_canon','')
            r['_chosen_a_uuid'], r['_chosen_b_uuid'] = r.get('_chosen_b_uuid',''), r.get('_chosen_a_uuid','')
            # set folio to the side folio of new element1 if available
            new_sideA_folio = r.get('_sideB_folio','') or r.get('folio','')
            r['folio'] = new_sideA_folio or r.get('folio','')
        else:
            r['folio'] = r.get('_sideA_folio','') or r.get('folio','')
        canon_filtered.append(r)

    # Canonicalize duplicates rows too (strip helper fields later)
    canon_duplicates = []
    for d in duplicates:
        r = d['duplicate_row']
        id1 = endpoint_identifier_for_sort(r.get('element1_label',''), r.get('_el1_canon',''), r.get('_el1_uuid',''))
        id2 = endpoint_identifier_for_sort(r.get('element2_label',''), r.get('_el2_canon',''), r.get('_el2_uuid',''))
        if id2 and (not id1 or id2 < id1):
            r['element1_label'], r['element2_label'] = r.get('element2_label',''), r.get('element1_label','')
            r['terminalname1'], r['terminalname2'] = r.get('terminalname2',''), r.get('terminalname1','')
            r['_el1_uuid'], r['_el2_uuid'] = r.get('_el2_uuid',''), r.get('_el1_uuid','')
            r['_el1_canon'], r['_el2_canon'] = r.get('_el2_canon',''), r.get('_el1_canon','')
            r['_chosen_a_uuid'], r['_chosen_b_uuid'] = r.get('_chosen_b_uuid',''), r.get('_chosen_a_uuid','')
            new_sideA_folio = r.get('_sideB_folio','') or r.get('folio','')
            r['folio'] = new_sideA_folio or r.get('folio','')
        else:
            r['folio'] = r.get('_sideA_folio','') or r.get('folio','')
        out_r = {k:v for k,v in r.items() if not k.startswith('_')}
        canon_duplicates.append({'original_row_index': d['original_row_index'], 'duplicate_row_index': d['duplicate_row_index'], 'duplicate_row': out_r})

    # FINAL SORT: primarily by page (numeric), then element 1, element 2, terminal 1, terminal 2
    final_rows = []
    for r in canon_filtered:
        out_row = {k:v for k,v in r.items() if not k.startswith('_')}
        final_rows.append(out_row)

    def sort_key(rr):
        return (
            folio_sort_key(rr.get('folio','')),
            (rr.get('element1_label') or '').lower(),
            (rr.get('element2_label') or '').lower(),
            (rr.get('terminalname1') or ''),
            (rr.get('terminalname2') or '')
        )

    final_rows_sorted = sorted(final_rows, key=sort_key)

    # write outputs (with_suffix replaces the input file's last suffix)
    out = qet_path.with_suffix('.conductors.merged.csv')
    write_output_csv(final_rows_sorted, out)

    dbg = None
    dupf = None
    if write_debug:
        dbg = qet_path.with_suffix('.conductors.merged.debug.csv')
        write_debug_csv(debug_rows, dbg)
        dupf = qet_path.with_suffix('.conductors.merged.duplicates.csv')
        write_duplicates_csv(canon_duplicates, dupf)

    return out, dbg, dupf

# ---------------- GUI ----------------
def choose_and_process_gui():
    """Ask for an input file, process it and report the outcome in a dialog.

    Does nothing when the file dialog is cancelled. Reads the module-level
    ``write_debug_var`` (set up by run_gui()) to decide whether the debug and
    duplicates CSV files are written. Any exception raised while processing
    is shown in an error dialog instead of propagating.
    """
    selected = filedialog.askopenfilename(
        title="Wähle .qet Datei",
        filetypes=[("QET","*.qet"),("XML","*.xml"),("All files","*.*")],
    )
    if not selected:
        return

    debug_enabled = bool(write_debug_var.get())
    try:
        out, dbg, dupf = process_file(selected, write_debug=debug_enabled)
        parts = [f"CSV (kanonisiert & dedupliziert, sortiert nach Seite) erstellt:\n{out}"]
        if debug_enabled and dbg:
            parts.append(f"\n\nDebug-CSV: {dbg}\nDuplicates-CSV: {dupf}")
        messagebox.showinfo("Erfolg", "".join(parts))
    except Exception as e:
        messagebox.showerror("Fehler", f"Verarbeitung fehlgeschlagen:\n{e}")

def run_gui():
    """Build the main window and run the Tk event loop until it is closed.

    Creates the module-level ``write_debug_var`` that
    choose_and_process_gui() reads for the debug checkbox state.
    """
    global write_debug_var
    window = tk.Tk()
    window.title("QET Conductors -> merged CSV (no sanitize, optional debug)")
    window.geometry("720x160")

    tk.Label(
        window,
        text="Wähle .qet. Datei um den Verdrahtungsplan als CSV zu erstellen",
        justify='left',
        wraplength=880,
    ).pack(padx=10, pady=10)

    options = tk.Frame(window)
    write_debug_var = tk.IntVar(value=0)  # default OFF
    tk.Checkbutton(
        options,
        text="Erstelle debug & duplicates csv(optional)",
        variable=write_debug_var,
    ).pack(side='left', padx=6)
    options.pack(pady=4)

    tk.Button(
        window,
        text="Datei wählen und CSV erzeugen",
        command=choose_and_process_gui,
        width=56,
        height=2,
    ).pack(pady=6)

    window.mainloop()

# Start the GUI only when this file is executed as a script, not on import.
if __name__ == '__main__':
    run_gui()