"""Higgsfield Soul 2.0 vs Flux LoRA — ArcFace identity consistency comparison"""
import os, sys, cv2, numpy as np
from pathlib import Path

# Force UTF-8 on stdout: the file names printed below may contain non-ASCII
# characters, which can fail to encode on consoles using a legacy code page.
sys.stdout.reconfigure(encoding='utf-8')

from insightface.app import FaceAnalysis
# Shared face-analysis pipeline using the 'buffalo_l' model pack, restricted to
# the two modules this script needs: face detection and recognition (embedding).
app = FaceAnalysis(name='buffalo_l', allowed_modules=['detection', 'recognition'])
# NOTE(review): ctx_id=-1 presumably selects CPU inference (insightface treats
# negative ids as "no GPU") — confirm. det_size is passed as the detector's
# input resolution.
app.prepare(ctx_id=-1, det_size=(640, 640))

def imread_unicode(path):
    """Load an image as a BGR array from a path that may be non-ASCII.

    The raw file bytes are read with NumPy and decoded in memory with
    ``cv2.imdecode`` instead of handing the path to OpenCV (presumably to
    sidestep OpenCV's trouble with non-ASCII paths on Windows).

    Args:
        path: A ``str`` or ``pathlib.Path`` pointing at the image file.

    Returns:
        The decoded image, or ``None`` when the file cannot be read, is
        empty, or cannot be decoded. Callers rely on the ``None`` result
        to skip bad files.
    """
    try:
        buf = np.fromfile(str(path), dtype=np.uint8)
    except OSError:
        # Missing or unreadable file: report "no image" rather than crash,
        # so a single bad file does not abort the whole run.
        return None
    if buf.size == 0:
        # cv2.imdecode raises on an empty buffer (e.g. a zero-byte file).
        return None
    return cv2.imdecode(buf, cv2.IMREAD_COLOR)

def get_embedding(img):
    """Return the normalized embedding of the largest face found in ``img``.

    Runs the module-level ``app`` on the image. When several faces are
    detected, the one whose bounding box covers the greatest area is used.
    Returns ``None`` when no face is detected.
    """
    def _bbox_area(face):
        # bbox is indexed as (x1, y1, x2, y2) by this code.
        width = face.bbox[2] - face.bbox[0]
        height = face.bbox[3] - face.bbox[1]
        return width * height

    detected = app.get(img)
    if not detected:
        return None
    largest = max(detected, key=_bbox_area)
    return largest.normed_embedding

# Reference identity: average the face embeddings of every image in the LoRA
# training dataset, then re-normalize to unit length so that a dot product
# with another normalized embedding is a cosine similarity.
dataset_dir = Path('G:/StabilityMatrix/Packages/AI-Toolkit/datasets/mzyeoja')
embs = []
for f in sorted(list(dataset_dir.glob('*.jpg')) + list(dataset_dir.glob('*.webp'))):
    img = imread_unicode(f)
    if img is None:
        continue  # unreadable / undecodable file
    emb = get_embedding(img)
    if emb is not None:
        embs.append(emb)
if not embs:
    # Without this guard np.mean([]) yields NaN and every later score
    # computation fails with an unrelated-looking error.
    sys.exit(f'ERROR: no usable reference faces found in {dataset_dir}')
ref = np.mean(embs, axis=0)
ref = ref / np.linalg.norm(ref)
print(f'REF: {len(embs)} faces from LoRA dataset\n')

# Higgsfield outputs: score every PNG in the results folder against the
# reference embedding and collect the similarities in `hf_scores`.
print('=== Higgsfield Soul 2.0 Results ===')
hf_dir = Path('G:/작업/조선왕자/배우 로라 데이터셋/pipeline_output_여_v2/Higgsfield Soul 2.0 용 데이터셋/Higgsfield Soul 2.0 결과물')
hf_scores = []
for f in sorted(hf_dir.glob('*.png')):
    img = imread_unicode(f)
    if img is None:
        continue
    # File names are clipped and padded to 40 characters to keep columns aligned.
    label = f'{f.name[:40]:40s}'
    emb = get_embedding(img)
    if emb is None:
        print(f'  {label}  NO FACE')
        continue
    score = float(np.dot(ref, emb))
    hf_scores.append(score)
    print(f'  {label}  {score:.3f}')

# LoRA outputs rendered by ComfyUI. The 6-prompt batch and the street batch
# are scored the same way and pooled into a single `lora_scores` list.
lora_dir = Path('G:/StabilityMatrix/Packages/ComfyUI/output')
lora_scores = []
lora_batches = (
    ('\n=== LoRA ComfyUI Results (6-prompt test) ===', 'mzyeoja_v2_test_*.png'),
    ('\n=== LoRA Street Test ===', 'mzyeoja_v2_street_*.png'),
)
for heading, pattern in lora_batches:
    print(heading)
    for f in sorted(lora_dir.glob(pattern)):
        img = imread_unicode(f)
        if img is None:
            continue
        emb = get_embedding(img)
        if emb is None:
            print(f'  {f.name:40s}  NO FACE')
            continue
        score = float(np.dot(ref, emb))
        lora_scores.append(score)
        print(f'  {f.name:40s}  {score:.3f}')

# Also check Higgsfield DATASET vs LoRA DATASET (what was excluded):
# score every Higgsfield-dataset image whose file stem does not appear in the
# LoRA training dataset.
print('\n=== Higgsfield Dataset (excluded from LoRA) ===')
hf_dataset_dir = Path('G:/작업/조선왕자/배우 로라 데이터셋/pipeline_output_여_v2/Higgsfield Soul 2.0 용 데이터셋')
# Build the stem set with the same glob patterns used for the reference
# embedding. A case-sensitive suffix test can disagree with glob (which is
# case-insensitive on Windows), mislabelling e.g. 'x.JPG' as excluded even
# though it contributed to the reference.
lora_files = {
    f.stem
    for pattern in ('*.jpg', '*.webp')
    for f in dataset_dir.glob(pattern)
}
excluded_scores = []
for f in sorted(list(hf_dataset_dir.glob('*.jpg')) + list(hf_dataset_dir.glob('*.webp'))):
    if f.stem in lora_files:
        continue  # skip files that ARE in LoRA dataset
    img = imread_unicode(f)
    if img is None:
        continue
    emb = get_embedding(img)
    if emb is not None:
        score = float(np.dot(ref, emb))
        excluded_scores.append(score)
        print(f'  EXCLUDED: {f.name:40s}  {score:.3f}')
    else:
        print(f'  EXCLUDED: {f.name:40s}  NO FACE')

# Summary: one statistics line per non-empty score list, followed by a note
# about the images that were left out of the LoRA dataset.
def _format_stats(scores):
    """Render mean / min / max / std / count of ``scores`` as one string."""
    return (
        f'AVG={np.mean(scores):.3f}  min={min(scores):.3f}  '
        f'max={max(scores):.3f}  std={np.std(scores):.3f}  n={len(scores)}'
    )


divider = '=' * 60
print('\n' + divider)
print('SUMMARY')
print(divider)
summary_rows = (
    ('Higgsfield Results:  ', hf_scores),
    ('LoRA Results:        ', lora_scores),
    ('Excluded Dataset:    ', excluded_scores),
)
for row_label, row_scores in summary_rows:
    if row_scores:
        print(row_label + _format_stats(row_scores))
if excluded_scores:
    print(f'\n>>> Pipeline excluded {len(excluded_scores)} images, mean ArcFace: {np.mean(excluded_scores):.3f}')
    print(f'>>> Including these in training may have strengthened identity learning')
