import customtkinter as ctk
from tkinter import filedialog, messagebox
import threading
import json
import os
import subprocess
import time  # Importé pour le chronométrage
from pathlib import Path
from datetime import timedelta
import stable_whisper

# --- CONFIGURATION ---
PROFILES_FILE = "karaoke_profiles.json"  # JSON file holding the saved profiles, keyed by profile name
CONFIG_FILE = "app_settings.json"  # JSON file holding the window geometry and the current settings

def format_duration(seconds: float) -> str:
    """Render a duration given in seconds as a zero-padded ``mm:ss`` string.

    The value is rounded to the nearest whole second first. Minutes are not
    wrapped at 60, so one hour is rendered as ``60:00``.
    """
    whole_seconds = int(round(seconds))
    minutes, remainder = divmod(whole_seconds, 60)
    return f"{minutes:02d}:{remainder:02d}"

def render_karaoke_mp4(instrumental_wav: Path, ass_file: Path, output_mp4: Path, bg_video_path: Path = None,
    resolution="1920x1080", fps=30, audio_bitrate="256k"):
    """Render the karaoke video by running FFmpeg and waiting for it to finish.

    Args:
        instrumental_wav: Audio file used as the sound track.
        ass_file: ASS subtitle file burnt into the picture.
        output_mp4: Destination file; overwritten if present (``-y``).
        bg_video_path: Optional background video, looped indefinitely. When it
            is missing or does not exist, a black background is generated.
        resolution: Output size as ``"<width>x<height>"``.
        fps: Frame rate of the generated black background.
        audio_bitrate: AAC bitrate passed to ``-b:a``.

    Raises:
        subprocess.CalledProcessError: If FFmpeg exits with a non-zero status.
    """
    # Forward slashes and an escaped drive colon keep the path valid inside
    # the filter description.
    escaped_ass = str(ass_file).replace("\\", "/").replace(":", "\\:")
    dimensions = resolution.replace('x', ':')

    background = (
        ["-stream_loop", "-1", "-i", str(bg_video_path)]
        if bg_video_path and bg_video_path.exists()
        else ["-f", "lavfi", "-i", f"color=size={resolution}:rate={fps}:color=black"]
    )

    video_filter = ",".join([
        f"scale={dimensions}:force_original_aspect_ratio=increase",
        f"crop={dimensions}",
        f"ass='{escaped_ass}'",
    ])

    command = [
        "ffmpeg", "-y",
        *background,
        "-i", str(instrumental_wav),
        "-vf", video_filter,
        "-c:v", "libx264",
        "-preset", "slow",
        "-crf", "18",
        "-pix_fmt", "yuv420p",
        "-c:a", "aac",
        "-b:a", audio_bitrate,
        "-shortest",
        str(output_mp4),
    ]
    subprocess.run(command, check=True)

def format_ass_time(seconds):
    """Format a time in seconds as an ASS timestamp ``H:MM:SS.cc``.

    Negative values are clamped to zero. The fractional part is truncated to
    centiseconds after ``timedelta`` has rounded it to microseconds.
    """
    delta = timedelta(seconds=max(seconds, 0))
    hours, remainder = divmod(int(delta.total_seconds()), 3600)
    minutes, secs = divmod(remainder, 60)
    centiseconds = delta.microseconds // 10000
    return f"{hours}:{minutes:02d}:{secs:02d}.{centiseconds:02d}"

def decouper_segments(segments, max_mots=8):
    """Split segments so that none carries more than ``max_mots`` words.

    Segments that already fit are passed through untouched (same object).
    Longer ones are replaced by consecutive chunks of at most ``max_mots``
    words, each chunk being a plain dict with the keys ``start``, ``end``,
    ``words`` and ``text``. The returned list can therefore mix segment
    objects and dicts.
    """
    resultat = []
    for segment in segments:
        words = segment.words
        if len(words) > max_mots:
            for debut in range(0, len(words), max_mots):
                chunk = words[debut:debut + max_mots]
                resultat.append(dict(
                    start=chunk[0].start,
                    end=chunk[-1].end,
                    words=chunk,
                    text="".join(w.word for w in chunk).strip(),
                ))
            continue
        resultat.append(segment)
    return resultat

def _segment_field(segment, name):
    """Read ``name`` from a segment that is either an object or a dict."""
    return getattr(segment, name) if hasattr(segment, name) else segment[name]


def _build_karaoke_text(words, line_start, offset):
    r"""Return the words of one line prefixed with ASS ``{\k<cs>}`` tags.

    ``\k`` durations are consumed one after another from the moment the line
    appears, so every pause has to be emitted as an empty tag; otherwise the
    highlight runs ahead of the audio. Boundaries are computed from a running
    centisecond cursor so rounding errors do not accumulate.

    Args:
        words: Word objects exposing ``start``, ``end`` (seconds) and ``word``.
        line_start: Time in seconds at which the line is displayed.
        offset: Delay in seconds applied to every highlight.
    """
    parts = []
    elapsed_cs = 0  # centiseconds already covered by the emitted tags
    for word in words:
        start_cs = int(round((word.start + offset - line_start) * 100))
        end_cs = int(round((word.end + offset - line_start) * 100))
        gap_cs = start_cs - elapsed_cs
        if gap_cs > 0:
            # Silence before this word (lead-in or pause between words).
            parts.append(f"{{\\k{gap_cs}}}")
            elapsed_cs += gap_cs
        duration_cs = max(1, end_cs - elapsed_cs)
        parts.append(f"{{\\k{duration_cs}}}{word.word}")
        elapsed_cs += duration_cs
    return "".join(parts)


def generer_karaoke(chemin_audio, chemin_resultat_ass, max_mots=8, decalage_manuel=0.1, progressif=True, langage="fr", model_name="large"):
    """Transcribe an audio file and write the matching karaoke ASS file.

    The audio is transcribed with the ``stable_whisper`` model ``model_name``,
    the segments are cut to at most ``max_mots`` words, and two dialogue
    lines are written per segment: the segment itself in style ``Actif``
    (shown from the end of the previous segment until its own end) and, when
    there is one, the text of the following segment in style ``Suivant``.

    Args:
        chemin_audio: Path of the audio file to transcribe.
        chemin_resultat_ass: Path of the ASS file to write (UTF-8).
        max_mots: Maximum number of words per displayed line.
        decalage_manuel: Delay in seconds added to the word highlights; only
            used when ``progressif`` is true.
        progressif: When true, words are highlighted one by one with ``\\k``
            tags; otherwise the plain segment text is written.
        langage: Language code passed to the transcription.
        model_name: Name of the model to load.
    """
    model = stable_whisper.load_model(model_name)
    result = model.transcribe(chemin_audio, language=langage)
    segments_optimises = decouper_segments(result.segments, max_mots=max_mots)
    
    ass_header = """[Script Info]
ScriptType: v4.00+
PlayResX: 1280
PlayResY: 720

[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Actif,Arial,48,&H00FFFFFF,&H0000FF00,&H00000000,&H00000000,-1,0,0,0,100,100,0,0,1,3,0,2,20,20,180,1
Style: Suivant,Arial,40,&H80FFFFFF,&H00FFFFFF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,2,0,2,20,20,80,1

[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
"""
    with open(chemin_resultat_ass, 'w', encoding='utf-8') as f:
        f.write(ass_header)
        num_segments = len(segments_optimises)
        for i, current in enumerate(segments_optimises):
            # Segments are either transcription objects or dicts produced by
            # the splitting step, hence the dual access helper.
            c_start = _segment_field(current, 'start')
            c_end = _segment_field(current, 'end')
            c_words = _segment_field(current, 'words')
            c_text = _segment_field(current, 'text')

            # A line appears as soon as the previous one ends.
            if i > 0:
                start_display_raw = _segment_field(segments_optimises[i - 1], 'end')
            else:
                start_display_raw = c_start

            end_display_raw = c_end

            if progressif and c_words:
                karaoke_text = _build_karaoke_text(c_words, start_display_raw, decalage_manuel)
            else:
                # Plain text; also the fallback for a segment without words,
                # which cannot be highlighted word by word.
                karaoke_text = c_text

            start_t = format_ass_time(start_display_raw)
            end_t = format_ass_time(end_display_raw)
            f.write(f"Dialogue: 1,{start_t},{end_t},Actif,,0,0,0,,{karaoke_text}\n")

            if i + 1 < num_segments:
                next_text = _segment_field(segments_optimises[i + 1], 'text').strip()
                f.write(f"Dialogue: 0,{start_t},{end_t},Suivant,,0,0,0,,{next_text}\n")

class KaraokeApp(ctk.CTk):
    """Main window of the karaoke generator.

    Collects the source files and the transcription/video settings, persists
    them to ``CONFIG_FILE`` and ``PROFILES_FILE``, and runs the two-step
    pipeline (transcription to ASS, then FFmpeg rendering) on a background
    thread.
    """

    def __init__(self):
        super().__init__()

        self.title("Karaoké Generator Pro")
        self.geometry("1000x920")
        ctk.set_appearance_mode("dark")

        # --- Internal state ---
        self.current_process = None  # FFmpeg process currently running, if any
        self.stop_requested = False  # set when the user presses the cancel button

        # --- Variables bound to the widgets ---
        self.vocal_path = ctk.StringVar()
        self.instru_path = ctk.StringVar()
        self.bg_path = ctk.StringVar()
        self.model_var = ctk.StringVar(value="large")
        self.lang_var = ctk.StringVar(value="fr")
        self.res_var = ctk.StringVar(value="1920x1080")
        self.fps_var = ctk.IntVar(value=30)
        self.offset_var = ctk.DoubleVar(value=0.1)
        self.prog_var = ctk.BooleanVar(value=True)

        self.setup_ui()
        self.load_profiles_list()

        # Restore the settings saved by the previous session.
        self.load_app_settings()

        # Save the settings before the window is closed.
        self.protocol("WM_DELETE_WINDOW", self.on_closing)

    def setup_ui(self):
        """Create and lay out every widget of the window."""
        self.grid_columnconfigure(0, weight=1)

        # --- SECTION: PROFILES ---
        prof_frame = ctk.CTkFrame(self)
        prof_frame.pack(fill="x", padx=20, pady=(15, 5))
        ctk.CTkLabel(prof_frame, text="Profil :", font=("", 12, "bold")).pack(side="left", padx=10)
        self.profile_menu = ctk.CTkComboBox(prof_frame, values=["Par défaut"], command=self.apply_profile)
        self.profile_menu.pack(side="left", padx=10, fill="x", expand=True)
        ctk.CTkButton(prof_frame, text="Sauver", width=80, command=self.save_profile).pack(side="left", padx=5)
        ctk.CTkButton(prof_frame, text="Supprimer", width=80, fg_color="#A12222", command=self.delete_profile).pack(side="left", padx=5)

        # --- SECTION: FILES ---
        file_frame = ctk.CTkFrame(self)
        file_frame.pack(fill="x", padx=20, pady=10)
        ctk.CTkLabel(file_frame, text="Sélection des fichiers sources", font=("", 14, "bold")).pack(pady=5)
        self.add_file_row(file_frame, "Vocal (Analyse) :", self.vocal_path)
        self.add_file_row(file_frame, "Instru (Sortie) :", self.instru_path)
        self.add_file_row(file_frame, "Vidéo de fond :", self.bg_path)

        # --- SETTINGS CONTAINER ---
        param_container = ctk.CTkFrame(self, fg_color="transparent")
        param_container.pack(fill="x", padx=20, pady=5)

        # Transcription settings
        self.ia_box = ctk.CTkFrame(param_container)
        self.ia_box.pack(side="left", fill="both", expand=True, padx=(0, 5))
        self.ia_box.grid_columnconfigure(1, weight=1)
        ctk.CTkLabel(self.ia_box, text="Paramètres IA", font=("", 13, "bold")).grid(row=0, column=0, columnspan=2, pady=10)
        self.add_grid_row(self.ia_box, 1, "Modèle :", ctk.CTkComboBox(self.ia_box, values=["tiny", "base", "small", "medium", "large"], variable=self.model_var))
        self.add_grid_row(self.ia_box, 2, "Langue :", ctk.CTkEntry(self.ia_box, textvariable=self.lang_var))
        self.word_label = ctk.CTkLabel(self.ia_box, text="Mots par segment : 8")
        self.word_label.grid(row=3, column=0, columnspan=2, pady=(10, 0))
        self.word_slider = ctk.CTkSlider(self.ia_box, from_=3, to=15, command=self.update_word_text)
        self.word_slider.set(8)
        self.word_slider.grid(row=4, column=0, columnspan=2, padx=10, pady=(0, 10), sticky="ew")

        # Video settings
        self.vid_box = ctk.CTkFrame(param_container)
        self.vid_box.pack(side="left", fill="both", expand=True, padx=(5, 0))
        self.vid_box.grid_columnconfigure(1, weight=1)
        ctk.CTkLabel(self.vid_box, text="Paramètres Vidéo", font=("", 13, "bold")).grid(row=0, column=0, columnspan=2, pady=10)
        self.add_grid_row(self.vid_box, 1, "Résolution :", ctk.CTkComboBox(self.vid_box, values=["1920x1080", "1280x720", "1080x1920"], variable=self.res_var))
        self.add_grid_row(self.vid_box, 2, "FPS :", ctk.CTkEntry(self.vid_box, textvariable=self.fps_var))
        self.add_grid_row(self.vid_box, 3, "Offset (s) :", ctk.CTkEntry(self.vid_box, textvariable=self.offset_var))
        ctk.CTkCheckBox(self.vid_box, text="Mode Progressif", variable=self.prog_var).grid(row=4, column=0, columnspan=2, pady=10)

        # --- ACTIONS & LOG ---
        action_frame = ctk.CTkFrame(self, fg_color="transparent")
        action_frame.pack(fill="x", padx=20, pady=10)
        self.start_btn = ctk.CTkButton(action_frame, text="Lancer la génération", height=45, fg_color="#1f6aa5", font=("", 14, "bold"), command=self.start_thread)
        self.start_btn.pack(side="left", fill="x", expand=True, padx=(0, 5))
        self.cancel_btn = ctk.CTkButton(action_frame, text="Annuler", height=45, fg_color="grey", state="disabled", command=self.request_stop)
        self.cancel_btn.pack(side="left", fill="x", expand=True, padx=(5, 0))

        self.log_box = ctk.CTkTextbox(self, height=250, font=("Consolas", 12))
        self.log_box.pack(fill="both", expand=True, padx=20, pady=(0, 20))

    # --- PERSISTENCE ---
    def save_app_settings(self):
        """Write the window geometry and the current settings to ``CONFIG_FILE``."""
        settings = {
            "geometry": self.geometry(),
            "vocal": self.vocal_path.get(),
            "instru": self.instru_path.get(),
            "bg": self.bg_path.get(),
            "model": self.model_var.get(),
            "lang": self.lang_var.get(),
            "res": self.res_var.get(),
            "fps": self.fps_var.get(),
            "offset": self.offset_var.get(),
            "words": self.word_slider.get(),
            "prog": self.prog_var.get()
        }
        with open(CONFIG_FILE, "w") as f:
            json.dump(settings, f)

    def load_app_settings(self):
        """Restore the window geometry and the settings from ``CONFIG_FILE``.

        Restoring is best effort: a missing file is ignored silently, and an
        unreadable or invalid one is reported in the log box instead of
        preventing the application from starting.
        """
        if not os.path.exists(CONFIG_FILE):
            return
        try:
            with open(CONFIG_FILE, "r") as f:
                s = json.load(f)
            self.geometry(s.get("geometry", "1000x920"))
            self.vocal_path.set(s.get("vocal", ""))
            self.instru_path.set(s.get("instru", ""))
            self.bg_path.set(s.get("bg", ""))
            self.model_var.set(s.get("model", "large"))
            self.lang_var.set(s.get("lang", "fr"))
            self.res_var.set(s.get("res", "1920x1080"))
            self.fps_var.set(s.get("fps", 30))
            self.offset_var.set(s.get("offset", 0.1))
            self.word_slider.set(s.get("words", 8))
            self.update_word_text(s.get("words", 8))
            self.prog_var.set(s.get("prog", True))
        except Exception as exc:
            # Still best effort, but no longer silent and no longer catching
            # SystemExit / KeyboardInterrupt.
            self.log(f"Réglages précédents ignorés : {exc}")

    def on_closing(self):
        """Save the settings, then close the window.

        The window is destroyed even when saving fails (for instance when the
        FPS field holds a non-numeric value), so the application can always
        be closed.
        """
        try:
            self.save_app_settings()
        finally:
            self.destroy()

    # --- UI HELPERS ---
    def add_grid_row(self, parent, row_idx, label_text, widget):
        """Place a label in column 0 and ``widget`` in column 1 of a grid row."""
        lbl = ctk.CTkLabel(parent, text=label_text, width=100, anchor="w")
        lbl.grid(row=row_idx, column=0, padx=(15, 5), pady=5, sticky="w")
        widget.grid(row=row_idx, column=1, padx=(5, 15), pady=5, sticky="ew")

    def add_file_row(self, parent, label, var):
        """Add a row made of a label, an entry bound to ``var`` and a browse button."""
        def browse():
            chosen = filedialog.askopenfilename()
            # The dialog returns an empty value when cancelled; keep the
            # previous path in that case.
            if chosen:
                var.set(chosen)

        row = ctk.CTkFrame(parent, fg_color="transparent")
        row.pack(fill="x", padx=10, pady=3)
        ctk.CTkLabel(row, text=label, width=130, anchor="w").pack(side="left")
        ctk.CTkEntry(row, textvariable=var).pack(side="left", fill="x", expand=True, padx=5)
        ctk.CTkButton(row, text="...", width=40, command=browse).pack(side="right")

    def update_word_text(self, val):
        """Refresh the label showing the number of words per segment."""
        self.word_label.configure(text=f"Mots par segment : {int(val)}")

    def log(self, msg):
        """Append ``msg`` to the log box and scroll to the end."""
        self.log_box.insert("end", f"> {msg}\n")
        self.log_box.see("end")

    # --- PROFILES ---
    def load_all_profiles(self):
        """Return the profiles stored in ``PROFILES_FILE`` ({} if it is missing)."""
        if os.path.exists(PROFILES_FILE):
            with open(PROFILES_FILE, "r") as f:
                return json.load(f)
        return {}

    def load_profiles_list(self):
        """Fill the profile combo box with the stored profile names."""
        names = list(self.load_all_profiles())
        self.profile_menu.configure(values=names if names else ["Par défaut"])

    def save_profile(self):
        """Store the current settings under the name typed in the combo box.

        Nothing is saved when the name is empty or is the placeholder
        "Par défaut".
        """
        name = self.profile_menu.get()
        if not name or name == "Par défaut":
            return
        data = self.load_all_profiles()
        data[name] = {
            "model": self.model_var.get(),
            "lang": self.lang_var.get(),
            "words": self.word_slider.get(),
            "res": self.res_var.get(),
            "fps": self.fps_var.get(),
            "offset": self.offset_var.get(),
            "prog": self.prog_var.get(),
        }
        with open(PROFILES_FILE, "w") as f:
            json.dump(data, f)
        self.load_profiles_list()
        self.log(f"Profil '{name}' sauvegardé.")

    def apply_profile(self, name):
        """Load the settings of profile ``name`` into the widgets, if it exists."""
        data = self.load_all_profiles()
        if name not in data:
            return
        p = data[name]
        self.model_var.set(p["model"])
        self.lang_var.set(p.get("lang", "fr"))
        self.word_slider.set(p["words"])
        self.update_word_text(p["words"])
        self.res_var.set(p["res"])
        self.fps_var.set(p["fps"])
        self.offset_var.set(p.get("offset", 0.1))
        self.prog_var.set(p.get("prog", True))

    def delete_profile(self):
        """Remove the profile selected in the combo box from ``PROFILES_FILE``."""
        name = self.profile_menu.get()
        data = self.load_all_profiles()
        if name in data:
            del data[name]
            with open(PROFILES_FILE, "w") as f:
                json.dump(data, f)
            self.load_profiles_list()

    # --- PROCESSING AND TIMING ---
    def start_thread(self):
        """Run :meth:`process` on a daemon thread."""
        self.stop_requested = False
        threading.Thread(target=self.process, daemon=True).start()

    def request_stop(self):
        """Ask the pipeline to stop and terminate FFmpeg if it is running."""
        self.stop_requested = True
        # Work on a local reference: the worker thread resets the attribute
        # to None once FFmpeg has exited.
        proc = self.current_process
        if proc:
            proc.terminate()

    def process(self):
        """Run the whole pipeline: transcription, then FFmpeg rendering.

        Outputs go to ``uvr/<name>.ass`` and ``uvr/mp4/<name>.mp4`` where
        ``<name>`` is the stem of the vocal file. Errors are reported in the
        log box, and the buttons are restored in every case.

        NOTE(review): this runs on the thread created by ``start_thread`` yet
        touches Tk widgets and variables; confirm the Tk build in use
        tolerates cross-thread calls.
        """
        try:
            total_start_time = time.time()  # start of the overall timer
            self.start_btn.configure(state="disabled")
            self.cancel_btn.configure(state="normal", fg_color="#A12222")

            vocal = self.vocal_path.get()
            instru = self.instru_path.get()
            if not vocal or not instru:
                raise ValueError("Fichiers audio manquants.")

            nom = Path(vocal).stem
            ass_path = Path(f"uvr/{nom}.ass")
            output_mp4 = Path(f"uvr/mp4/{nom}.mp4")
            ass_path.parent.mkdir(parents=True, exist_ok=True)
            output_mp4.parent.mkdir(parents=True, exist_ok=True)

            # Step 1: transcription
            self.log("Étape 1 : Transcription Whisper...")
            t1_start = time.time()
            generer_karaoke(vocal, str(ass_path), max_mots=int(self.word_slider.get()),
                            decalage_manuel=self.offset_var.get(), progressif=self.prog_var.get(),
                            langage=self.lang_var.get(), model_name=self.model_var.get())
            t1_end = time.time()
            self.log(f"⏱ Transcription terminée en {format_duration(t1_end - t1_start)}")

            if self.stop_requested:
                self.log("Génération annulée.")
                return

            # Step 2: FFmpeg
            self.log("Étape 2 : Rendu FFmpeg...")
            t2_start = time.time()
            self.render_with_ffmpeg(instru, ass_path, output_mp4)

            # A cancelled render must not be reported as finished.
            if self.stop_requested:
                self.log("Génération annulée.")
                return

            t2_end = time.time()
            self.log(f"⏱ Rendu vidéo terminé en {format_duration(t2_end - t2_start)}")

            total_duration = time.time() - total_start_time
            self.log(f"\n✅ Terminé avec succès !")
            self.log(f"📁 Vidéo : {output_mp4}")
            self.log(f"⌛ Durée totale : {format_duration(total_duration)}")

        except Exception as e:
            self.log(f"❌ Erreur : {e}")
        finally:
            self.start_btn.configure(state="normal")
            self.cancel_btn.configure(state="disabled", fg_color="grey")

    def render_with_ffmpeg(self, instru, ass_file, output):
        """Render the video with FFmpeg, keeping the process cancellable.

        Args:
            instru: Path of the audio file used as the sound track.
            ass_file: Path of the ASS subtitle file to burn in.
            output: Path of the MP4 file to write.

        Raises:
            RuntimeError: If FFmpeg exits with a non-zero status and no
                cancellation was requested.
        """
        ass_filter = str(ass_file).replace("\\", "/").replace(":", "\\:")
        res = self.res_var.get()
        fps = self.fps_var.get()

        if self.bg_path.get():
            input_bg = ["-stream_loop", "-1", "-i", self.bg_path.get()]
        else:
            input_bg = ["-f", "lavfi", "-i", f"color=size={res}:rate={fps}:color=black"]

        cmd = ["ffmpeg", "-y"] + input_bg + ["-i", instru, "-vf",
               f"scale={res.replace('x',':')}:force_original_aspect_ratio=increase,crop={res.replace('x',':')},ass='{ass_filter}'",
               "-c:v", "libx264", "-crf", "18", "-pix_fmt", "yuv420p", "-c:a", "aac", "-shortest", str(output)]

        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
        self.current_process = proc
        last_message = ""
        try:
            # The output must be drained or FFmpeg blocks on a full pipe.
            # Progress lines are dropped to keep the log readable; the last
            # other line is kept for the error report.
            for line in proc.stdout:
                if "frame=" not in line and line.strip():
                    last_message = line.strip()
            returncode = proc.wait()
        finally:
            # Never leave a finished process behind for a later cancel.
            self.current_process = None

        if returncode != 0 and not self.stop_requested:
            raise RuntimeError(f"FFmpeg a échoué (code {returncode}) : {last_message}")

# Script entry point: create the main window and enter its main loop.
if __name__ == "__main__":
    app = KaraokeApp()
    app.mainloop()
