# -*- coding: utf-8 -*-
"""
Seed-VC 处理器
提供与voice_changer模块兼容的接口
"""

import os
import sys
import asyncio
import tempfile
from pathlib import Path
from typing import Optional, Callable
import numpy as np

# Point HuggingFace downloads at the hf-mirror.com endpoint, unless the
# environment already names an endpoint (an existing value is left untouched).
if "HF_ENDPOINT" not in os.environ:
    os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"


class SeedVCProcessor:
    """Voice-conversion front end that drives a local Seed-VC checkout.

    Conversion is performed by running the checkout's ``inference.py`` in a
    child Python process; this class prepares the arguments, locates the
    produced file and offers two convenience layers on top: text-to-speech
    followed by conversion, and a buffered microphone-to-speaker loop.

    Public attributes:
        seed_vc_path: Directory that must contain ``inference.py``.
        cache_dir: Directory for generated reference/TTS audio and outputs.
        reference_audio: Path of the target-timbre audio, or None if unset.
        diffusion_steps: Value passed as ``--diffusion-steps``.
        semi_tone_shift: Value passed as ``--semi-tone-shift``.
        auto_f0_adjust: Value passed as ``--auto-f0-adjust``.
    """

    # Checkout location used when neither the constructor argument nor the
    # SEED_VC_PATH environment variable supplies one.
    DEFAULT_SEED_VC_PATH = "C:/Users/ggpq/Downloads/seed-vc-test"
    # Rate the microphone stream is opened at, and the rate realtime windows
    # are resampled to before being written for conversion.
    INPUT_SAMPLE_RATE = 44100
    MODEL_SAMPLE_RATE = 22050

    def __init__(self, seed_vc_path: Optional[str] = None):
        """Create a processor.

        Args:
            seed_vc_path: Location of the Seed-VC checkout. When omitted, the
                ``SEED_VC_PATH`` environment variable is used if set, then
                ``DEFAULT_SEED_VC_PATH``.
        """
        configured = (
            seed_vc_path
            or os.environ.get("SEED_VC_PATH")
            or self.DEFAULT_SEED_VC_PATH
        )
        self.seed_vc_path = Path(configured)
        self.cache_dir = Path(__file__).parent.parent.parent / "cache"
        # parents=True: the cache may sit under a directory that does not exist yet.
        self.cache_dir.mkdir(parents=True, exist_ok=True)

        self.reference_audio: Optional[str] = None
        self.diffusion_steps = 10  # few steps: favours speed for near-realtime use
        self.semi_tone_shift = 0
        self.auto_f0_adjust = True

        self._initialized = False
        self._log_callback: Optional[Callable[[str], None]] = None

        # Realtime state; every field is reset by stop_realtime().
        self._realtime_running = False
        self._realtime_thread = None
        self._audio_queue = None
        self._stop_event = None
        self._stream = None
        self._output_device = None

    def set_log_callback(self, callback: Callable[[str], None]):
        """Register a callable that receives every log line (prefix included)."""
        self._log_callback = callback

    def _log(self, msg: str):
        """Print ``msg`` with the ``[Seed-VC]`` prefix and forward it to the callback."""
        line = f"[Seed-VC] {msg}"
        try:
            print(line)
        except UnicodeEncodeError:
            # The log messages contain emoji; a stdout with a legacy encoding
            # cannot represent them. Degrade to escapes instead of letting a
            # log call abort the operation that is being logged.
            encoding = getattr(sys.stdout, "encoding", None) or "ascii"
            print(line.encode(encoding, "backslashreplace").decode(encoding))
        if self._log_callback:
            self._log_callback(line)

    def check_available(self) -> bool:
        """Check that the checkout, its ``inference.py`` and PyTorch are present.

        Logs which accelerator will be used. On success the processor is
        marked initialized so later calls skip this check.

        Returns:
            True when everything needed was found, False otherwise.
        """
        if not self.seed_vc_path.exists():
            self._log("❌ Seed-VC源码未找到")
            self._log(f"  期望路径: {self.seed_vc_path}")
            return False

        inference_py = self.seed_vc_path / "inference.py"
        if not inference_py.exists():
            self._log("❌ inference.py 未找到")
            return False

        try:
            import torch
        except ImportError:
            self._log("❌ PyTorch未安装")
            return False
        except OSError as load_err:
            # An installed torch whose native libraries fail to load raises
            # OSError rather than ImportError.
            self._log(f"❌ PyTorch加载失败: {load_err}")
            return False

        if torch.cuda.is_available():
            self._log(f"✅ GPU可用: {torch.cuda.get_device_name(0)}")
        else:
            self._log("⚠️ GPU不可用，将使用CPU")

        self._initialized = True
        return True

    def set_reference(self, audio_path: str) -> bool:
        """Select the reference (target timbre) audio file.

        Returns:
            True if the file exists and was stored, False otherwise.
        """
        if not Path(audio_path).exists():
            self._log(f"❌ 参考音频不存在: {audio_path}")
            return False

        self.reference_audio = audio_path
        self._log(f"✅ 参考音频: {Path(audio_path).name}")
        return True

    async def generate_reference_tts(self, voice: str = "zh-CN-XiaoyiNeural") -> str:
        """Synthesize a reference clip with edge-tts and select it as reference.

        The clip is cached per voice inside ``cache_dir`` and reused when it
        already exists.

        Args:
            voice: edge-tts voice name.

        Returns:
            Path of the cached reference clip.
        """
        import edge_tts

        # NOTE(review): edge-tts is believed to emit MP3 data by default, so
        # the ".wav" suffix may not match the container - confirm that the
        # downstream loader sniffs the format. The name is kept so existing
        # caches stay valid.
        reference_path = self.cache_dir / f"seed_reference_{voice}.wav"

        if not reference_path.exists():
            text = "这是一段参考音频，用于设置目标音色，声音要清晰自然，语速适中。"
            # Download to a side file and rename afterwards, so an interrupted
            # download is never mistaken for a complete cached clip.
            partial_path = reference_path.with_name(reference_path.name + ".part")
            try:
                await edge_tts.Communicate(text, voice).save(str(partial_path))
                partial_path.replace(reference_path)
            except BaseException:
                try:
                    partial_path.unlink()
                except OSError:
                    pass  # nothing was written, or it is already gone
                raise
            self._log(f"✅ 生成参考音频: {voice}")

        self.reference_audio = str(reference_path)
        return str(reference_path)

    @staticmethod
    def _wav_fingerprints(directory: Path) -> dict:
        """Map every ``*.wav`` in ``directory`` to its ``(mtime_ns, size)`` pair."""
        fingerprints = {}
        for wav in directory.glob("*.wav"):
            try:
                info = wav.stat()
            except OSError:
                continue  # vanished between listing and stat
            fingerprints[wav] = (info.st_mtime_ns, info.st_size)
        return fingerprints

    def convert(
        self,
        source_audio: str,
        output_path: Optional[str] = None,
    ) -> Optional[str]:
        """Convert ``source_audio`` towards the timbre of the reference audio.

        Runs ``inference.py`` from the checkout in a child process and looks
        for a ``*.wav`` that the run created or rewrote in the output
        directory.

        Args:
            source_audio: Path of the audio to convert.
            output_path: Desired path of the result. When given, the produced
                file is moved there; otherwise the result stays in
                ``cache_dir/seed_output`` under the name the script chose.

        Returns:
            Absolute path of the converted file, or None on any failure (the
            reason is logged).
        """
        if not self._initialized and not self.check_available():
            return None

        if not self.reference_audio:
            self._log("❌ 未设置参考音频")
            return None

        if not Path(source_audio).exists():
            self._log(f"❌ 源音频不存在: {source_audio}")
            return None

        try:
            import subprocess

            # The child runs with cwd set to the checkout, so every path
            # handed to it must be absolute or it would be resolved against
            # the wrong directory.
            if output_path:
                target_file = Path(os.path.abspath(output_path))
                output_dir = target_file.parent
            else:
                target_file = None
                output_dir = Path(os.path.abspath(self.cache_dir / "seed_output"))
            output_dir.mkdir(parents=True, exist_ok=True)

            # Snapshot the directory so that files left by earlier runs are
            # not mistaken for this run's result.
            before = self._wav_fingerprints(output_dir)

            cmd = [
                sys.executable,
                str(self.seed_vc_path / "inference.py"),
                "--source", os.path.abspath(source_audio),
                "--target", os.path.abspath(self.reference_audio),
                "--output", str(output_dir),
                "--diffusion-steps", str(self.diffusion_steps),
                "--semi-tone-shift", str(self.semi_tone_shift),
                "--auto-f0-adjust", str(self.auto_f0_adjust),
                "--length-adjust", "1.0",
                "--inference-cfg-rate", "0.7",
                "--fp16", "True",
            ]

            env = os.environ.copy()
            # Same policy as the module-level default: supply the mirror only
            # when no endpoint has been chosen.
            env.setdefault("HF_ENDPOINT", "https://hf-mirror.com")

            self._log(f"🔄 转换中... (steps={self.diffusion_steps})")

            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                errors="replace",  # undecodable child output must not abort the call
                env=env,
                cwd=str(self.seed_vc_path),
            )

            # An "RTF:" line on stderr is also accepted as a success marker,
            # independent of the exit code.
            if "RTF:" in result.stderr or result.returncode == 0:
                produced = [
                    (wav, fingerprint)
                    for wav, fingerprint in self._wav_fingerprints(output_dir).items()
                    if before.get(wav) != fingerprint
                ]
                if produced:
                    latest = max(produced, key=lambda item: item[1][0])[0]
                    if target_file is not None and latest != target_file:
                        # Honour the requested file name, not only its directory.
                        latest.replace(target_file)
                        latest = target_file
                    self._log(f"✅ 转换完成: {latest.name}")
                    return str(latest)

            self._log(f"❌ 转换失败")
            if result.stderr:
                # The end of stderr is where a traceback states the actual error.
                self._log(f"  错误: {result.stderr[-200:]}")
            return None

        except Exception as e:
            self._log(f"❌ 转换异常: {e}")
            return None

    @staticmethod
    def _run_coroutine(coro_factory):
        """Run the coroutine made by ``coro_factory`` to completion, blocking.

        ``asyncio.run`` refuses to start inside a running event loop, so in
        that situation the coroutine is executed on a helper thread that owns
        its own loop.
        """
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            return asyncio.run(coro_factory())

        import concurrent.futures

        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
            return pool.submit(lambda: asyncio.run(coro_factory())).result()

    def convert_tts_text(
        self,
        text: str,
        tts_voice: str = "zh-CN-YunxiNeural",
    ) -> Optional[str]:
        """Synthesize ``text`` with edge-tts, then convert the result.

        Args:
            text: Text to speak.
            tts_voice: edge-tts voice used for the intermediate audio.

        Returns:
            Path of the converted audio, or None if conversion failed.
            Errors raised by edge-tts propagate to the caller.
        """
        import edge_tts

        source_path = self.cache_dir / "seed_tts_source.wav"

        async def _synthesize():
            await edge_tts.Communicate(text, tts_voice).save(str(source_path))

        self._run_coroutine(_synthesize)
        self._log(f"📢 TTS生成完成: {tts_voice}")

        # The conversion is a blocking subprocess call, so it runs after the
        # event loop has finished rather than inside the coroutine.
        return self.convert(str(source_path))

    def start_realtime(
        self,
        input_device: Optional[int] = None,
        output_device: Optional[int] = None,
        buffer_seconds: float = 2.0,
    ) -> bool:
        """Start buffered near-realtime conversion from microphone to speaker.

        Microphone audio is collected into windows of ``buffer_seconds``; each
        full window is converted with convert() and played back on a
        background thread.

        Args:
            input_device: sounddevice input device id (None = default).
            output_device: sounddevice output device id (None = default).
            buffer_seconds: Window length in seconds; must yield at least one
                sample.

        Returns:
            True if the loop is running (including when it already was),
            False if a precondition failed. Errors raised while opening the
            input stream propagate after the realtime state has been rolled
            back.
        """
        if not self._initialized and not self.check_available():
            return False

        if not self.reference_audio:
            self._log("❌ 未设置参考音频")
            return False

        if self._realtime_running:
            self._log("⚠️ 实时变声已在运行")
            return True

        input_sr = self.INPUT_SAMPLE_RATE
        model_sr = self.MODEL_SAMPLE_RATE
        window = int(input_sr * buffer_seconds)
        if window <= 0:
            # A zero-length window would make the worker convert empty audio
            # on every incoming chunk.
            self._log(f"❌ 缓冲区长度无效: {buffer_seconds}")
            return False

        import threading
        import queue
        import sounddevice as sd
        import soundfile as sf
        import librosa  # imported here so a missing package fails before anything starts

        # Per-session stop flag and queue: a worker that outlives the join
        # timeout in stop_realtime() cannot be revived by a later start.
        stop_event = threading.Event()
        chunks = queue.Queue()

        def play(data, out_sr):
            """Play ``data`` and block until playback has finished."""
            device = self._output_device
            try:
                if device is not None:
                    sd.play(data, out_sr, device=device)
                else:
                    sd.play(data, out_sr)
                sd.wait()
            except Exception as play_err:
                self._log(f"播放错误: {play_err}")
                if device is None:
                    return  # already on the default device; nothing else to try
                sd.play(data, out_sr)
                sd.wait()

        def process_segment(segment):
            """Convert one window of microphone audio and play the result."""
            resampled = librosa.resample(segment, orig_sr=input_sr, target_sr=model_sr)
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
                input_path = handle.name
            output_path = None
            try:
                sf.write(input_path, resampled, model_sr)
                output_path = self.convert(input_path)
                if output_path and Path(output_path).exists():
                    data, out_sr = sf.read(output_path, dtype="float32")
                    self._log(f"🔊 播放变声音频 ({len(data)/out_sr:.1f}秒)")
                    play(data, out_sr)
            finally:
                # Both files belong to this window only; remove them even when
                # conversion or playback raised, so they do not pile up.
                for leftover in (input_path, output_path):
                    if not leftover:
                        continue
                    try:
                        os.unlink(leftover)
                    except OSError:
                        pass  # best effort: already removed or still locked

        def process_thread():
            """Worker: accumulate microphone chunks and process full windows."""
            pending = np.zeros(0, dtype=np.float32)
            while not stop_event.is_set():
                try:
                    chunk, status_text = chunks.get(timeout=0.1)
                except queue.Empty:
                    continue
                try:
                    if status_text:
                        self._log(f"⚠️ 音频输入状态: {status_text}")
                    pending = np.concatenate([pending, chunk])
                    if len(pending) >= window:
                        segment = pending[:window]
                        # Half of each window is kept as the start of the next.
                        # NOTE(review): every window is played in full, so the
                        # overlapped half is heard twice - confirm whether
                        # playback should skip or cross-fade it.
                        pending = pending[window // 2:]
                        process_segment(segment)
                except Exception as e:
                    self._log(f"处理错误: {e}")

        def audio_callback(indata, frames, time_info, status):
            """Input-stream callback: hand the first channel to the worker."""
            # Only copy and enqueue here; resampling and logging happen on the
            # worker thread so the audio callback returns quickly.
            chunks.put(
                (indata[:, 0].astype(np.float32), str(status) if status else None)
            )

        self._output_device = output_device
        self._audio_queue = chunks
        self._stop_event = stop_event
        self._realtime_running = True

        worker = threading.Thread(target=process_thread, daemon=True)
        self._realtime_thread = worker
        worker.start()

        stream = None
        try:
            stream = sd.InputStream(
                device=input_device,
                channels=1,
                samplerate=input_sr,
                blocksize=4096,
                callback=audio_callback,
            )
            stream.start()
        except Exception:
            # Undo everything, otherwise the processor would claim to be
            # running forever and refuse the next start.
            stop_event.set()
            if stream is not None:
                stream.close()
            worker.join(timeout=2)
            self._realtime_running = False
            self._realtime_thread = None
            self._audio_queue = None
            self._stop_event = None
            raise
        self._stream = stream

        self._log(f"🎤 实时变声已启动 (缓冲{buffer_seconds}秒)")
        return True

    def stop_realtime(self):
        """Stop the realtime loop: close the input stream and wait for the worker.

        The worker is given two seconds to finish; it is a daemon thread, so
        a conversion still in flight does not keep the process alive.
        """
        self._realtime_running = False

        # getattr keeps this safe on instances created without __init__.
        stop_event = getattr(self, "_stop_event", None)
        if stop_event is not None:
            stop_event.set()
        self._stop_event = None

        stream = getattr(self, "_stream", None)
        self._stream = None
        if stream:
            try:
                stream.stop()
            finally:
                stream.close()

        if self._realtime_thread:
            self._realtime_thread.join(timeout=2)
            self._realtime_thread = None

        # Drop any chunks that were never processed.
        self._audio_queue = None

        self._log("⏹ 实时变声已停止")


# Module-level processor instance handed out by get_processor(); it stays None
# until a processor is first requested.
_processor: Optional[SeedVCProcessor] = None


def get_processor() -> SeedVCProcessor:
    """Return the module-wide SeedVCProcessor, building it on the first call."""
    global _processor
    if _processor is not None:
        return _processor
    _processor = SeedVCProcessor()
    return _processor
