- Actualización de URLs de modelos de Vosk con versiones específicas
- Añadidas URLs verificadas para todos los idiomas soportados
- Implementación de fallback a inglés si el idioma solicitado no está disponible
- Mejor manejo de errores y mensajes más informativos durante la descarga
- Se muestra la URL específica que se está descargando

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
369 lines
No EOL
15 KiB
Python
Executable file
369 lines
No EOL
15 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
# [Script] : claude_voice.py
|
|
# [Apps] : MRDEVS TOOLS
|
|
# [Description]: Convierte instrucciones de voz a texto para Claude Code
|
|
# [Author] : Cortana Rosero One <cortana@rosero.one>
|
|
# [Generated] : Created by Claude Code (claude-3-7-sonnet-20250219)
|
|
# [Created] : 2025/03/30 16:45:00
|
|
# [Modified] : 2025/03/30 17:45:00
|
|
# [Version] : 1.3.0
|
|
# [Use Notes] : Instalar dependencias en Ubuntu/Debian: sudo apt install python3-pyaudio python3-pip && pip install --user vosk pydub sounddevice wget
|
|
|
|
import os
|
|
import sys
|
|
import json
|
|
import subprocess
|
|
import argparse
|
|
import time
|
|
import queue
|
|
import threading
|
|
|
|
# Colores para la salida
|
|
class Colors:
    """ANSI escape sequences used to colour and style terminal output."""

    # Foreground colours.
    PURPLE = "\033[95m"
    BLUE = "\033[94m"
    CYAN = "\033[96m"
    GREEN = "\033[92m"
    YELLOW = "\033[93m"
    RED = "\033[91m"

    # Text attributes.
    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"

    # Reset sequence; the script appends it after every coloured fragment.
    END = "\033[0m"
|
|
|
|
def play_sound(sound_type):
    """Play a short audio cue for a state change; never raises on playback problems.

    If ``sounds/<sound_type>.mp3`` exists next to this script it is played
    through ``AudioSegment`` / ``play``. Otherwise, or when that playback
    fails for any reason, the terminal bell is used instead: one beep for
    ``"start"``, two for ``"stop"`` and three for ``"error"``.

    Args:
        sound_type: One of ``"start"``, ``"stop"`` or ``"error"``. Any other
            value is ignored.
    """
    # Number of terminal bells emitted as the fallback cue for each state.
    beep_counts = {"start": 1, "stop": 2, "error": 3}
    if not isinstance(sound_type, str) or sound_type not in beep_counts:
        return

    try:
        sounds_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "sounds")
        # Kept inside the try block: a read-only install directory must not
        # turn an optional audio cue into a crash.
        os.makedirs(sounds_dir, exist_ok=True)

        sound_file = os.path.join(sounds_dir, f"{sound_type}.mp3")
        if os.path.exists(sound_file):
            sound = AudioSegment.from_file(sound_file)
            play(sound)
            return
    except Exception:
        # Best effort by design: any failure (missing audio backend, corrupt
        # file, unwritable directory) falls through to the terminal bell.
        pass

    for beep_index in range(beep_counts[sound_type]):
        if beep_index:
            time.sleep(0.1)  # Short gap so consecutive beeps are distinguishable
        print("\a")  # Basic system beep
|
|
|
|
def download_model(language="es"):
    """Return the local path of the Vosk model for *language*, downloading it if needed.

    Models are stored under ``~/.vosk/models/<model name>``. If that directory
    already exists it is returned immediately; otherwise the model archive is
    downloaded, extracted there and the archive is deleted.

    Args:
        language: Locale code such as ``"es-ES"`` or ``"en-US"``, or a Vosk
            code such as ``"es"`` / ``"en-us"``. Languages without a known
            model fall back to English (``"en-us"``) with a warning.

    Returns:
        Path of the directory containing the extracted model.

    Raises:
        SystemExit: With status 1 if the model cannot be downloaded or
            extracted (manual installation instructions are printed first).
    """
    # Mapping from standard locale codes to the codes used in Vosk model names
    language_map = {
        "es-ES": "es",
        "en-US": "en-us",
        "fr-FR": "fr",
        "de-DE": "de",
        "it-IT": "it",
        "pt-PT": "pt",
        "ru-RU": "ru",
    }

    # Treat None / empty like the documented default instead of failing below
    language = language or "es"

    # Resolve the Vosk language code; unknown regional variants (e.g. "es-MX")
    # are reduced to their base language
    lang_code = language_map.get(language, language)
    if "-" in lang_code and lang_code not in language_map.values():
        lang_code = lang_code.split("-")[0]

    # Model archives pinned to specific versions
    model_urls = {
        "es": "https://alphacephei.com/vosk/models/vosk-model-small-es-0.42.zip",
        "en-us": "https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip",
        "fr": "https://alphacephei.com/vosk/models/vosk-model-small-fr-0.22.zip",
        "de": "https://alphacephei.com/vosk/models/vosk-model-small-de-0.15.zip",
        "it": "https://alphacephei.com/vosk/models/vosk-model-small-it-0.22.zip",
        "pt": "https://alphacephei.com/vosk/models/vosk-model-small-pt-0.3.zip",
        "ru": "https://alphacephei.com/vosk/models/vosk-model-small-ru-0.22.zip",
    }

    # No model for this language: fall back to English
    if lang_code not in model_urls:
        print(f"{Colors.YELLOW}No se encontró un modelo para el idioma '{lang_code}', usando el inglés como alternativa.{Colors.END}")
        lang_code = "en-us"

    url = model_urls[lang_code]
    models_dir = os.path.expanduser("~/.vosk/models")
    # The model directory is named after the archive, without the extension
    model_name = os.path.basename(url).replace(".zip", "")
    model_path = os.path.join(models_dir, model_name)
    if os.path.exists(model_path):
        return model_path

    print(f"{Colors.BLUE}[Claude Voice]{Colors.END} {Colors.YELLOW}Descargando modelo de voz para {lang_code}...{Colors.END}")

    zip_path = os.path.join(models_dir, os.path.basename(url))

    # Local imports: only needed on the (rare) download path
    import shutil
    import zipfile

    try:
        os.makedirs(models_dir, exist_ok=True)

        # Drop any leftover archive from an interrupted run before downloading again
        if os.path.exists(zip_path):
            os.remove(zip_path)

        print(f"{Colors.BLUE}[Claude Voice]{Colors.END} {Colors.YELLOW}Descargando desde: {url}{Colors.END}")

        try:
            # Optional third-party downloader; it draws a progress bar
            import wget
        except ImportError:
            # Standard-library fallback so a missing optional package does
            # not prevent the download
            import urllib.request
            urllib.request.urlretrieve(url, zip_path)
        else:
            wget.download(url, zip_path)
            print()  # New line after the progress bar

        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(models_dir)

        # The archive is expected to contain a top-level directory named
        # after the model; fail here rather than return a path that is missing
        if not os.path.isdir(model_path):
            raise FileNotFoundError(
                f"el archivo descargado no contiene el directorio esperado '{model_name}'"
            )

        print(f"{Colors.BLUE}[Claude Voice]{Colors.END} {Colors.GREEN}Modelo descargado y extraído correctamente en {model_path}{Colors.END}")
        return model_path
    except Exception as e:
        # model_path did not exist when we started, so anything there now is
        # a partial extraction; remove it so the next run retries the download
        # instead of returning a broken model
        shutil.rmtree(model_path, ignore_errors=True)
        print(f"{Colors.RED}Error al descargar el modelo: {e}{Colors.END}")
        print(f"{Colors.YELLOW}Por favor, descargue manualmente el modelo desde: {Colors.UNDERLINE}https://alphacephei.com/vosk/models{Colors.END}")
        print(f"{Colors.YELLOW}Y colóquelo en: {model_path}{Colors.END}")
        sys.exit(1)
    finally:
        # Remove the archive whether the download succeeded or failed
        try:
            if os.path.exists(zip_path):
                os.remove(zip_path)
        except OSError:
            pass
|
|
|
|
def recognize_speech(language="es-ES"):
    """Capture microphone audio and transcribe it locally with Vosk.

    Listening stops after 10 seconds overall, or once a final result has been
    produced and no new text has arrived for 1.5 seconds, or when the user
    presses Ctrl+C.

    Args:
        language: Language code passed to ``download_model`` to select the
            recognition model.

    Returns:
        The recognised text (the last final result, or the last partial
        result if no final result was produced; possibly an empty string),
        or ``None`` if an error occurred during recognition.
    """
    # Download the model or reuse the local copy
    model_path = download_model(language)

    model = Model(model_path)
    samplerate = 16000

    # Audio blocks are handed from the stream callback to the loop below
    q = queue.Queue()

    # The third parameter is named time_info so it does not shadow the
    # ``time`` module; the callback receives its arguments positionally.
    def callback(indata, frames, time_info, status):
        if status:
            print(f"{Colors.YELLOW}[Claude Voice] Status: {status}{Colors.END}")
        q.put(bytes(indata))

    print(f"{Colors.BLUE}[Claude Voice]{Colors.END} {Colors.YELLOW}Escuchando...{Colors.END} (Presiona Ctrl+C para detener)")
    play_sound("start")

    rec = KaldiRecognizer(model, samplerate)

    # Initialised before the try block so they exist even if the stream
    # fails to open or Ctrl+C arrives before the loop starts
    final_result = ""
    partial_results = []

    try:
        with sd.RawInputStream(samplerate=samplerate, blocksize=8000,
                               dtype='int16', channels=1, callback=callback):
            start_time = time.time()
            timeout = 10  # seconds
            last_text_time = start_time

            while True:
                now = time.time()

                # Overall time limit
                if (now - start_time) > timeout:
                    break

                # Prolonged silence after something was recognised
                if final_result and (now - last_text_time) > 1.5:
                    break

                # Wait for audio with a timeout so both limits above are
                # still checked if the stream stops delivering data
                try:
                    data = q.get(timeout=0.5)
                except queue.Empty:
                    continue

                if rec.AcceptWaveform(data):
                    result = json.loads(rec.Result())
                    text = result.get("text", "")
                    if text:
                        final_result = text
                        last_text_time = time.time()
                        print(f"{Colors.BLUE}[Claude Voice]{Colors.END} {Colors.GREEN}Procesando audio...{Colors.END}")
                else:
                    # Partial results
                    partial_text = json.loads(rec.PartialResult()).get("partial", "")
                    if partial_text:
                        partial_results.append(partial_text)
                        last_text_time = time.time()

    except KeyboardInterrupt:
        # Ctrl+C only stops listening; whatever was recognised is still returned
        print(f"\n{Colors.BLUE}[Claude Voice]{Colors.END} {Colors.YELLOW}Reconocimiento interrumpido{Colors.END}")
    except Exception as e:
        play_sound("error")
        print(f"{Colors.RED}Error en el reconocimiento de voz: {e}{Colors.END}")
        return None
    finally:
        play_sound("stop")

    # No final result but some partials: use the last partial
    if not final_result and partial_results:
        final_result = partial_results[-1]

    return final_result
|
|
|
|
def send_to_claude(text, silent=False):
    """Send *text* to the Claude Code command-line tool as its prompt argument.

    Args:
        text: Prompt to send. Empty or ``None`` is rejected without running
            anything.
        silent: When true, the "sending" status line is not printed.

    Returns:
        ``True`` if the command ran and exited with status 0, ``False``
        otherwise (empty text, command could not be started, or non-zero
        exit status).
    """
    if not text:
        return False

    if not silent:
        print(f"{Colors.BLUE}[Claude Voice]{Colors.END} Enviando a Claude Code: {Colors.BOLD}{text}{Colors.END}")

    # Local import: the module is only needed here
    import shutil

    # Prefer the executable found on PATH; otherwise try the usual install
    # location. shutil.which avoids spawning a shell just to locate it.
    claude_cmd = "claude" if shutil.which("claude") else "/usr/local/bin/claude"

    try:
        # Argument list (no shell), so the text is passed through verbatim
        result = subprocess.run([claude_cmd, text],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                text=True,
                                check=False)
    except Exception as e:
        # Typically the executable is missing; report it and signal failure
        print(f"{Colors.RED}Error al ejecutar Claude Code: {e}{Colors.END}")
        return False

    if result.returncode != 0:
        print(f"{Colors.RED}Error al ejecutar Claude Code: {result.stderr}{Colors.END}")
        return False

    # The output is captured above, so forward it; otherwise the reply
    # would be silently discarded
    if result.stdout:
        print(result.stdout, end="")
    return True
|
|
|
|
def interactive_mode(language="es-ES", continuous=False):
    """Listen for spoken instructions and forward them to Claude Code.

    Each recognised phrase is sent with ``send_to_claude``. Saying one of the
    exit words ends the session, as does Ctrl+C.

    Args:
        language: Language code handed to ``recognize_speech``.
        continuous: If false, stop after the first instruction that was sent
            successfully; if true, keep listening, pausing one second
            between recognition rounds.
    """
    exit_words = ["salir", "terminar", "exit", "quit", "goodbye", "bye"]

    print(f"{Colors.BLUE}[Claude Voice]{Colors.END} {Colors.GREEN}Modo interactivo iniciado. Di tus instrucciones para Claude Code.{Colors.END}")
    print(f"{Colors.BLUE}[Claude Voice]{Colors.END} {Colors.YELLOW}Di 'salir' o 'terminar' para finalizar{Colors.END}")

    try:
        while True:
            heard = recognize_speech(language)

            if heard:
                heard = heard.strip()
                print(f"{Colors.BLUE}[Claude Voice]{Colors.END} {Colors.GREEN}Reconocido: {Colors.BOLD}{heard}{Colors.END}")

                # Spoken exit command ends the session
                if heard.lower() in exit_words:
                    print(f"{Colors.BLUE}[Claude Voice]{Colors.END} {Colors.YELLOW}Saliendo del modo de voz...{Colors.END}")
                    break

                delivered = send_to_claude(heard)

                # One-shot mode: done once an instruction was delivered
                if delivered and not continuous:
                    break

            # Brief pause between recognition rounds
            if continuous:
                time.sleep(1)

    except KeyboardInterrupt:
        print(f"\n{Colors.BLUE}[Claude Voice]{Colors.END} {Colors.YELLOW}Modo de voz interrumpido por el usuario{Colors.END}")

    print(f"{Colors.BLUE}[Claude Voice]{Colors.END} {Colors.GREEN}¡Hasta pronto!{Colors.END}")
|
|
|
|
def list_audio_devices():
    """Print a table of the audio devices reported by ``sd.query_devices()``.

    Each row shows the device index, name and input/output channel counts.
    Devices whose name matches the default input or output device are marked
    with a star.

    Returns:
        Always ``True``.
    """
    print(f"{Colors.BLUE}[Claude Voice]{Colors.END} {Colors.GREEN}Dispositivos de audio disponibles:{Colors.END}")
    devices = sd.query_devices()

    def _default_device_name(kind):
        """Return the name of the default device of *kind*, or None if the query fails."""
        try:
            return sd.query_devices(kind=kind)['name']
        except Exception:
            # The marker is optional, so a failed query just means no marker
            return None

    # Queried once up front (not per row) and independently, so a failing
    # input query does not also hide the default-output marker
    default_input = _default_device_name('input')
    default_output = _default_device_name('output')

    print(f"{Colors.CYAN}{'ID':<4} {'Nombre':<30} {'Canales (E/S)':<15} {'Predeterminado':<12}{Colors.END}")
    print("-" * 65)

    for i, device in enumerate(devices):
        name = device.get('name')
        default_mark = ""
        if default_input is not None and name == default_input:
            default_mark = "⭐ (entrada)"
        elif default_output is not None and name == default_output:
            default_mark = "⭐ (salida)"

        channels = f"{device.get('max_input_channels', 0)}/{device.get('max_output_channels', 0)}"
        print(f"{i:<4} {device.get('name', 'Desconocido'):<30} {channels:<15} {default_mark}")

    return True
|
|
|
|
def _print_install_instructions():
    """Print the commands needed to install this script's dependencies."""
    print(f"{Colors.BLUE}[Claude Voice]{Colors.END} {Colors.YELLOW}Instrucciones para instalar dependencias...{Colors.END}")
    print("\nPara sistemas Ubuntu/Debian, ejecuta los siguientes comandos:")
    print(f"{Colors.GREEN}sudo apt install python3-pyaudio python3-pip{Colors.END}")
    print(f"{Colors.GREEN}pip install --user vosk pydub sounddevice wget{Colors.END}")

    print("\nPara otros sistemas, consulta la documentación de Vosk:")
    print(f"{Colors.GREEN}https://alphacephei.com/vosk/install{Colors.END}")

    print("\nSi prefieres usar un entorno virtual (recomendado):")
    print(f"{Colors.GREEN}sudo apt install python3-venv python3-pyaudio{Colors.END}")
    print(f"{Colors.GREEN}python3 -m venv ~/venv-claude-voice{Colors.END}")
    print(f"{Colors.GREEN}source ~/venv-claude-voice/bin/activate{Colors.END}")
    print(f"{Colors.GREEN}pip install vosk pydub sounddevice wget{Colors.END}")
    # Show the real location of this script rather than a fixed, user-specific path
    print(f"{Colors.GREEN}# Luego ejecuta: ~/venv-claude-voice/bin/python3 {os.path.abspath(__file__)}{Colors.END}")


def main():
    """Command-line entry point: parse the arguments and run the requested mode.

    Modes, in order of precedence: ``--install-deps`` prints installation
    instructions; ``--text`` sends the given text to Claude Code without
    using the microphone; ``--list-devices`` lists audio devices; otherwise
    the interactive voice mode is started.

    Raises:
        SystemExit: With status 1 if the audio/speech dependencies cannot be
            imported or the installation instructions cannot be printed.
    """
    # The heavy dependencies are imported lazily further down, but the other
    # functions of this module use them by name, so they must be bound as
    # module globals rather than as locals of main().
    global sd, Model, KaldiRecognizer, AudioSegment, play

    parser = argparse.ArgumentParser(description='Claude Code Voice - Convierte voz a texto para Claude Code usando reconocimiento local')
    parser.add_argument('-l', '--language', default='es-ES', help='Idioma para reconocimiento (ej. es-ES, en-US)')
    parser.add_argument('-c', '--continuous', action='store_true', help='Modo continuo - escucha constantemente hasta que digas "salir"')
    parser.add_argument('-t', '--text', help='Texto a enviar directamente (sin reconocimiento de voz)')
    parser.add_argument('-s', '--silent', action='store_true', help='Modo silencioso - no muestra mensajes extra')
    parser.add_argument('-d', '--device', type=int, help='ID del dispositivo de audio a utilizar')
    parser.add_argument('--list-devices', action='store_true', help='Listar dispositivos de audio disponibles')
    parser.add_argument('--install-deps', action='store_true', help='Instalar dependencias')

    args = parser.parse_args()

    # Show installation instructions if requested
    if args.install_deps:
        try:
            _print_install_instructions()
            return
        except Exception as e:
            print(f"{Colors.RED}Error al mostrar instrucciones: {e}{Colors.END}")
            sys.exit(1)

    # Sending text directly needs none of the audio dependencies, so handle
    # it before they are checked
    if args.text:
        send_to_claude(args.text, args.silent)
        return

    # Check that the audio/speech dependencies are installed
    try:
        import sounddevice as sd
        from vosk import Model, KaldiRecognizer
        from pydub import AudioSegment
        from pydub.playback import play
    except ImportError as e:
        print(f"{Colors.RED}Error: Faltan dependencias. {e}{Colors.END}")
        print(f"{Colors.YELLOW}Ejecuta '{sys.argv[0]} --install-deps' para ver instrucciones de instalación.{Colors.END}")
        sys.exit(1)

    # List devices if requested
    if args.list_devices:
        list_audio_devices()
        return

    # Honour --device by making it sounddevice's default input device.
    # NOTE(review): relies on sounddevice accepting an (input, output) pair
    # where None keeps the current default - confirm against the installed
    # sounddevice version.
    if args.device is not None:
        sd.default.device = (args.device, None)

    # Interactive mode with speech recognition
    interactive_mode(args.language, args.continuous)
|
|
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()