Python文字转语音案例怎么写？

wen python案例 2026-06-07 01:36:43 2

本文目录导读：

方法一：使用 pyttsx3（离线，无需网络）
方法二：使用 gTTS（谷歌在线语音，需要网络）
方法三：使用百度AI语音合成（免费额度）
方法四：使用 Edge TTS（微软在线语音，高质量）
安装依赖
完整示例：带GUI界面的文本转语音
使用建议

下面介绍几种用 Python 实现文字转语音的方法，按从简单到复杂的顺序排列：

方法一：使用 pyttsx3（离线，无需网络）

import pyttsx3
# Create the speech engine; the functions below reuse this module-level object
engine = pyttsx3.init()
# Basic usage: queue one sentence, then let runAndWait() process the queue
text = "你好，欢迎使用Python文字转语音功能！"
engine.say(text)
engine.runAndWait()
# 高级配置
def text_to_speech_advanced():
    """Speak a sample sentence after customising voice, rate and volume.

    Works on the module-level ``engine``. The first installed voice whose
    name contains "chinese" or "yue" (case-insensitive) is selected; when no
    voice matches, the engine keeps its current voice.
    """
    # Find the first voice whose name looks Chinese, if there is one
    chinese_voice = next(
        (
            candidate
            for candidate in engine.getProperty('voices')
            if any(tag in candidate.name.lower() for tag in ('chinese', 'yue'))
        ),
        None,
    )
    if chinese_voice is not None:
        engine.setProperty('voice', chinese_voice.id)

    # Speech rate, then volume (volume is on a 0.0-1.0 scale)
    engine.setProperty('rate', 150)
    engine.setProperty('volume', 0.8)

    sample = "这是一段使用自定义配置的文字转语音案例。"
    engine.say(sample)
    engine.runAndWait()
# 保存到文件
def save_to_file():
    """Render a fixed sample sentence to ``output.mp3`` with the shared engine."""
    message = "这段文字将被保存为音频文件。"
    target = 'output.mp3'
    engine.save_to_file(message, target)
    # The save request is only queued above; runAndWait() carries it out
    engine.runAndWait()

方法二：使用 gTTS（谷歌在线语音，需要网络）

from gtts import gTTS
import os
from playsound import playsound
def gtts_example():
    """Synthesize a Chinese sentence with gTTS, save it as MP3 and play it."""
    audio_path = "output.mp3"
    message = "你好，这是使用谷歌语音合成的示例。"

    # Build the request at normal speed, write the MP3, then play it back
    speech = gTTS(text=message, lang='zh-cn', slow=False)
    speech.save(audio_path)
    playsound(audio_path)
def gtts_multi_language():
    """Generate one greeting MP3 per language code with gTTS.

    Each file is named ``<lang>_hello.mp3``; a confirmation line is printed
    after every file is saved.
    """
    # Language code -> greeting to synthesize in that language
    greetings = {
        'zh-cn': '你好世界',
        'en': 'Hello World',
        'ja': 'こんにちは世界',
        'ko': '안녕하세요 세계',
    }
    for code, greeting in greetings.items():
        audio_path = f"{code}_hello.mp3"
        gTTS(text=greeting, lang=code).save(audio_path)
        print(f"已生成 {code} 语言的音频文件")

方法三：使用百度AI语音合成（免费额度）

from aip import AipSpeech
# Baidu AI credentials (obtained by registering an application on the Baidu AI
# platform). The values below are placeholders and must be replaced.
APP_ID = '你的APP_ID'
API_KEY = '你的API_KEY'
SECRET_KEY = '你的SECRET_KEY'
def baidu_tts():
    """Synthesize a sample sentence with Baidu speech synthesis and save it.

    Builds an ``AipSpeech`` client from the module-level ``APP_ID``,
    ``API_KEY`` and ``SECRET_KEY``. A non-dict result is treated as audio data
    and written to ``baidu_audio.mp3``; a dict result is treated as an error
    description and printed, so a failed request is no longer silent.
    """
    client = AipSpeech(APP_ID, API_KEY, SECRET_KEY)
    text = "你好，这是百度飞桨的文字转语音功能。"
    # Request the synthesized speech
    result = client.synthesis(text, 'zh', 1, {
        'vol': 5,      # volume, 0-15
        'spd': 5,      # speed, 0-15
        'pit': 5,      # pitch, 0-15
        'per': 0,      # speaker: 0 = female voice, 1 = male voice
    })
    if isinstance(result, dict):
        # A dict here means the request failed; report it instead of
        # returning without any feedback.
        print(f"语音合成失败：{result}")
        return
    # Anything else is the audio payload: write it out as-is
    with open('baidu_audio.mp3', 'wb') as f:
        f.write(result)
    print("音频文件已保存为 baidu_audio.mp3")

方法四：使用 Edge TTS（微软在线语音，高质量）

import asyncio
import edge_tts
async def edge_tts_example():
    """Synthesize a Chinese sentence with edge-tts and save it as MP3.

    The audio is written to ``edge_audio.mp3`` and a confirmation line is
    printed afterwards.
    """
    voice_name = "zh-CN-XiaoxiaoNeural"  # Chinese female voice
    message = "你好，这是使用微软Edge的语音合成技术生成的语音。"

    # Build the synthesis request, then write its audio to disk
    request = edge_tts.Communicate(message, voice_name)
    await request.save("edge_audio.mp3")
    print("音频文件已生成！")
# Run the coroutine; asyncio.run() drives it until it finishes
asyncio.run(edge_tts_example())
# 列出可用的中文语音
async def list_chinese_voices():
    """Print the short name and friendly name of every Chinese edge-tts voice.

    Voices are selected by a ``Locale`` starting with ``zh-``. The printed
    short name (for example ``zh-CN-XiaoxiaoNeural``) is the value to pass
    as the voice argument of ``edge_tts.Communicate``.
    """
    voices = await edge_tts.list_voices()
    # Voice entries expose 'ShortName', 'Locale' and 'FriendlyName'; there is
    # no 'DisplayName' key, so reading it raised KeyError.
    chinese_voices = [v for v in voices if v['Locale'].startswith('zh-')]
    for voice in chinese_voices:
        print(f"{voice['ShortName']} - {voice['FriendlyName']}")
# Run the voice-listing coroutine
asyncio.run(list_chinese_voices())

安装依赖

# 基础安装
pip install pyttsx3
pip install gtts
pip install playsound
pip install baidu-aip
pip install edge-tts
# 如果使用pyttsx3有问题，可能需要安装：
# Windows: 正常安装即可
# Mac: pip install pyobjc
# Linux: sudo apt-get install espeak

完整示例：带GUI界面的文本转语音

import tkinter as tk
from tkinter import ttk
import pyttsx3
import threading
class TextToSpeechApp:
    """Small Tkinter front end for pyttsx3 text-to-speech.

    The window offers a text box, rate and volume sliders, and buttons to
    play, pause, stop and save the entered text.
    """

    def __init__(self, root):
        """Set up the window, create the speech engine and build the widgets.

        Args:
            root: Tk root window that hosts the interface.
        """
        self.root = root
        self.root.title("文字转语音工具")
        self.root.geometry("500x400")
        # One engine instance is shared by every action of the app
        self.engine = pyttsx3.init()
        self.create_widgets()

    def create_widgets(self):
        """Create and lay out the text box, sliders and action buttons."""
        # Text input area
        tk.Label(self.root, text="请输入文字：", font=("微软雅黑", 12)).pack(pady=10)
        self.text_input = tk.Text(self.root, height=8, width=50)
        self.text_input.pack(pady=10)

        # Frame holding the rate and volume sliders
        control_frame = tk.Frame(self.root)
        control_frame.pack(pady=10)

        # Speech-rate slider
        tk.Label(control_frame, text="语速：").grid(row=0, column=0)
        self.speed_var = tk.IntVar(value=150)
        speed_scale = tk.Scale(control_frame, from_=50, to=300,
                               variable=self.speed_var, orient=tk.HORIZONTAL)
        speed_scale.grid(row=0, column=1)

        # Volume slider
        tk.Label(control_frame, text="音量：").grid(row=1, column=0)
        self.volume_var = tk.DoubleVar(value=0.8)
        volume_scale = tk.Scale(control_frame, from_=0.0, to=1.0,
                                variable=self.volume_var, resolution=0.1,
                                orient=tk.HORIZONTAL)
        volume_scale.grid(row=1, column=1)

        # Action buttons, packed left to right
        button_frame = tk.Frame(self.root)
        button_frame.pack(pady=20)
        tk.Button(button_frame, text="播放", command=self.speak,
                  bg="green", fg="white", width=10).pack(side=tk.LEFT, padx=5)
        tk.Button(button_frame, text="暂停", command=self.pause,
                  bg="orange", fg="white", width=10).pack(side=tk.LEFT, padx=5)
        tk.Button(button_frame, text="停止", command=self.stop,
                  bg="red", fg="white", width=10).pack(side=tk.LEFT, padx=5)
        tk.Button(button_frame, text="保存文件", command=self.save_to_file,
                  bg="blue", fg="white", width=10).pack(side=tk.LEFT, padx=5)

    def speak(self):
        """Speak the text box content; does nothing when it is empty."""
        text = self.text_input.get("1.0", tk.END).strip()
        if text:
            # Speak on a daemon thread so the blocking runAndWait() call
            # does not freeze the interface
            threading.Thread(target=self._speak_thread, args=(text,), daemon=True).start()

    def _speak_thread(self, text):
        """Apply the slider settings and speak ``text`` (thread target).

        Args:
            text: The text to speak.
        """
        self.engine.setProperty('rate', self.speed_var.get())
        self.engine.setProperty('volume', self.volume_var.get())
        self.engine.say(text)
        self.engine.runAndWait()

    def pause(self):
        """Simplified pause: calls ``engine.stop()``, same as ``stop``."""
        self.engine.stop()

    def stop(self):
        """Stop the engine via ``engine.stop()``."""
        self.engine.stop()

    def save_to_file(self):
        """Ask for a destination path and render the text box content to it.

        Does nothing when the text box is empty or the dialog is cancelled.
        Shows an information box with the chosen path after saving.
        """
        # ``import tkinter as tk`` does not load these submodules, so
        # ``tk.filedialog`` / ``tk.messagebox`` raised AttributeError unless
        # something else had already imported them.
        from tkinter import filedialog, messagebox

        text = self.text_input.get("1.0", tk.END).strip()
        if text:
            filename = filedialog.asksaveasfilename(
                defaultextension=".mp3",
                filetypes=[("MP3 files", "*.mp3"), ("All files", "*.*")]
            )
            if filename:
                self.engine.save_to_file(text, filename)
                self.engine.runAndWait()
                messagebox.showinfo("成功", f"文件已保存为：{filename}")
# Launch the application when this file is run as a script
if __name__ == "__main__":
    root = tk.Tk()
    app = TextToSpeechApp(root)
    root.mainloop()