Python实现语音启动电脑应用程序_F11 - 专业站长和开发者的学习网站

实现思路

Vosk模型进行输入语音转换

txt字典导航程序路径

pyttsx3引擎进行语音打印输出

关键词=程序路径

完整代码

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

import os

import json

import queue

import sounddevice as sd

from vosk import Model, KaldiRecognizer

import subprocess

import time

import pyttsx3

import threading

# Initialise the pyttsx3 text-to-speech engine.
engine = pyttsx3.init()
engine.setProperty('rate', 150)  # speaking rate
engine.setProperty('volume', 1.0)  # volume

# Directory that holds the Vosk model.
model_path = r"D:\daku\yuyinshibie\vosk-model-small-cn-0.22"

if not os.path.exists(model_path):
    # Report the problem on the console and by voice, then abort.
    print(f"模型路径不存在: {model_path}")
    engine.say(f"模型路径不存在: {model_path}")
    engine.runAndWait()
    # `exit` is a helper injected by the `site` module and is not guaranteed
    # to exist (e.g. `python -S`, frozen builds); SystemExit always is.
    raise SystemExit(1)

# Read the dictionary file; each line has the form "command=program path".
def load_app_dict(file_path):
    """Load the keyword-to-program mapping from a text file.

    Every useful line looks like ``keyword=program path``.  Several keywords
    may share one path when separated by a comma, either the full-width
    ``，`` or the ASCII ``,`` (for example ``微信,weixin=...``).  Lines
    without ``=``, without any keyword, or without a path are ignored.

    Args:
        file_path: Path of the UTF-8 encoded dictionary file.

    Returns:
        A dict mapping each keyword to its program path.  The dict is empty
        when the file does not exist; in that case the problem is printed
        and spoken through the module-level ``engine``.
    """
    app_dict = {}

    if not os.path.exists(file_path):
        print(f"字典文件不存在: {file_path}")
        engine.say(f"字典文件不存在: {file_path}")
        engine.runAndWait()
        return app_dict

    # 'utf-8-sig' reads plain UTF-8 as well, and additionally drops the BOM
    # that would otherwise become part of the first keyword.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        for line in file:
            # Split on the first '=' only, so a path or its arguments may
            # themselves contain '='.
            keys, separator, value = line.strip().partition('=')
            value = value.strip()
            if not separator or not value:
                continue

            # Accept both comma variants as alias separators.
            for key in keys.replace(',', '，').split('，'):
                key = key.strip()
                if key:
                    app_dict[key] = value

    return app_dict

# Launch an application.
def launch_application(app_name, app_dict):
    """Start the program registered for *app_name* and announce the outcome.

    Args:
        app_name: Keyword extracted from the recognised speech.
        app_dict: Mapping of keyword -> program path, as built by
            ``load_app_dict``.

    Returns:
        None.  Success, an unknown keyword and a failed launch are all
        reported through ``say``.
    """
    if app_name not in app_dict:
        say(f"找不到与 '{app_name}' 对应的应用程序。")
        return

    app_path = app_dict[app_name]
    say(f"正在启动 {app_name}...")

    try:
        subprocess.Popen(app_path)
    except OSError as err:
        # A wrong or missing path must not take the whole listener down.
        print(f"启动 {app_name} 失败: {err}")
        say(f"启动 {app_name} 失败。")
        return

    time.sleep(2)  # wait 2 seconds before listening continues

# Speak a text aloud and pause listening while doing so.
def say(text):
    """Speak *text* with the TTS engine while the audio input is paused.

    When the module-level ``stream`` exists it is stopped before speaking
    and started again afterwards, under ``stream_lock``.  The restart sits in
    a ``finally`` clause so that a failure inside the TTS engine cannot leave
    the program with a stopped input stream.

    Args:
        text: The sentence to speak.
    """
    paused = stream is not None

    if paused:
        with stream_lock:
            # NOTE(review): kept from the original. sounddevice does not
            # appear to expose a settable `callback` attribute, so stop()
            # below is presumably what actually pauses capture - confirm.
            stream.callback = None
            stream.stop()  # pause the audio stream

    try:
        engine.say(text)
        engine.runAndWait()
    finally:
        if paused:
            with stream_lock:
                stream.start()  # resume the audio stream
                stream.callback = callback_func

# Shared state used by the audio callback, say() and the main program.
q = queue.Queue()  # audio blocks handed over by the input callback
stream_lock = threading.Lock()  # taken by say() around stop/start of the stream
stream = callback_func = None  # both are assigned by the main program
last_partial_result = last_full_command = ""

# Load the model and create the recogniser (16000 is the sample rate).
model = Model(model_path)
rec = KaldiRecognizer(model, 16000)

def callback(indata, frames, time, status):
    """Input-stream callback: queue every captured audio block.

    Args:
        indata: Buffer with the captured audio; copied into ``bytes``.
        frames: Unused.
        time: Unused (inside this function the name hides the ``time``
            module).
        status: Printed to stderr when truthy.
    """
    if status:
        # `sys` is not among the file's top-level imports; importing it here
        # keeps this branch from raising NameError.
        import sys

        print(status, file=sys.stderr)
    q.put(bytes(indata))

# Main program
if __name__ == "__main__":
    dict_file = r"D:\daku\yuyinshibie\zidian.txt"  # dictionary file path
    app_dict = load_app_dict(dict_file)

    try:
        # Create the audio stream up front so that say() can pause/resume it.
        callback_func = callback
        stream = sd.RawInputStream(samplerate=16000, blocksize=8000, dtype='int16',
                                   channels=1, callback=callback)
        stream.start()

        say("请说：")

        while True:
            data = q.get()

            if rec.AcceptWaveform(data):
                # A complete utterance was recognised.
                result = json.loads(rec.Result())
                command = result.get('text', '').strip()
                # Ignore empty results and an immediate repeat of the
                # previous command.
                if command and command != last_full_command:
                    print(f"你说的是: {command}")
                    say(f"你说的是: {command}")
                    if "打开" in command:
                        app_to_open = command.replace("打开", "").strip()
                        launch_application(app_to_open, app_dict)
                    last_full_command = command
                continue

            # Utterance still in progress: fetch the partial result once
            # instead of asking the recogniser for it twice.
            partial_json = rec.PartialResult()
            if not partial_json:
                continue
            partial_result = json.loads(partial_json).get('partial', '')
            if partial_result and "打开" in partial_result and partial_result != last_partial_result:
                print(f"部分结果: {partial_result}")
                # NOTE(review): say() stops the input stream while speaking,
                # so announcing a partial result may cut off the rest of the
                # utterance - confirm this is intended.
                say(f"部分结果: {partial_result}")
                last_partial_result = partial_result
    except KeyboardInterrupt:
        say("\n退出程序。")
    finally:
        # Release the audio device whether or not start-up succeeded.
        if stream is not None:
            stream.stop()
            stream.close()

关键词部分：为了提高识别准确率并兼容识别出的谐音内容，可以为同一个程序添加多个关键词，关键词之间使用中文逗号“，”作为分隔

字典路径如果出现中文字符有可能会报错！

代码意义不大，如果考虑深入：可以尝试增加快捷键，以及相关应用接口可以更好控制

上班族打开电脑的第一件事情是启动相关应用，同样可以尝试多应用编组启动