import pyaudio
import wave
import time
import os
from shibie import BaiduYuYin, LuYin, imgtext
import time
# 录音进
while 1:
time.sleep(2)
LuYin(5,"wap.wav")
# 识别回值
rt = BaiduYuYin("wap.wav")
tr = 1
print(rt)
# 进行判断结果
if tr == 1:
if rt[:2] == "搜索":
print(rt)
strc ="start C:/Users/Administrator/AppData/Local/Google/Chrome/Application/chrome.exe https://www.baidu.com/s?wd=" + rt[2:]
os.system(strc)
tr = 0
elif rt[:2]== "打开":
print(rt)
if rt[2:] == "丁丁":
strc = "start F:/DingDing/main/current/DingTalk.exe"
os.system(strc)
tr = 0
else:
print("未知命令")
tr = 1
# print(imgtext("2.jpg"))
----------------------------------------------------
import pyaudio
import wave
import requests
import json
import base64
import os
import requests
import json
def LuYin(Time,filename):
CHUNK = 1024 #wav文件是由若干个CHUNK组成的,CHUNK我们就理解成数据包或者数据片段。
FORMAT = pyaudio.paInt16 #这个参数后面写的pyaudio.paInt16表示我们使用量化位数 16位来进行录音。
CHANNELS = 1 #代表的是声道,这里使用的单声道。
RATE = 16000 # 采样率16k
RECORD_SECONDS = Time #采样时间
WAVE_OUTPUT_FILENAME = filename #输出文件名
p = pyaudio.PyAudio()
stream = p.open(format=FORMAT,
channels=CHANNELS,
rate=RATE,
input=True,
frames_per_buffer=CHUNK)
print("* 录音开始")
frames = []
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
data = stream.read(CHUNK)
frames.append(data)
print("* 录音结束")
stream.stop_stream()
stream.close()
p.terminate()
wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(b''.join(frames))
wf.close()
# 语音识别
def Gettokent():
baidu_server = "https://openapi.baidu.com/oauth/2.0/token?"
grant_type = "client_credentials"
#API Key
client_id = ""
#Secret Key
client_secret = ""
#拼url
url = 'https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client_id={}&client_secret={}'.format(client_id, client_secret)
#print(url)
#获取token
res = requests.post(url)
#print(res.text)
token = json.loads(res.text)["access_token"]
return token
# 文字识别
def Getr():
baidu_server = "https://openapi.baidu.com/oauth/2.0/token?"
grant_type = "client_credentials"
#API Key
client_id = ""
#Secret Key
client_secret = ""
#拼url
url = 'https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client_id={}&client_secret={}'.format(client_id, client_secret)
#print(url)
#获取token
res = requests.post(url)
#print(res.text)
token = json.loads(res.text)["access_token"]
return token
def BaiduYuYin(fileurl):
try:
RATE = "16000" #采样率16KHz
FORMAT = "wav" #wav格式
CUID = "wate_play"
DEV_PID = "1537" #无标点普通话
token = Gettokent()
# 以字节格式读取文件之后进行编码
with open(fileurl, "rb") as f:
speech = base64.b64encode(f.read()).decode('utf8')
size = os.path.getsize(fileurl)
headers = {'Content-Type': 'application/json'}
url = "https://vop.baidu.com/server_api"
data = {
"format": FORMAT,
"rate": RATE,
"dev_pid": DEV_PID,
"speech": speech,
"cuid": CUID,
"len": size,
"channel": 1,
"token": token,
}
req = requests.post(url, json.dumps(data), headers)
result = json.loads(req.text)
return result["result"][0][:-1]
except:
return '识别不清'
import urllib.parse, urllib.request, base64
def imgtext(urlk):
url = 'https://aip.baidubce.com/rest/2.0/ocr/v1/general?access_token=' + Getr()
# 二进制方式打开图文件
f = open(r"" + urlk, 'rb')#本地图片
# 参数image:图像base64编码
img = base64.b64encode(f.read())
params = {"image": img}
params = urllib.parse.urlencode(params).encode(encoding='UTF8')
request = urllib.request.Request(url, params)
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
response = urllib.request.urlopen(request)
content = response.read()