You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
IoT-For-Beginners/6-consumer/lessons/1-speech-recognition/code-speech-to-text/pi/smart-timer/app.py

84 lines
2.1 KiB

import io
import pyaudio
import requests
import time
import wave
from grove.factory import Factory
button = Factory.getButton('GPIO-HIGH', 5)
audio = pyaudio.PyAudio()
microphone_card_number = 1
speaker_card_number = 1
rate = 48000
def capture_audio():
stream = audio.open(format = pyaudio.paInt16,
rate = rate,
channels = 1,
input_device_index = microphone_card_number,
input = True,
frames_per_buffer = 4096)
frames = []
while button.is_pressed():
frames.append(stream.read(4096))
stream.stop_stream()
stream.close()
wav_buffer = io.BytesIO()
with wave.open(wav_buffer, 'wb') as wavefile:
wavefile.setnchannels(1)
wavefile.setsampwidth(audio.get_sample_size(pyaudio.paInt16))
wavefile.setframerate(rate)
wavefile.writeframes(b''.join(frames))
wav_buffer.seek(0)
return wav_buffer
speech_api_key = '<key>'
location = '<location>'
language = '<language>'
def get_access_token():
headers = {
'Ocp-Apim-Subscription-Key': speech_api_key
}
token_endpoint = f'https://{location}.api.cognitive.microsoft.com/sts/v1.0/issuetoken'
response = requests.post(token_endpoint, headers=headers)
return str(response.text)
def convert_speech_to_text(buffer):
url = f'https://{location}.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1'
headers = {
'Authorization': 'Bearer ' + get_access_token(),
'Content-Type': f'audio/wav; codecs=audio/pcm; samplerate={rate}',
'Accept': 'application/json;text/xml'
}
params = {
'language': language
}
response = requests.post(url, headers=headers, params=params, data=buffer)
response_json = response.json()
if response_json['RecognitionStatus'] == 'Success':
return response_json['DisplayText']
else:
return ''
def process_text(text):
print(text)
while True:
while not button.is_pressed():
time.sleep(.1)
buffer = capture_audio()
text = convert_speech_to_text(buffer)
process_text(text)