# IoT-For-Beginners/6-consumer/lessons/1-speech-recognition/code-speech-to-text/pi/smart-timer/app.py

import io
import json
import pyaudio
import requests
import time
import wave

from grove.factory import Factory
# Grove button used as the push-to-talk control: the script idles until it
# reads as pressed and records for as long as it stays pressed.
# NOTE(review): 5 is presumably the GPIO pin / Grove port number - confirm
# against the wiring.
button = Factory.getButton('GPIO-HIGH', 5)

# Shared PyAudio instance used to open the microphone stream.
audio = pyaudio.PyAudio()
# Passed to PyAudio as input_device_index when opening the microphone stream.
microphone_card_number = 1
# Not referenced by the code visible here.
# NOTE(review): presumably reserved for audio playback - confirm.
speaker_card_number = 1
# Sample rate in Hz: used for the input stream, the WAV header and the
# samplerate field of the Content-Type header sent with the recording.
rate = 48000

def capture_audio():
    """Record from the microphone while the button is held; return WAV data.

    Opens a mono, 16-bit input stream on the device at index
    ``microphone_card_number`` running at ``rate`` Hz, reads 4096-frame
    chunks for as long as ``button.is_pressed()`` is true, and wraps the
    captured samples in a WAV container.

    Returns:
        io.BytesIO: An in-memory WAV file positioned at offset 0. If the
        button is not pressed on entry, the file holds a header and no
        audio frames.

    Raises:
        Any exception raised by ``audio.open`` or ``stream.read``. The
        stream is stopped and closed before a read error propagates.
    """
    chunk_size = 4096

    stream = audio.open(format=pyaudio.paInt16,
                        rate=rate,
                        channels=1,
                        input_device_index=microphone_card_number,
                        input=True,
                        frames_per_buffer=chunk_size)

    frames = []

    try:
        while button.is_pressed():
            frames.append(stream.read(chunk_size))
    finally:
        # Always release the audio device, even when a read fails part-way
        # through; otherwise the stream would stay open after the error.
        try:
            stream.stop_stream()
        finally:
            stream.close()

    wav_buffer = io.BytesIO()
    with wave.open(wav_buffer, 'wb') as wavefile:
        wavefile.setnchannels(1)
        wavefile.setsampwidth(audio.get_sample_size(pyaudio.paInt16))
        wavefile.setframerate(rate)
        wavefile.writeframes(b''.join(frames))

    # Rewind only after the WAV writer has finished with the buffer, so the
    # caller always reads from the start of the file.
    wav_buffer.seek(0)

    return wav_buffer

# Speech service settings. The '<...>' values are placeholders to be filled in.
# Sent as the Ocp-Apim-Subscription-Key header when requesting an access token.
api_key = '<key>'
# Interpolated into the hostnames of the token and speech-to-text endpoints.
location = '<location>'
# Sent as the 'language' query parameter of the recognition request.
language = '<language>'

def get_access_token(timeout=10):
    """Request an access token from the token-issuing endpoint.

    POSTs to the ``issuetoken`` endpoint for ``location`` with ``api_key``
    in the ``Ocp-Apim-Subscription-Key`` header.

    Args:
        timeout: Seconds to wait for the service before giving up, passed
            straight to ``requests.post``. Without it a stalled connection
            would block the script indefinitely.

    Returns:
        str: The response body, which the caller uses as a bearer token.

    Raises:
        requests.exceptions.HTTPError: If the service answers with a 4xx/5xx
            status, so an error body is never mistaken for a token.
        requests.exceptions.RequestException: On connection failures or
            timeouts.
    """
    headers = {
        'Ocp-Apim-Subscription-Key': api_key
    }

    token_endpoint = f'https://{location}.api.cognitive.microsoft.com/sts/v1.0/issuetoken'
    response = requests.post(token_endpoint, headers=headers, timeout=timeout)
    # Fail here with a clear HTTP error instead of returning the error body.
    response.raise_for_status()
    return response.text

def convert_speech_to_text(buffer, timeout=30):
    """Send recorded audio to the speech-to-text endpoint and return the text.

    Args:
        buffer: The WAV audio to upload, passed to ``requests.post`` as the
            request body (for example the ``io.BytesIO`` returned by
            ``capture_audio``).
        timeout: Seconds to wait for the service before giving up, passed
            straight to ``requests.post``.

    Returns:
        str: The ``DisplayText`` field of the response when its
        ``RecognitionStatus`` is ``'Success'``; otherwise an empty string.

    Raises:
        requests.exceptions.HTTPError: If the service answers with a 4xx/5xx
            status.
        requests.exceptions.RequestException: On connection failures or
            timeouts (including those raised while fetching the token).
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    url = f'https://{location}.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1'

    headers = {
        'Authorization': 'Bearer ' + get_access_token(),
        'Content-Type': f'audio/wav; codecs=audio/pcm; samplerate={rate}',
        'Accept': 'application/json;text/xml'
    }

    params = {
        'language': language
    }

    response = requests.post(url, headers=headers, params=params, data=buffer,
                             timeout=timeout)
    # Surface HTTP failures directly rather than as a confusing JSON/key error.
    response.raise_for_status()
    response_json = json.loads(response.text)

    # Use .get so a payload missing either field yields '' instead of KeyError.
    if response_json.get('RecognitionStatus') == 'Success':
        return response_json.get('DisplayText', '')
    return ''

# Main loop: wait for a button press, record while it is held, then print
# whatever text the speech service recognised. Runs until interrupted.
while True:
    # Idle, checking the button every 0.1 seconds until it is pressed.
    while True:
        if button.is_pressed():
            break
        time.sleep(.1)

    # Record the utterance, transcribe it and show the result.
    print(convert_speech_to_text(capture_audio()))
lesson 21 (#81) * Adding content * Update en.json * Update README.md * Update TRANSLATIONS.md * Adding lesson tempolates * Fixing code files with each others code in * Update README.md * Adding lesson 16 * Adding virtual camera * Adding Wio Terminal camera capture * Adding wio terminal code * Adding SBC classification to lesson 16 * Adding challenge, review and assignment * Adding images and using new Azure icons * Update README.md * Update iot-reference-architecture.png * Adding structure for JulyOT links * Removing icons * Sketchnotes! * Create lesson-1.png * Starting on lesson 18 * Updated sketch * Adding virtual distance sensor * Adding Wio Terminal image classification * Update README.md * Adding structure for project 6 and wio terminal distance sensor * Adding some of the smart timer stuff * Updating sketchnotes * Adding virtual device speech to text * Adding chapter 21 4 years ago			`import io`
			`import json`
			`import pyaudio`
			`import requests`
			`import time`
			`import wave`

			`from grove.factory import Factory`
			`button = Factory.getButton('GPIO-HIGH', 5)`

			`audio = pyaudio.PyAudio()`
			`microphone_card_number = 1`
			`speaker_card_number = 1`
			`rate = 48000`

			`def capture_audio():`
			`stream = audio.open(format = pyaudio.paInt16,`
			`rate = rate,`
			`channels = 1,`
			`input_device_index = microphone_card_number,`
			`input = True,`
			`frames_per_buffer = 4096)`

			`frames = []`

			`while button.is_pressed():`
			`frames.append(stream.read(4096))`

			`stream.stop_stream()`
			`stream.close()`

			`wav_buffer = io.BytesIO()`
			`with wave.open(wav_buffer, 'wb') as wavefile:`
			`wavefile.setnchannels(1)`
			`wavefile.setsampwidth(audio.get_sample_size(pyaudio.paInt16))`
			`wavefile.setframerate(rate)`
			`wavefile.writeframes(b''.join(frames))`
			`wav_buffer.seek(0)`

			`return wav_buffer`

			`api_key = '<key>'`
			`location = '<location>'`
			`language = '<language>'`

			`def get_access_token():`
			`headers = {`
			`'Ocp-Apim-Subscription-Key': api_key`
			`}`

			`token_endpoint = f'https://{location}.api.cognitive.microsoft.com/sts/v1.0/issuetoken'`
			`response = requests.post(token_endpoint, headers=headers)`
			`return str(response.text)`

			`def convert_speech_to_text(buffer):`
			`url = f'https://{location}.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1'`

			`headers = {`
			`'Authorization': 'Bearer ' + get_access_token(),`
			`'Content-Type': f'audio/wav; codecs=audio/pcm; samplerate={rate}',`
			`'Accept': 'application/json;text/xml'`
			`}`

			`params = {`
			`'language': language`
			`}`

			`response = requests.post(url, headers=headers, params=params, data=buffer)`
			`response_json = json.loads(response.text)`

			`if response_json['RecognitionStatus'] == 'Success':`
			`return response_json['DisplayText']`
			`else:`
			`return ''`

			`while True:`
			`while not button.is_pressed():`
			`time.sleep(.1)`

			`buffer = capture_audio()`
			`text = convert_speech_to_text(buffer)`
			`print(text)`