From 9f3d1af3b2b88606f8b97be8d479c3da35abd0bb Mon Sep 17 00:00:00 2001
From: Jim Bennett
Date: Thu, 17 Jun 2021 18:38:45 -0700
Subject: [PATCH] Adding text to speech lab for Pi

---
 .../2-language-understanding/README.md        |   8 +-
 .../speech-trigger/__init__.py                |   8 +-
 .../lessons/3-spoken-feedback/README.md       |  27 ++-
 .../functions/smart-timer-trigger/host.json   |  15 ++
 .../smart-timer-trigger/local.settings.json   |  12 ++
 .../smart-timer-trigger/requirements.txt      |   4 +
 .../speech-trigger/__init__.py                |  60 ++++++
 .../speech-trigger/function.json              |  15 ++
 .../pi/smart-timer/app.py                     | 184 ++++++++++++++++++
 .../code-timer/pi/smart-timer/app.py          | 130 +++++++++++++
 .../virtual-iot-device/smart-timer/app.py     |  69 +++++++
 .../3-spoken-feedback/pi-text-to-speech.md    | 136 +++++++++++++
 .../single-board-computer-set-timer.md        |  97 +++++++++
 .../virtual-device-text-to-speech.md          |   4 +
 .../wio-terminal-set-timer.md                 |   3 +
 .../wio-terminal-text-to-speech.md            |   3 +
 16 files changed, 762 insertions(+), 13 deletions(-)
 create mode 100644 6-consumer/lessons/3-spoken-feedback/code-command/functions/smart-timer-trigger/host.json
 create mode 100644 6-consumer/lessons/3-spoken-feedback/code-command/functions/smart-timer-trigger/local.settings.json
 create mode 100644 6-consumer/lessons/3-spoken-feedback/code-command/functions/smart-timer-trigger/requirements.txt
 create mode 100644 6-consumer/lessons/3-spoken-feedback/code-command/functions/smart-timer-trigger/speech-trigger/__init__.py
 create mode 100644 6-consumer/lessons/3-spoken-feedback/code-command/functions/smart-timer-trigger/speech-trigger/function.json
 create mode 100644 6-consumer/lessons/3-spoken-feedback/code-spoken-response/pi/smart-timer/app.py
 create mode 100644 6-consumer/lessons/3-spoken-feedback/code-timer/pi/smart-timer/app.py
 create mode 100644 6-consumer/lessons/3-spoken-feedback/code-timer/virtual-iot-device/smart-timer/app.py
 create mode 100644 6-consumer/lessons/3-spoken-feedback/pi-text-to-speech.md
 create mode 100644 6-consumer/lessons/3-spoken-feedback/single-board-computer-set-timer.md
 create mode 100644 6-consumer/lessons/3-spoken-feedback/virtual-device-text-to-speech.md
 create mode 100644 6-consumer/lessons/3-spoken-feedback/wio-terminal-set-timer.md
 create mode 100644 6-consumer/lessons/3-spoken-feedback/wio-terminal-text-to-speech.md

diff --git a/6-consumer/lessons/2-language-understanding/README.md b/6-consumer/lessons/2-language-understanding/README.md
index 811bd1d7..29f99ae5 100644
--- a/6-consumer/lessons/2-language-understanding/README.md
+++ b/6-consumer/lessons/2-language-understanding/README.md
@@ -347,7 +347,7 @@ Once published, the LUIS model can be called from code. In the last lesson you s
     if prediction_response.prediction.top_intent == 'set timer':
         numbers = prediction_response.prediction.entities['number']
         time_units = prediction_response.prediction.entities['time unit']
-        total_time = 0
+        total_seconds = 0
     ```

     The `number` entities will be an array of numbers. For example, if you said *"Set a four minute 17 second timer."*, then the `number` array will contain 2 integers - 4 and 17.

@@ -392,15 +392,15 @@ Once published, the LUIS model can be called from code. In the last lesson you s

     ```python
     if time_unit == 'minute':
-        total_time += number * 60
+        total_seconds += number * 60
     else:
-        total_time += number
+        total_seconds += number
     ```

1. Finally, outside this loop through the entities, log the total time for the timer:

     ```python
-    logging.info(f'Timer required for {total_time} seconds')
+    logging.info(f'Timer required for {total_seconds} seconds')
     ```

 1. Run the function app and speak into your IoT device. You will see the total time for the timer in the function app output:

diff --git a/6-consumer/lessons/2-language-understanding/code/functions/smart-timer-trigger/speech-trigger/__init__.py b/6-consumer/lessons/2-language-understanding/code/functions/smart-timer-trigger/speech-trigger/__init__.py
index e6608607..1b9f3ac5 100644
--- a/6-consumer/lessons/2-language-understanding/code/functions/smart-timer-trigger/speech-trigger/__init__.py
+++ b/6-consumer/lessons/2-language-understanding/code/functions/smart-timer-trigger/speech-trigger/__init__.py
@@ -28,16 +28,16 @@ def main(events: List[func.EventHubEvent]):
     if prediction_response.prediction.top_intent == 'set timer':
         numbers = prediction_response.prediction.entities['number']
         time_units = prediction_response.prediction.entities['time unit']
-        total_time = 0
+        total_seconds = 0

         for i in range(0, len(numbers)):
             number = numbers[i]
             time_unit = time_units[i][0]

             if time_unit == 'minute':
-                total_time += number * 60
+                total_seconds += number * 60
             else:
-                total_time += number
+                total_seconds += number

-        logging.info(f'Timer required for {total_time} seconds')
+        logging.info(f'Timer required for {total_seconds} seconds')

diff --git a/6-consumer/lessons/3-spoken-feedback/README.md b/6-consumer/lessons/3-spoken-feedback/README.md
index aac7471a..b114baec 100644
--- a/6-consumer/lessons/3-spoken-feedback/README.md
+++ b/6-consumer/lessons/3-spoken-feedback/README.md
@@ -26,6 +26,18 @@ In this lesson we'll cover:

 ## Text to speech

+
+
+
+
+
+
+
+
+
+
+
+
 ## Set the timer

 The timer can be set by sending a command from the serverless code, instructing the IoT device to set the timer. This command will contain the time in seconds until the timer needs to go off.

@@ -38,11 +50,11 @@ The timer can be set by sending a command from the serverless code, instructing

 You will need to set up the connection string for the IoT Hub with the service policy (*NOT* the device) in your `local.settings.json` file and add the `azure-iot-hub` pip package to your `requirements.txt` file. The device ID can be extracted from the event.

-1. The direct method you send needs to be called `set-timer`, and will need to send the length of the timer as a JSON property called `time`. Use the following code to build the `CloudToDeviceMethod` using the `total_time` calculated from the data extracted by LUIS:
+1. The direct method you send needs to be called `set-timer`, and will need to send the length of the timer as a JSON property called `seconds`. Use the following code to build the `CloudToDeviceMethod` using the `total_seconds` calculated from the data extracted by LUIS:

     ```python
     payload = {
-        'time': total_time
+        'seconds': total_seconds
     }
     direct_method = CloudToDeviceMethod(method_name='set-timer', payload=json.dumps(payload))
     ```

@@ -60,11 +72,11 @@ The timer can be set by sending a command from the serverless code, instructing

 * [Arduino - Wio Terminal](wio-terminal-set-timer.md)
 * [Single-board computer - Raspberry Pi/Virtual IoT device](single-board-computer-set-timer.md)

-> 💁 You can find this code in the [code-command/wio-terminal](code-command/wio-terminal), [code-command/virtual-device](code-command/virtual-device), or [code-command/pi](code-command/pi) folder.
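> 💁 Putting the pieces of this task together, a minimal sketch of the serverless code that sends the command might look like the following. It assumes `total_seconds` and `device_id` have already been extracted as described above, and wraps the steps in a hypothetical `send_timer_command` helper - the helper name is illustrative, not part of the lesson code:

```python
import json
import os

from azure.iot.hub import IoTHubRegistryManager
from azure.iot.hub.models import CloudToDeviceMethod

def send_timer_command(device_id, total_seconds):
    # Build the JSON payload with the timer length in seconds
    payload = {
        'seconds': total_seconds
    }

    # Wrap the payload in a direct method request called set-timer
    direct_method = CloudToDeviceMethod(method_name='set-timer', payload=json.dumps(payload))

    # Connect to the IoT Hub using the service policy connection string
    # and invoke the direct method on the device
    registry_manager_connection_string = os.environ['REGISTRY_MANAGER_CONNECTION_STRING']
    registry_manager = IoTHubRegistryManager(registry_manager_connection_string)
    registry_manager.invoke_device_method(device_id, direct_method)
```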
-
 ## Convert text to speech

-The same speech service you used to convert speech to text can be used to convert text back into speech, and this can be played through a microphone on your IoT device.
+The same speech service you used to convert speech to text can be used to convert text back into speech, and this can be played through a speaker on your IoT device.
+
+Each language supports a range of voices, including more natural sounding neural voices, and you can choose which voice is used to generate the speech.
+
+The text to convert is sent to the speech service using Speech Synthesis Markup Language (SSML), an XML-based markup language that defines the text along with the language and voice to use.

 ### Task - convert text to speech

diff --git a/6-consumer/lessons/3-spoken-feedback/code-command/functions/smart-timer-trigger/host.json b/6-consumer/lessons/3-spoken-feedback/code-command/functions/smart-timer-trigger/host.json
new file mode 100644
index 00000000..291065f8
--- /dev/null
+++ b/6-consumer/lessons/3-spoken-feedback/code-command/functions/smart-timer-trigger/host.json
@@ -0,0 +1,15 @@
+{
+    "version": "2.0",
+    "logging": {
+        "applicationInsights": {
+            "samplingSettings": {
+                "isEnabled": true,
+                "excludedTypes": "Request"
+            }
+        }
+    },
+    "extensionBundle": {
+        "id": "Microsoft.Azure.Functions.ExtensionBundle",
+        "version": "[2.*, 3.0.0)"
+    }
+}
\ No newline at end of file
diff --git a/6-consumer/lessons/3-spoken-feedback/code-command/functions/smart-timer-trigger/local.settings.json b/6-consumer/lessons/3-spoken-feedback/code-command/functions/smart-timer-trigger/local.settings.json
new file mode 100644
index 00000000..8b5b956e
--- /dev/null
+++ b/6-consumer/lessons/3-spoken-feedback/code-command/functions/smart-timer-trigger/local.settings.json
@@ -0,0 +1,12 @@
+{
+    "IsEncrypted": false,
+    "Values": {
+        "FUNCTIONS_WORKER_RUNTIME": "python",
+        "AzureWebJobsStorage": "UseDevelopmentStorage=true",
+        "IOT_HUB_CONNECTION_STRING": "",
+        "LUIS_KEY": "",
+        "LUIS_ENDPOINT_URL": "",
+        "LUIS_APP_ID": "",
+        "REGISTRY_MANAGER_CONNECTION_STRING": ""
+    }
+}
\ No newline at end of file
diff --git a/6-consumer/lessons/3-spoken-feedback/code-command/functions/smart-timer-trigger/requirements.txt b/6-consumer/lessons/3-spoken-feedback/code-command/functions/smart-timer-trigger/requirements.txt
new file mode 100644
index 00000000..d0405a38
--- /dev/null
+++ b/6-consumer/lessons/3-spoken-feedback/code-command/functions/smart-timer-trigger/requirements.txt
@@ -0,0 +1,5 @@
+# Do not include azure-functions-worker as it may conflict with the Azure Functions platform
+
+azure-functions
+azure-cognitiveservices-language-luis
+azure-iot-hub
\ No newline at end of file
diff --git a/6-consumer/lessons/3-spoken-feedback/code-command/functions/smart-timer-trigger/speech-trigger/__init__.py b/6-consumer/lessons/3-spoken-feedback/code-command/functions/smart-timer-trigger/speech-trigger/__init__.py
new file mode 100644
index 00000000..be8e5eee
--- /dev/null
+++ b/6-consumer/lessons/3-spoken-feedback/code-command/functions/smart-timer-trigger/speech-trigger/__init__.py
@@ -0,0 +1,60 @@
+from typing import List
+import logging
+
+import azure.functions as func
+
+import json
+import os
+from azure.cognitiveservices.language.luis.runtime import LUISRuntimeClient
+from msrest.authentication import CognitiveServicesCredentials
+
+from azure.iot.hub import IoTHubRegistryManager
+from azure.iot.hub.models import CloudToDeviceMethod
+
+def main(events: List[func.EventHubEvent]):
+    luis_key = os.environ['LUIS_KEY']
+    endpoint_url = os.environ['LUIS_ENDPOINT_URL']
+    app_id = os.environ['LUIS_APP_ID']
+    registry_manager_connection_string = os.environ['REGISTRY_MANAGER_CONNECTION_STRING']
+
+    credentials = CognitiveServicesCredentials(luis_key)
+    client = LUISRuntimeClient(endpoint=endpoint_url, credentials=credentials)
+
+    for event in events:
+        logging.info('Python EventHub trigger processed an event: %s',
+                     event.get_body().decode('utf-8'))
+
+        device_id = event.iothub_metadata['connection-device-id']
+
+        event_body = json.loads(event.get_body().decode('utf-8'))
+        prediction_request = { 'query' : event_body['speech'] }
+
+        prediction_response = client.prediction.get_slot_prediction(app_id, 'Staging', prediction_request)
+
+        if prediction_response.prediction.top_intent == 'set timer':
+            numbers = prediction_response.prediction.entities['number']
+            time_units = prediction_response.prediction.entities['time unit']
+            total_seconds = 0
+
+            for i in range(0, len(numbers)):
+                number = numbers[i]
+                time_unit = time_units[i][0]
+
+                if time_unit == 'minute':
+                    total_seconds += number * 60
+                else:
+                    total_seconds += number
+
+            logging.info(f'Timer required for {total_seconds} seconds')
+
+            payload = {
+                'seconds': total_seconds
+            }
+            direct_method = CloudToDeviceMethod(method_name='set-timer', payload=json.dumps(payload))
+
+            registry_manager = IoTHubRegistryManager(registry_manager_connection_string)
+
+            registry_manager.invoke_device_method(device_id, direct_method)
diff --git a/6-consumer/lessons/3-spoken-feedback/code-command/functions/smart-timer-trigger/speech-trigger/function.json b/6-consumer/lessons/3-spoken-feedback/code-command/functions/smart-timer-trigger/speech-trigger/function.json
new file mode 100644
index 00000000..0117bdf5
--- /dev/null
+++ b/6-consumer/lessons/3-spoken-feedback/code-command/functions/smart-timer-trigger/speech-trigger/function.json
@@ -0,0 +1,15 @@
+{
+    "scriptFile": "__init__.py",
+    "bindings": [
+        {
+            "type": "eventHubTrigger",
+            "name": "events",
+            "direction": "in",
+            "eventHubName": "samples-workitems",
+            "connection": "IOT_HUB_CONNECTION_STRING",
+            "cardinality": "many",
+            "consumerGroup": "$Default",
+            "dataType": "binary"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/6-consumer/lessons/3-spoken-feedback/code-spoken-response/pi/smart-timer/app.py b/6-consumer/lessons/3-spoken-feedback/code-spoken-response/pi/smart-timer/app.py
new file mode 100644
index 00000000..40bce464
--- /dev/null
+++ b/6-consumer/lessons/3-spoken-feedback/code-spoken-response/pi/smart-timer/app.py
@@ -0,0 +1,184 @@
+import io
+import json
+import pyaudio
+import requests
+import time
+import wave
+import threading
+
+from azure.iot.device import IoTHubDeviceClient, Message, MethodResponse
+
+from grove.factory import Factory
+button = Factory.getButton('GPIO-HIGH', 5)
+
+audio = pyaudio.PyAudio()
+microphone_card_number = 1
+speaker_card_number = 1
+rate = 16000
+
+def capture_audio():
+    stream = audio.open(format = pyaudio.paInt16,
+                        rate = rate,
+                        channels = 1,
+                        input_device_index = microphone_card_number,
+                        input = True,
+                        frames_per_buffer = 4096)
+
+    frames = []
+
+    while button.is_pressed():
+        frames.append(stream.read(4096))
+
+    stream.stop_stream()
+    stream.close()
+
+    wav_buffer = io.BytesIO()
+    with wave.open(wav_buffer, 'wb') as wavefile:
+        wavefile.setnchannels(1)
+        wavefile.setsampwidth(audio.get_sample_size(pyaudio.paInt16))
+        wavefile.setframerate(rate)
+        wavefile.writeframes(b''.join(frames))
+        wav_buffer.seek(0)
+
+    return wav_buffer
+
+api_key = ''
+location = ''
+language = ''
+connection_string = ''
+
+device_client = IoTHubDeviceClient.create_from_connection_string(connection_string)
+
+print('Connecting')
+device_client.connect()
+print('Connected')
+
+def get_access_token():
+    headers = {
+        'Ocp-Apim-Subscription-Key': api_key
+    }
+
+    token_endpoint = f'https://{location}.api.cognitive.microsoft.com/sts/v1.0/issuetoken'
+    response = requests.post(token_endpoint, headers=headers)
+    return str(response.text)
+
+def convert_speech_to_text(buffer):
+    url = f'https://{location}.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1'
+
+    headers = {
+        'Authorization': 'Bearer ' + get_access_token(),
+        'Content-Type': f'audio/wav; codecs=audio/pcm; samplerate={rate}',
+        'Accept': 'application/json;text/xml'
+    }
+
+    params = {
+        'language': language
+    }
+
+    response = requests.post(url, headers=headers, params=params, data=buffer)
+    response_json = json.loads(response.text)
+
+    if response_json['RecognitionStatus'] == 'Success':
+        return response_json['DisplayText']
+    else:
+        return ''
+
+def get_voice():
+    url = f'https://{location}.tts.speech.microsoft.com/cognitiveservices/voices/list'
+
+    headers = {
+        'Authorization': 'Bearer ' + get_access_token()
+    }
+
+    response = requests.get(url, headers=headers)
+    voices_json = json.loads(response.text)
+
+    first_voice = next(x for x in voices_json if x['Locale'].lower() == language.lower() and x['VoiceType'] == 'Neural')
+    return first_voice['ShortName']
+
+voice = get_voice()
+print(f"Using voice {voice}")
+
+playback_format = 'riff-48khz-16bit-mono-pcm'
+
+def get_speech(text):
+    url = f'https://{location}.tts.speech.microsoft.com/cognitiveservices/v1'
+
+    headers = {
+        'Authorization': 'Bearer ' + get_access_token(),
+        'Content-Type': 'application/ssml+xml',
+        'X-Microsoft-OutputFormat': playback_format
+    }
+
+    ssml =  f'<speak version=\'1.0\' xml:lang=\'{language}\'>'
+    ssml += f'<voice xml:lang=\'{language}\' name=\'{voice}\'>'
+    ssml += text
+    ssml += '</voice>'
+    ssml += '</speak>'
+
+    response = requests.post(url, headers=headers, data=ssml.encode('utf-8'))
+    return io.BytesIO(response.content)
+
+def play_speech(speech):
+    with wave.open(speech, 'rb') as wave_file:
+        stream = audio.open(format=audio.get_format_from_width(wave_file.getsampwidth()),
+                            channels=wave_file.getnchannels(),
+                            rate=wave_file.getframerate(),
+                            output_device_index=speaker_card_number,
+                            output=True)
+
+        data = wave_file.readframes(4096)
+
+        while len(data) > 0:
+            stream.write(data)
+            data = wave_file.readframes(4096)
+
+        stream.stop_stream()
+        stream.close()
+
+def say(text):
+    speech = get_speech(text)
+    play_speech(speech)
+
+def announce_timer(minutes, seconds):
+    announcement = 'Time\'s up on your '
+    if minutes > 0:
+        announcement += f'{minutes} minute '
+    if seconds > 0:
+        announcement += f'{seconds} second '
+    announcement += 'timer.'
+    say(announcement)
+
+def create_timer(total_seconds):
+    minutes, seconds = divmod(total_seconds, 60)
+    threading.Timer(total_seconds, announce_timer, args=[minutes, seconds]).start()
+    announcement = ''
+    if minutes > 0:
+        announcement += f'{minutes} minute '
+    if seconds > 0:
+        announcement += f'{seconds} second '
+    announcement += 'timer started.'
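+    # Speak the confirmation so the user knows the timer has been set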
+    say(announcement)
+
+def handle_method_request(request):
+    if request.name == 'set-timer':
+        payload = json.loads(request.payload)
+        seconds = payload['seconds']
+        if seconds > 0:
+            create_timer(seconds)
+
+    method_response = MethodResponse.create_from_method_request(request, 200)
+    device_client.send_method_response(method_response)
+
+device_client.on_method_request_received = handle_method_request
+
+while True:
+    while not button.is_pressed():
+        time.sleep(.1)
+
+    buffer = capture_audio()
+    text = convert_speech_to_text(buffer)
+    if len(text) > 0:
+        print(text)
+        message = Message(json.dumps({ 'speech': text }))
+        device_client.send_message(message)
\ No newline at end of file
diff --git a/6-consumer/lessons/3-spoken-feedback/code-timer/pi/smart-timer/app.py b/6-consumer/lessons/3-spoken-feedback/code-timer/pi/smart-timer/app.py
new file mode 100644
index 00000000..1a8a6226
--- /dev/null
+++ b/6-consumer/lessons/3-spoken-feedback/code-timer/pi/smart-timer/app.py
@@ -0,0 +1,130 @@
+import io
+import json
+import pyaudio
+import requests
+import time
+import wave
+import threading
+
+from azure.iot.device import IoTHubDeviceClient, Message, MethodResponse
+
+from grove.factory import Factory
+button = Factory.getButton('GPIO-HIGH', 5)
+
+audio = pyaudio.PyAudio()
+microphone_card_number = 1
+speaker_card_number = 1
+rate = 16000
+
+def capture_audio():
+    stream = audio.open(format = pyaudio.paInt16,
+                        rate = rate,
+                        channels = 1,
+                        input_device_index = microphone_card_number,
+                        input = True,
+                        frames_per_buffer = 4096)
+
+    frames = []
+
+    while button.is_pressed():
+        frames.append(stream.read(4096))
+
+    stream.stop_stream()
+    stream.close()
+
+    wav_buffer = io.BytesIO()
+    with wave.open(wav_buffer, 'wb') as wavefile:
+        wavefile.setnchannels(1)
+        wavefile.setsampwidth(audio.get_sample_size(pyaudio.paInt16))
+        wavefile.setframerate(rate)
+        wavefile.writeframes(b''.join(frames))
+        wav_buffer.seek(0)
+
+    return wav_buffer
+
+api_key = ''
+location = ''
+language = ''
+connection_string = ''
+
+device_client = IoTHubDeviceClient.create_from_connection_string(connection_string)
+
+print('Connecting')
+device_client.connect()
+print('Connected')
+
+def get_access_token():
+    headers = {
+        'Ocp-Apim-Subscription-Key': api_key
+    }
+
+    token_endpoint = f'https://{location}.api.cognitive.microsoft.com/sts/v1.0/issuetoken'
+    response = requests.post(token_endpoint, headers=headers)
+    return str(response.text)
+
+def convert_speech_to_text(buffer):
+    url = f'https://{location}.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1'
+
+    headers = {
+        'Authorization': 'Bearer ' + get_access_token(),
+        'Content-Type': f'audio/wav; codecs=audio/pcm; samplerate={rate}',
+        'Accept': 'application/json;text/xml'
+    }
+
+    params = {
+        'language': language
+    }
+
+    response = requests.post(url, headers=headers, params=params, data=buffer)
+    response_json = json.loads(response.text)
+
+    if response_json['RecognitionStatus'] == 'Success':
+        return response_json['DisplayText']
+    else:
+        return ''
+
+def say(text):
+    print(text)
+
+def announce_timer(minutes, seconds):
+    announcement = 'Time\'s up on your '
+    if minutes > 0:
+        announcement += f'{minutes} minute '
+    if seconds > 0:
+        announcement += f'{seconds} second '
+    announcement += 'timer.'
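+    # Pass the announcement to say - in this version it just prints to the console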
+    say(announcement)
+
+def create_timer(total_seconds):
+    minutes, seconds = divmod(total_seconds, 60)
+    threading.Timer(total_seconds, announce_timer, args=[minutes, seconds]).start()
+    announcement = ''
+    if minutes > 0:
+        announcement += f'{minutes} minute '
+    if seconds > 0:
+        announcement += f'{seconds} second '
+    announcement += 'timer started.'
+    say(announcement)
+
+def handle_method_request(request):
+    if request.name == 'set-timer':
+        payload = json.loads(request.payload)
+        seconds = payload['seconds']
+        if seconds > 0:
+            create_timer(seconds)
+
+    method_response = MethodResponse.create_from_method_request(request, 200)
+    device_client.send_method_response(method_response)
+
+device_client.on_method_request_received = handle_method_request
+
+while True:
+    while not button.is_pressed():
+        time.sleep(.1)
+
+    buffer = capture_audio()
+    text = convert_speech_to_text(buffer)
+    if len(text) > 0:
+        print(text)
+        message = Message(json.dumps({ 'speech': text }))
+        device_client.send_message(message)
\ No newline at end of file
diff --git a/6-consumer/lessons/3-spoken-feedback/code-timer/virtual-iot-device/smart-timer/app.py b/6-consumer/lessons/3-spoken-feedback/code-timer/virtual-iot-device/smart-timer/app.py
new file mode 100644
index 00000000..51047a95
--- /dev/null
+++ b/6-consumer/lessons/3-spoken-feedback/code-timer/virtual-iot-device/smart-timer/app.py
@@ -0,0 +1,69 @@
+import json
+import threading
+import time
+from azure.cognitiveservices.speech import SpeechConfig, SpeechRecognizer
+from azure.iot.device import IoTHubDeviceClient, Message, MethodResponse
+
+api_key = ''
+location = ''
+language = ''
+connection_string = ''
+
+device_client = IoTHubDeviceClient.create_from_connection_string(connection_string)
+
+print('Connecting')
+device_client.connect()
+print('Connected')
+
+speech_config = SpeechConfig(subscription=api_key,
+                             region=location,
+                             speech_recognition_language=language)
+
+recognizer = SpeechRecognizer(speech_config=speech_config)
+
+def recognized(args):
+    if len(args.result.text) > 0:
+        message = Message(json.dumps({ 'speech': args.result.text }))
+        device_client.send_message(message)
+
+recognizer.recognized.connect(recognized)
+
+recognizer.start_continuous_recognition()
+
+def say(text):
+    print(text)
+
+def announce_timer(minutes, seconds):
+    announcement = 'Time\'s up on your '
+    if minutes > 0:
+        announcement += f'{minutes} minute '
+    if seconds > 0:
+        announcement += f'{seconds} second '
+    announcement += 'timer.'
+    say(announcement)
+
+def create_timer(total_seconds):
+    minutes, seconds = divmod(total_seconds, 60)
+    threading.Timer(total_seconds, announce_timer, args=[minutes, seconds]).start()
+    announcement = ''
+    if minutes > 0:
+        announcement += f'{minutes} minute '
+    if seconds > 0:
+        announcement += f'{seconds} second '
+    announcement += 'timer started.'
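+    # Announce that the timer has started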
+    say(announcement)
+
+def handle_method_request(request):
+    if request.name == 'set-timer':
+        payload = json.loads(request.payload)
+        seconds = payload['seconds']
+        if seconds > 0:
+            create_timer(seconds)
+
+    method_response = MethodResponse.create_from_method_request(request, 200)
+    device_client.send_method_response(method_response)
+
+device_client.on_method_request_received = handle_method_request
+
+while True:
+    time.sleep(1)
\ No newline at end of file
diff --git a/6-consumer/lessons/3-spoken-feedback/pi-text-to-speech.md b/6-consumer/lessons/3-spoken-feedback/pi-text-to-speech.md
new file mode 100644
index 00000000..c984c2e2
--- /dev/null
+++ b/6-consumer/lessons/3-spoken-feedback/pi-text-to-speech.md
@@ -0,0 +1,136 @@
# Text to speech - Raspberry Pi

In this part of the lesson, you will write code to convert text to speech using the speech service.

## Convert text to speech using the speech service

The text can be sent to the speech service using the REST API to get speech as an audio file that can be played back on your IoT device. When requesting speech, you need to provide the voice to use, as speech can be generated using a variety of different voices.

Each language supports a range of different voices, and you can make a REST request against the speech service to get the list of supported voices for each language.

### Task - get a voice

1. Add the following code above the `say` function to request the list of voices for a language:

    ```python
    def get_voice():
        url = f'https://{location}.tts.speech.microsoft.com/cognitiveservices/voices/list'

        headers = {
            'Authorization': 'Bearer ' + get_access_token()
        }

        response = requests.get(url, headers=headers)
        voices_json = json.loads(response.text)

        first_voice = next(x for x in voices_json if x['Locale'].lower() == language.lower() and x['VoiceType'] == 'Neural')
        return first_voice['ShortName']

    voice = get_voice()
    print(f"Using voice {voice}")
    ```

    This code defines a function called `get_voice` that uses the speech service to get a list of voices. It then finds the first neural voice that matches the language that is being used.

    This function is then called to store the first voice, and the voice name is printed to the console. This voice can be requested once and the value used for every call to convert text to speech.

    > 💁 You can get the full list of supported voices from the [Language and voice support documentation on Microsoft Docs](https://docs.microsoft.com/azure/cognitive-services/speech-service/language-support?WT.mc_id=academic-17441-jabenn#text-to-speech). If you want to use a specific voice, then you can remove this function and hard code the voice to the voice name from this documentation.

### Task - convert text to speech

1. Below this, define a constant for the audio format to be retrieved from the speech service. When you request audio, you can do it in a range of different formats.

    ```python
    playback_format = 'riff-48khz-16bit-mono-pcm'
    ```

    The format you can use depends on your hardware. If you get `Invalid sample rate` errors when playing the audio, then change this to another value. You can find the list of supported values in the [Text to speech REST API documentation on Microsoft Docs](https://docs.microsoft.com/azure/cognitive-services/speech-service/rest-text-to-speech?WT.mc_id=academic-17441-jabenn#audio-outputs).
You will need to use `riff` format audio, and the values to try are `riff-16khz-16bit-mono-pcm`, `riff-24khz-16bit-mono-pcm` and `riff-48khz-16bit-mono-pcm`.

1. Below this, declare a function called `get_speech` that will convert the text to speech using the speech service REST API:

    ```python
    def get_speech(text):
    ```

1. In the `get_speech` function, define the URL to call and the headers to pass:

    ```python
    url = f'https://{location}.tts.speech.microsoft.com/cognitiveservices/v1'

    headers = {
        'Authorization': 'Bearer ' + get_access_token(),
        'Content-Type': 'application/ssml+xml',
        'X-Microsoft-OutputFormat': playback_format
    }
    ```

    This sets the headers to use a generated access token, sets the content type to SSML, and defines the audio format needed.

1. Below this, define the SSML to send to the REST API:

    ```python
    ssml =  f'<speak version=\'1.0\' xml:lang=\'{language}\'>'
    ssml += f'<voice xml:lang=\'{language}\' name=\'{voice}\'>'
    ssml += text
    ssml += '</voice>'
    ssml += '</speak>'
    ```

    This SSML sets the language and the voice to use, along with the text to convert.

1. Finally, add code in this function to make the REST request and return the binary audio data:

    ```python
    response = requests.post(url, headers=headers, data=ssml.encode('utf-8'))
    return io.BytesIO(response.content)
    ```

### Task - play the audio

1. Below the `get_speech` function, define a new function to play the audio returned by the REST API call:

    ```python
    def play_speech(speech):
    ```

1. The `speech` passed to this function will be the binary audio data returned from the REST API. Use the following code to open this as a wave file and pass it to PyAudio to play the audio:

    ```python
    def play_speech(speech):
        with wave.open(speech, 'rb') as wave_file:
            stream = audio.open(format=audio.get_format_from_width(wave_file.getsampwidth()),
                                channels=wave_file.getnchannels(),
                                rate=wave_file.getframerate(),
                                output_device_index=speaker_card_number,
                                output=True)

            data = wave_file.readframes(4096)

            while len(data) > 0:
                stream.write(data)
                data = wave_file.readframes(4096)

            stream.stop_stream()
            stream.close()
    ```

    This code uses a PyAudio stream, the same as capturing audio. The difference here is the stream is set as an output stream, and data is read from the audio data and pushed to the stream.

    Rather than hard coding the stream details such as the sample rate, they are read from the audio data.

1. Replace the contents of the `say` function with the following:

    ```python
    speech = get_speech(text)
    play_speech(speech)
    ```

    This code converts the text to speech as binary audio data, and plays the audio.

1. Run the app, and ensure the function app is also running. Set some timers, and you will hear a spoken response saying that your timer has been set, then another spoken response when the timer is complete.

    If you get `Invalid sample rate` errors, change the `playback_format` as described above.

> 💁 You can find this code in the [code-spoken-response/pi](code-spoken-response/pi) folder.

😀 Your timer program was a success!
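SSML can describe more than just the language and the voice - for example, the `<prosody>` element adjusts attributes of the speech such as the speaking rate and pitch. As an optional experiment, the SSML built in the `get_speech` function could be extended like this minimal sketch; the `rate` value is an illustrative choice, not something the lesson requires:

```python
ssml =  f'<speak version=\'1.0\' xml:lang=\'{language}\'>'
ssml += f'<voice xml:lang=\'{language}\' name=\'{voice}\'>'
# Wrap the text in a prosody element to slow the speech down slightly
ssml += '<prosody rate="-10%">'
ssml += text
ssml += '</prosody>'
ssml += '</voice>'
ssml += '</speak>'
```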
diff --git a/6-consumer/lessons/3-spoken-feedback/single-board-computer-set-timer.md b/6-consumer/lessons/3-spoken-feedback/single-board-computer-set-timer.md
new file mode 100644
index 00000000..efa3b9e1
--- /dev/null
+++ b/6-consumer/lessons/3-spoken-feedback/single-board-computer-set-timer.md
@@ -0,0 +1,97 @@
# Set a timer - Virtual IoT Hardware and Raspberry Pi

In this part of the lesson, you will set a timer on your virtual IoT device or Raspberry Pi based on a command from the IoT Hub.

## Set a timer

The command sent from the serverless function contains the time for the timer in seconds as the payload. This time can be used to set a timer.

Timers can be set using the Python `threading.Timer` class. This class takes a delay time and a function, and after the delay time, the function is executed.

### Task - set a timer

1. Open the `smart-timer` project in VS Code, and ensure the virtual environment is loaded in the terminal if you are using a virtual IoT device.

1. Add the following import statement at the top of the file to import the threading Python library:

    ```python
    import threading
    ```

1. Above the `handle_method_request` function that handles the method request, add a function to speak a response. For now this will just write to the console, but later in this lesson this will speak the text.

    ```python
    def say(text):
        print(text)
    ```

1. Below this add a function that will be called by a timer to announce that the timer is complete:

    ```python
    def announce_timer(minutes, seconds):
        announcement = 'Time\'s up on your '
        if minutes > 0:
            announcement += f'{minutes} minute '
        if seconds > 0:
            announcement += f'{seconds} second '
        announcement += 'timer.'
        say(announcement)
    ```

    This function takes the number of minutes and seconds for the timer, and builds a sentence to say that the timer is complete. It will check the number of minutes and seconds, and only include each time unit if it has a number. For example, if the number of minutes is 0 then only seconds are included in the message. This sentence is then sent to the `say` function.

1. Below this, add the following `create_timer` function to create a timer:

    ```python
    def create_timer(total_seconds):
        minutes, seconds = divmod(total_seconds, 60)
        threading.Timer(total_seconds, announce_timer, args=[minutes, seconds]).start()
    ```

    This function takes the total number of seconds for the timer that will be sent in the command, and converts this to minutes and seconds. It then creates and starts a timer object using the total number of seconds, passing in the `announce_timer` function and a list containing the minutes and seconds. When the timer elapses, it will call the `announce_timer` function, and pass the contents of this list as the parameters - so the first item in the list gets passed as the `minutes` parameter, and the second item as the `seconds` parameter.

1. To the end of the `create_timer` function, add some code to build a message to be spoken to the user to announce that the timer is starting:

    ```python
    announcement = ''
    if minutes > 0:
        announcement += f'{minutes} minute '
    if seconds > 0:
        announcement += f'{seconds} second '
    announcement += 'timer started.'
    say(announcement)
    ```

    Again, this only includes the time unit that has a value. This sentence is then sent to the `say` function.

1. At the start of the `handle_method_request` function, add the following code to check that the `set-timer` direct method was requested:

    ```python
    if request.name == 'set-timer':
    ```

1. Inside this `if` statement, extract the timer time in seconds from the payload and use this to create a timer:

    ```python
    payload = json.loads(request.payload)
    seconds = payload['seconds']
    if seconds > 0:
        create_timer(seconds)
    ```

    The timer is only created if the number of seconds is greater than 0.

1. Run the app, and ensure the function app is also running. Set some timers, and the output will show the timer being set, and then will show when it elapses:

    ```output
    pi@raspberrypi:~/smart-timer $ python3 app.py
    Connecting
    Connected
    Set a one minute 4 second timer.
    1 minute 4 second timer started.
    Time's up on your 1 minute 4 second timer.
    ```

> 💁 You can find this code in the [code-timer/pi](code-timer/pi) or [code-timer/virtual-iot-device](code-timer/virtual-iot-device) folder.

😀 Your timer program was a success!
diff --git a/6-consumer/lessons/3-spoken-feedback/virtual-device-text-to-speech.md b/6-consumer/lessons/3-spoken-feedback/virtual-device-text-to-speech.md
new file mode 100644
index 00000000..743f3890
--- /dev/null
+++ b/6-consumer/lessons/3-spoken-feedback/virtual-device-text-to-speech.md
@@ -0,0 +1,4 @@
+# Text to speech - Virtual IoT device
+
+
+https://docs.microsoft.com/en-us/dotnet/api/microsoft.cognitiveservices.speech.speechsynthesisoutputformat?view=azure-dotnet
\ No newline at end of file
diff --git a/6-consumer/lessons/3-spoken-feedback/wio-terminal-set-timer.md b/6-consumer/lessons/3-spoken-feedback/wio-terminal-set-timer.md
new file mode 100644
index 00000000..2e8910e5
--- /dev/null
+++ b/6-consumer/lessons/3-spoken-feedback/wio-terminal-set-timer.md
@@ -0,0 +1,3 @@
+# Set a timer - Wio Terminal
+
+Coming soon
diff --git a/6-consumer/lessons/3-spoken-feedback/wio-terminal-text-to-speech.md b/6-consumer/lessons/3-spoken-feedback/wio-terminal-text-to-speech.md
new file mode 100644
index 00000000..e27369e6
--- /dev/null
+++ b/6-consumer/lessons/3-spoken-feedback/wio-terminal-text-to-speech.md
@@ -0,0 +1,3 @@
+# Text to speech - Wio Terminal
+
+Coming soon