diff --git a/6-consumer/lessons/1-speech-recognition/code-iot-hub/virtual-iot-device/smart-timer/app.py b/6-consumer/lessons/1-speech-recognition/code-iot-hub/virtual-iot-device/smart-timer/app.py index 59df6635..ae111d1d 100644 --- a/6-consumer/lessons/1-speech-recognition/code-iot-hub/virtual-iot-device/smart-timer/app.py +++ b/6-consumer/lessons/1-speech-recognition/code-iot-hub/virtual-iot-device/smart-timer/app.py @@ -14,11 +14,11 @@ print('Connecting') device_client.connect() print('Connected') -speech_config = SpeechConfig(subscription=api_key, - region=location, - speech_recognition_language=language) +recognizer_config = SpeechConfig(subscription=api_key, + region=location, + speech_recognition_language=language) -recognizer = SpeechRecognizer(speech_config=speech_config) +recognizer = SpeechRecognizer(speech_config=recognizer_config) def recognized(args): if len(args.result.text) > 0: diff --git a/6-consumer/lessons/1-speech-recognition/code-speech-to-text/virtual-iot-device/smart-timer/app.py b/6-consumer/lessons/1-speech-recognition/code-speech-to-text/virtual-iot-device/smart-timer/app.py index 15632657..355b9c27 100644 --- a/6-consumer/lessons/1-speech-recognition/code-speech-to-text/virtual-iot-device/smart-timer/app.py +++ b/6-consumer/lessons/1-speech-recognition/code-speech-to-text/virtual-iot-device/smart-timer/app.py @@ -5,11 +5,11 @@ api_key = '' location = '' language = '' -speech_config = SpeechConfig(subscription=api_key, - region=location, - speech_recognition_language=language) +recognizer_config = SpeechConfig(subscription=api_key, + region=location, + speech_recognition_language=language) -recognizer = SpeechRecognizer(speech_config=speech_config) +recognizer = SpeechRecognizer(speech_config=recognizer_config) def recognized(args): print(args.result.text) diff --git a/6-consumer/lessons/1-speech-recognition/virtual-device-speech-to-text.md b/6-consumer/lessons/1-speech-recognition/virtual-device-speech-to-text.md index 79618b59..02e29b85 
100644 --- a/6-consumer/lessons/1-speech-recognition/virtual-device-speech-to-text.md +++ b/6-consumer/lessons/1-speech-recognition/virtual-device-speech-to-text.md @@ -45,9 +45,9 @@ On Windows, Linux, and macOS, the speech services Python SDK can be used to list location = '' language = '' - speech_config = SpeechConfig(subscription=api_key, - region=location, - speech_recognition_language=language) + recognizer_config = SpeechConfig(subscription=api_key, + region=location, + speech_recognition_language=language) ``` Replace `` with the API key for your speech service. Replace `` with the location you used when you created the speech service resource. @@ -59,7 +59,7 @@ On Windows, Linux, and macOS, the speech services Python SDK can be used to list 1. Add the following code to create a speech recognizer: ```python - recognizer = SpeechRecognizer(speech_config=speech_config) + recognizer = SpeechRecognizer(speech_config=recognizer_config) ``` 1. The speech recognizer runs on a background thread, listening for audio and converting any speech in it to text. You can get the text using a callback function - a function you define and pass to the recognizer. Every time speech is detected, the callback is called. 
"""Smart timer for a virtual IoT device.

Listens for speech, forwards recognized text to IoT Hub, and speaks a
confirmation when a timer is set (via the 'set-timer' direct method) and
again when that timer expires.
"""
import json
import threading
import time

from azure.cognitiveservices.speech import SpeechConfig, SpeechRecognizer, SpeechSynthesizer
from azure.iot.device import IoTHubDeviceClient, Message, MethodResponse

# Settings for the speech resource and the IoT Hub device. They are empty
# here and must be filled in before the script is run.
api_key = ''
location = ''
language = ''
connection_string = ''

device_client = IoTHubDeviceClient.create_from_connection_string(connection_string)

print('Connecting')
device_client.connect()
print('Connected')

# Speech-to-text: a dedicated config so it does not clash with the
# text-to-speech config (speech_config) created further down.
recognizer_config = SpeechConfig(subscription=api_key,
                                 region=location,
                                 speech_recognition_language=language)

recognizer = SpeechRecognizer(speech_config=recognizer_config)


def recognized(args):
    """Send recognized speech to IoT Hub as a JSON message.

    Connected to the recognizer's ``recognized`` signal. Results with no
    text are ignored so that empty messages are never sent.
    """
    text = args.result.text
    if text:
        message = Message(json.dumps({'speech': text}))
        device_client.send_message(message)


recognizer.recognized.connect(recognized)

recognizer.start_continuous_recognition()

# Text-to-speech: same key, region and language as the recognizer.
speech_config = SpeechConfig(subscription=api_key,
                             region=location)
speech_config.speech_synthesis_language = language
speech_synthesizer = SpeechSynthesizer(speech_config=speech_config)

# Pick the first available voice whose locale matches the configured language
# (case-insensitive). Fail with a clear message instead of a bare
# StopIteration when nothing matches.
voices = speech_synthesizer.get_voices_async().get().voices
first_voice = next((x for x in voices if x.locale.lower() == language.lower()), None)
if first_voice is None:
    raise RuntimeError(f'No text-to-speech voice available for language {language!r}')
# NOTE(review): the synthesizer above was created before the voice name is
# set here - confirm the SDK honours later changes to speech_config; if it
# does not, the synthesizer needs to be created after this line.
speech_config.speech_synthesis_voice_name = first_voice.short_name


def say(text):
    """Speak *text* using the speech synthesizer."""
    speech_synthesizer.speak_text(text)


def _describe_duration(minutes, seconds):
    """Return the spoken form of a duration, e.g. '2 minute 30 second'.

    Parts that are not greater than zero are left out; the remaining parts
    are separated by a single space so they are never run together.
    """
    parts = []
    if minutes > 0:
        parts.append(f'{minutes} minute')
    if seconds > 0:
        parts.append(f'{seconds} second')
    return ' '.join(parts)


def announce_timer(minutes, seconds):
    """Announce that the timer of the given length has finished."""
    say(f'Times up on your {_describe_duration(minutes, seconds)} timer.')


def create_timer(total_seconds):
    """Start a timer of *total_seconds* seconds and announce that it started.

    The expiry announcement is scheduled with ``threading.Timer``, so it is
    made from a separate thread once the time has elapsed.
    """
    minutes, seconds = divmod(total_seconds, 60)
    threading.Timer(total_seconds, announce_timer, args=[minutes, seconds]).start()
    say(f'{_describe_duration(minutes, seconds)} timer started.')


def handle_method_request(request):
    """Handle a direct method request from IoT Hub.

    For 'set-timer', the payload is parsed as JSON and a timer is created
    when its 'seconds' value is greater than zero. Every request, whatever
    its name, is answered with a 200 response.
    """
    if request.name == 'set-timer':
        payload = json.loads(request.payload)
        seconds = payload['seconds']
        if seconds > 0:
            create_timer(seconds)

    method_response = MethodResponse.create_from_method_request(request, 200)
    device_client.send_method_response(method_response)


device_client.on_method_request_received = handle_method_request

# Keep the main thread alive; recognition and method requests are handled
# through the callbacks registered above.
while True:
    time.sleep(1)
a/6-consumer/lessons/3-spoken-feedback/pi-text-to-speech.md +++ b/6-consumer/lessons/3-spoken-feedback/pi-text-to-speech.md @@ -34,7 +34,11 @@ Each language supports a range of different voices, and you can make a REST requ This function is then called to store the first voice, and the voice name is printed to the console. This voice can be requested once and the value used for every call to convert text to speech. - > 💁 You can get the full list of supported voices from the [Language and voice support documentation on Microsoft Docs](https://docs.microsoft.com/azure/cognitive-services/speech-service/language-support?WT.mc_id=academic-17441-jabenn#text-to-speech). If you want to use a specific voice, then you can remove this function and hard code the voice to the voice name from this documentation. + > 💁 You can get the full list of supported voices from the [Language and voice support documentation on Microsoft Docs](https://docs.microsoft.com/azure/cognitive-services/speech-service/language-support?WT.mc_id=academic-17441-jabenn#text-to-speech). If you want to use a specific voice, then you can remove this function and hard code the voice to the voice name from this documentation. For example: + > + > ```python + > voice = 'hi-IN-SwaraNeural' + > ``` ### Task - convert text to speech @@ -131,6 +135,6 @@ Each language supports a range of different voices, and you can make a REST requ If you get `Invalid sample rate` errors, change the `playback_format` as described above. -> 💁 You can find this code in the [code-spoken-response/pi](code-timer/spoken-response) folder. +> 💁 You can find this code in the [code-spoken-response/pi](code-spoken-response/pi) folder. 😀 Your timer program was a success! 
diff --git a/6-consumer/lessons/3-spoken-feedback/virtual-device-text-to-speech.md b/6-consumer/lessons/3-spoken-feedback/virtual-device-text-to-speech.md index 743f3890..af72004e 100644 --- a/6-consumer/lessons/3-spoken-feedback/virtual-device-text-to-speech.md +++ b/6-consumer/lessons/3-spoken-feedback/virtual-device-text-to-speech.md @@ -1,4 +1,56 @@ # Text to speech - Virtual IoT device +In this part of the lesson, you will write code to convert text to speech using the speech service. -https://docs.microsoft.com/en-us/dotnet/api/microsoft.cognitiveservices.speech.speechsynthesisoutputformat?view=azure-dotnet \ No newline at end of file +## Convert text to speech + +The speech services SDK that you used in the last lesson to convert speech to text can be used to convert text back to speech. When requesting speech, you need to provide the voice to use as speech can be generated using a variety of different voices. + +Each language supports a range of different voices, and you can get the list of supported voices for each language from the speech services SDK. + +### Task - convert text to speech + +1. Import the `SpeechSynthesizer` from the `azure.cognitiveservices.speech` package by adding it to the existing imports: + + ```python + from azure.cognitiveservices.speech import SpeechConfig, SpeechRecognizer, SpeechSynthesizer + ``` + +1. Above the `say` function, create a speech configuration to use with the speech synthesizer: + + ```python + speech_config = SpeechConfig(subscription=api_key, + region=location) + speech_config.speech_synthesis_language = language + speech_synthesizer = SpeechSynthesizer(speech_config=speech_config) + ``` + + This uses the same API key, location and language that was used by the recognizer. + +1. 
Below this, add the following code to get a voice and set it on the speech config: + + ```python + voices = speech_synthesizer.get_voices_async().get().voices + first_voice = next(x for x in voices if x.locale.lower() == language.lower()) + speech_config.speech_synthesis_voice_name = first_voice.short_name + ``` + + This retrieves a list of all the available voices, then finds the first voice that matches the language that is being used. + + > 💁 You can get the full list of supported voices from the [Language and voice support documentation on Microsoft Docs](https://docs.microsoft.com/azure/cognitive-services/speech-service/language-support?WT.mc_id=academic-17441-jabenn#text-to-speech). If you want to use a specific voice, then you can remove this code and hard code the voice name from this documentation. For example: + > + > ```python + > speech_config.speech_synthesis_voice_name = 'hi-IN-SwaraNeural' + > ``` + +1. Finally, update the contents of the `say` function to use the speech synthesizer to speak the response: + + ```python + speech_synthesizer.speak_text(text) + ``` + +1. Run the app, and ensure the function app is also running. Set some timers, and you will hear a spoken response saying that your timer has been set, then another spoken response when the timer is complete. + +> 💁 You can find this code in the [code-spoken-response/virtual-iot-device](code-spoken-response/virtual-iot-device) folder. + +😀 Your timer program was a success!