diff --git a/2-farm/lessons/4-migrate-your-plant-to-the-cloud/code/pi/soil-moisture-sensor/app.py b/2-farm/lessons/4-migrate-your-plant-to-the-cloud/code/pi/soil-moisture-sensor/app.py index 4447137c..a0b5c0cf 100644 --- a/2-farm/lessons/4-migrate-your-plant-to-the-cloud/code/pi/soil-moisture-sensor/app.py +++ b/2-farm/lessons/4-migrate-your-plant-to-the-cloud/code/pi/soil-moisture-sensor/app.py @@ -4,16 +4,16 @@ from grove.grove_relay import GroveRelay import json from azure.iot.device import IoTHubDeviceClient, Message, MethodResponse -connection_string = "" +connection_string = '' adc = ADC() relay = GroveRelay(5) device_client = IoTHubDeviceClient.create_from_connection_string(connection_string) -print("Connecting") +print('Connecting') device_client.connect() -print("Connected") +print('Connected') def handle_method_request(request): print("Direct method received - ", request.name) @@ -32,7 +32,7 @@ while True: soil_moisture = adc.read(0) print("Soil moisture:", soil_moisture) - message = Message(json.dumps({ "soil_moisture": soil_moisture })) + message = Message(json.dumps({ 'soil_moisture': soil_moisture })) device_client.send_message(message) time.sleep(10) \ No newline at end of file diff --git a/2-farm/lessons/4-migrate-your-plant-to-the-cloud/code/virtual-device/soil-moisture-sensor/app.py b/2-farm/lessons/4-migrate-your-plant-to-the-cloud/code/virtual-device/soil-moisture-sensor/app.py index 6a118e48..aa211db1 100644 --- a/2-farm/lessons/4-migrate-your-plant-to-the-cloud/code/virtual-device/soil-moisture-sensor/app.py +++ b/2-farm/lessons/4-migrate-your-plant-to-the-cloud/code/virtual-device/soil-moisture-sensor/app.py @@ -7,16 +7,16 @@ from counterfit_shims_grove.grove_relay import GroveRelay import json from azure.iot.device import IoTHubDeviceClient, Message, MethodResponse -connection_string = "" +connection_string = '' adc = ADC() relay = GroveRelay(5) device_client = IoTHubDeviceClient.create_from_connection_string(connection_string) -print("Connecting") +print('Connecting') device_client.connect() -print("Connected") +print('Connected') def handle_method_request(request): print("Direct method received - ", request.name) @@ -35,7 +35,7 @@ while True: soil_moisture = adc.read(0) print("Soil moisture:", soil_moisture) - message = Message(json.dumps({ "soil_moisture": soil_moisture })) + message = Message(json.dumps({ 'soil_moisture': soil_moisture })) device_client.send_message(message) time.sleep(10) \ No newline at end of file diff --git a/2-farm/lessons/4-migrate-your-plant-to-the-cloud/single-board-computer-connect-hub.md b/2-farm/lessons/4-migrate-your-plant-to-the-cloud/single-board-computer-connect-hub.md index 2dc79635..9c9386ca 100644 --- a/2-farm/lessons/4-migrate-your-plant-to-the-cloud/single-board-computer-connect-hub.md +++ b/2-farm/lessons/4-migrate-your-plant-to-the-cloud/single-board-computer-connect-hub.md @@ -43,9 +43,9 @@ The next step is to connect your device to IoT Hub. ```python device_client = IoTHubDeviceClient.create_from_connection_string(connection_string) - print("Connecting") + print('Connecting') device_client.connect() - print("Connected") + print('Connected') ``` 1. Run this code. You will see your device connect. @@ -66,7 +66,7 @@ Now that your device is connected, you can send telemetry to the IoT Hub instead 1. 
Add the following code inside the `while True` loop, just before the sleep: ```python - message = Message(json.dumps({ "soil_moisture": soil_moisture })) + message = Message(json.dumps({ 'soil_moisture': soil_moisture })) device_client.send_message(message) ``` diff --git a/2-farm/lessons/6-keep-your-plant-secure/code/pi/soil-moisture-sensor/app.py b/2-farm/lessons/6-keep-your-plant-secure/code/pi/soil-moisture-sensor/app.py index 7346d4d3..08118f71 100644 --- a/2-farm/lessons/6-keep-your-plant-secure/code/pi/soil-moisture-sensor/app.py +++ b/2-farm/lessons/6-keep-your-plant-secure/code/pi/soil-moisture-sensor/app.py @@ -13,9 +13,9 @@ x509 = X509("./soil-moisture-sensor-x509-cert.pem", "./soil-moisture-sensor-x509 device_client = IoTHubDeviceClient.create_from_x509_certificate(x509, host_name, device_id) -print("Connecting") +print('Connecting') device_client.connect() -print("Connected") +print('Connected') def handle_method_request(request): print("Direct method received - ", request.name) @@ -34,7 +34,7 @@ while True: soil_moisture = adc.read(0) print("Soil moisture:", soil_moisture) - message = Message(json.dumps({ "soil_moisture": soil_moisture })) + message = Message(json.dumps({ 'soil_moisture': soil_moisture })) device_client.send_message(message) time.sleep(10) \ No newline at end of file diff --git a/2-farm/lessons/6-keep-your-plant-secure/code/virtual-device/soil-moisture-sensor/app.py b/2-farm/lessons/6-keep-your-plant-secure/code/virtual-device/soil-moisture-sensor/app.py index 87e95aa1..2e29032e 100644 --- a/2-farm/lessons/6-keep-your-plant-secure/code/virtual-device/soil-moisture-sensor/app.py +++ b/2-farm/lessons/6-keep-your-plant-secure/code/virtual-device/soil-moisture-sensor/app.py @@ -16,9 +16,9 @@ x509 = X509("./soil-moisture-sensor-x509-cert.pem", "./soil-moisture-sensor-x509 device_client = IoTHubDeviceClient.create_from_x509_certificate(x509, host_name, device_id) -print("Connecting") +print('Connecting') device_client.connect() -print("Connected") +print('Connected') def handle_method_request(request): print("Direct method received - ", request.name) @@ -37,7 +37,7 @@ while True: soil_moisture = adc.read(0) print("Soil moisture:", soil_moisture) - message = Message(json.dumps({ "soil_moisture": soil_moisture })) + message = Message(json.dumps({ 'soil_moisture': soil_moisture })) device_client.send_message(message) time.sleep(10) \ No newline at end of file diff --git a/3-transport/lessons/1-location-tracking/code-gps-decode/pi/gps-sensor/app.py b/3-transport/lessons/1-location-tracking/code-gps-decode/pi/gps-sensor/app.py index 4dbbd6aa..3aad9865 100644 --- a/3-transport/lessons/1-location-tracking/code-gps-decode/pi/gps-sensor/app.py +++ b/3-transport/lessons/1-location-tracking/code-gps-decode/pi/gps-sensor/app.py @@ -4,7 +4,7 @@ import pynmea2 import json from azure.iot.device import IoTHubDeviceClient, Message -connection_string = "" +connection_string = '' serial = serial.Serial('/dev/ttyAMA0', 9600, timeout=1) serial.reset_input_buffer() @@ -12,9 +12,9 @@ serial.flush() device_client = IoTHubDeviceClient.create_from_connection_string(connection_string) -print("Connecting") +print('Connecting') device_client.connect() -print("Connected") +print('Connected') def printGPSData(line): msg = pynmea2.parse(line) diff --git a/3-transport/lessons/1-location-tracking/code-gps-decode/virtual-device/gps-sensor/app.py b/3-transport/lessons/1-location-tracking/code-gps-decode/virtual-device/gps-sensor/app.py index 1bff864c..0383f8fd 100644 --- 
a/3-transport/lessons/1-location-tracking/code-gps-decode/virtual-device/gps-sensor/app.py +++ b/3-transport/lessons/1-location-tracking/code-gps-decode/virtual-device/gps-sensor/app.py @@ -7,15 +7,15 @@ import pynmea2 import json from azure.iot.device import IoTHubDeviceClient, Message -connection_string = "" +connection_string = '' serial = counterfit_shims_serial.Serial('/dev/ttyAMA0') device_client = IoTHubDeviceClient.create_from_connection_string(connection_string) -print("Connecting") +print('Connecting') device_client.connect() -print("Connected") +print('Connected') def send_gps_data(line): msg = pynmea2.parse(line) diff --git a/3-transport/lessons/2-store-location-data/code/pi/gps-sensor/app.py b/3-transport/lessons/2-store-location-data/code/pi/gps-sensor/app.py index 4447137c..a0b5c0cf 100644 --- a/3-transport/lessons/2-store-location-data/code/pi/gps-sensor/app.py +++ b/3-transport/lessons/2-store-location-data/code/pi/gps-sensor/app.py @@ -4,16 +4,16 @@ from grove.grove_relay import GroveRelay import json from azure.iot.device import IoTHubDeviceClient, Message, MethodResponse -connection_string = "" +connection_string = '' adc = ADC() relay = GroveRelay(5) device_client = IoTHubDeviceClient.create_from_connection_string(connection_string) -print("Connecting") +print('Connecting') device_client.connect() -print("Connected") +print('Connected') def handle_method_request(request): print("Direct method received - ", request.name) @@ -32,7 +32,7 @@ while True: soil_moisture = adc.read(0) print("Soil moisture:", soil_moisture) - message = Message(json.dumps({ "soil_moisture": soil_moisture })) + message = Message(json.dumps({ 'soil_moisture': soil_moisture })) device_client.send_message(message) time.sleep(10) \ No newline at end of file diff --git a/3-transport/lessons/2-store-location-data/code/virtual-device/gps-sensor/app.py b/3-transport/lessons/2-store-location-data/code/virtual-device/gps-sensor/app.py index 6a118e48..aa211db1 100644 --- a/3-transport/lessons/2-store-location-data/code/virtual-device/gps-sensor/app.py +++ b/3-transport/lessons/2-store-location-data/code/virtual-device/gps-sensor/app.py @@ -7,16 +7,16 @@ from counterfit_shims_grove.grove_relay import GroveRelay import json from azure.iot.device import IoTHubDeviceClient, Message, MethodResponse -connection_string = "" +connection_string = '' adc = ADC() relay = GroveRelay(5) device_client = IoTHubDeviceClient.create_from_connection_string(connection_string) -print("Connecting") +print('Connecting') device_client.connect() -print("Connected") +print('Connected') def handle_method_request(request): print("Direct method received - ", request.name) @@ -35,7 +35,7 @@ while True: soil_moisture = adc.read(0) print("Soil moisture:", soil_moisture) - message = Message(json.dumps({ "soil_moisture": soil_moisture })) + message = Message(json.dumps({ 'soil_moisture': soil_moisture })) device_client.send_message(message) time.sleep(10) \ No newline at end of file diff --git a/6-consumer/lessons/1-speech-recognition/README.md b/6-consumer/lessons/1-speech-recognition/README.md index cd04f04d..c94c02c8 100644 --- a/6-consumer/lessons/1-speech-recognition/README.md +++ b/6-consumer/lessons/1-speech-recognition/README.md @@ -16,24 +16,77 @@ Add a sketchnote if possible/appropriate 'Alexa set a 8 minute timer called steam broccoli' -Smart devices are becoming more and more pervasive. Not just as smart speakers like HomePods, Echos and Google Homes, but embedded in our phones, watches, and even light fittings and thermostats. 
I have at least 19 devices in my home that have voice assistants, and that's just the ones I know about!
+Smart devices are becoming more and more pervasive. Not just as smart speakers like HomePods, Echos and Google Homes, but embedded in our phones, watches, and even light fittings and thermostats.
-Voice control increases accessibility allowing folks with limited movement to interact with devices. Whether it is a permanent disability such as being born without arms, to temporary disabilities such as broken arms, or having your hands full of shopping or young children, being able to control our houses from our voice instead of our hands opens up a world of access. Shouting 'Hey Siri, close my garage door' whilst dealing with a baby change and an unruly toddler can be a small but effective improvement on life.
+> πŸ’ I have at least 19 devices in my home that have voice assistants, and that's just the ones I know about!
+
+Voice control increases accessibility by allowing folks with limited movement to interact with devices. Whether it is a permanent disability such as being born without arms, a temporary disability such as a broken arm, or having your hands full of shopping or young children, being able to control our houses with our voice instead of our hands opens up a world of access. Shouting 'Hey Siri, close my garage door' whilst dealing with a baby change and an unruly toddler can be a small but effective improvement on life.
One of the more popular uses for voice assistants is setting timers, especially kitchen timers. Being able to set multiple timers with just your voice is a great help in the kitchen - no need to stop kneading dough, stirring soup, or clean dumpling filling off your hands to use a physical timer.
-In this lesson you will learn about building voice recognition into IoT devices. You'll learn about microphones as sensors, how to capture audio from a microphone attached to an IoT device, and how to use AI to convert what is heard into text. Throughout the rest of this project you will build a smart kitchen timer, able to set timers using your voice multiple languages.
+In this lesson you will learn about building voice recognition into IoT devices. You'll learn about microphones as sensors, how to capture audio from a microphone attached to an IoT device, and how to use AI to convert what is heard into text. Throughout the rest of this project you will build a smart kitchen timer, able to set timers using your voice in multiple languages.
In this lesson we'll cover:
* [Microphones](#microphones)
* [Capture audio from your IoT device](#capture-audio-from-your-iot-device)
* [Speech to text](#speech-to-text)
-* [Privacy](#privacy)
* [Convert speech to text](#convert-speech-to-text)
## Microphones
+Microphones are analog sensors that convert sound waves into electrical signals. Vibrations in air cause components in the microphone to move tiny amounts, and these cause tiny changes in electrical signals. These changes are then amplified to generate an electrical output.
+
+### Microphone types
+
+Microphones come in a variety of types:
+
+* Dynamic - Dynamic microphones have a magnet attached to a moving diaphragm that moves in a coil of wire, creating an electrical current. This is the opposite of most loudspeakers, which use an electrical current to move a magnet in a coil of wire, moving a diaphragm to create sound. This means speakers can be used as dynamic microphones, and dynamic microphones can be used as speakers.
In devices such as intercoms where a user is either listening or speaking, but not both, one device can act as both a speaker and a microphone.
+
+  Dynamic microphones don't need power to work - the electrical signal is created entirely by the microphone.
+
+  ![Patti Smith singing into a Shure SM58 (dynamic cardioid type) microphone](../../../images/dynamic-mic.jpg)
+
+  ***Beni KΓΆhler / [Creative Commons Attribution-Share Alike 3.0 Unported](https://creativecommons.org/licenses/by-sa/3.0/deed.en)***
+
+* Ribbon - Ribbon microphones are similar to dynamic microphones, except they have a metal ribbon instead of a diaphragm. This ribbon moves in a magnetic field, generating an electrical current. Like dynamic microphones, ribbon microphones don't need power to work.
+
+  ![Edmund Lowe, American actor, standing at radio microphone (labeled for (NBC) Blue Network), holding script, 1942](../../../images/ribbon-mic.jpg)
+
+* Condenser - Condenser microphones have a thin metal diaphragm and a fixed metal backplate. Electricity is applied to both of these, and as the diaphragm vibrates, the static charge between the plates changes, generating a signal. Condenser microphones need power to work - this is called *phantom power*.
+
+  ![C451B small-diaphragm condenser microphone by AKG Acoustics](../../../images/condenser-mic.jpg)
+
+  ***[Harumphy](https://en.wikipedia.org/wiki/User:Harumphy) at [en.wikipedia](https://en.wikipedia.org/) / [Creative Commons Attribution-Share Alike 3.0 Unported](https://creativecommons.org/licenses/by-sa/3.0/deed.en)***
+
+* MEMS - Microelectromechanical systems microphones, or MEMS, are microphones on a chip. They have a pressure-sensitive diaphragm etched onto a silicon chip, and work in a similar way to a condenser microphone. These microphones can be tiny, and integrated into circuitry.
+
+  ![A MEMS microphone on a circuit board](../../../images/mems-microphone.png)
+
+  In the image above, the chip labelled **LEFT** is a MEMS microphone, with a tiny diaphragm less than a millimeter wide.
+
+βœ… Do some research: What microphones do you have around you - either in your computer, your phone, your headset or in other devices? What type of microphones are they?
+
+### Digital audio
+
+Audio is an analog signal carrying very fine-grained information. To convert this signal to digital, the audio needs to be sampled many thousands of times a second.
+
+> πŸŽ“ Sampling is converting the audio signal into a digital value that represents the signal at that point in time.
+
+![A line chart showing a signal, with discrete points at fixed intervals](../../../images/sampling.png)
+
+Digital audio is sampled using Pulse Code Modulation, or PCM. PCM involves reading the voltage of the signal, and selecting the closest discrete value to that voltage, based on a defined sample size.
+
+> πŸ’ You can think of PCM as the sensor version of pulse width modulation, or PWM (PWM was covered back in [lesson 3 of the getting started project](../../../1-getting-started/lessons/3-sensors-and-actuators/README.md#pulse-width-modulation)). PCM involves converting an analog signal to digital, whereas PWM involves converting a digital signal to analog.
+
+For example, most streaming music services offer 16-bit or 24-bit audio. This means they convert the voltage into a value that fits into a 16-bit integer, or 24-bit integer. 16-bit audio fits the value into a number ranging from -32,768 to 32,767, while 24-bit is in the range βˆ’8,388,608 to 8,388,607. The more bits, the closer the sample is to what our ears actually hear.
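+Here is a minimal Python sketch of the idea (not part of the lesson code - the function name and values are made up purely for illustration), showing how one analog reading could be quantized to a 16-bit PCM sample:
+
+```python
+def quantize_16_bit(analog_value):
+    # A simplified PCM quantization step: map a normalized analog reading
+    # in the range -1.0 to 1.0 onto a signed 16-bit sample value.
+    # Real converters use the full -32,768 to 32,767 range.
+    analog_value = max(-1.0, min(1.0, analog_value))  # clamp to the valid analog range
+    return round(analog_value * 32767)                # pick the closest discrete value
+
+# Taking a reading like this 48,000 times every second gives 48kHz audio
+print(quantize_16_bit(0.5))   # roughly half of full scale: 16384
+print(quantize_16_bit(-1.0))  # the lowest value in this sketch: -32767
+```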
+
+> πŸ’ You may have heard of 8-bit audio, often referred to as LoFi. This is audio sampled using only 8 bits, so -128 to 127. The first computer audio was limited to 8 bits due to hardware limitations, so this is often seen in retro gaming.
+
+These samples are taken many thousands of times per second, using well-defined sample rates measured in kHz (thousands of readings per second). Streaming music services use 48kHz for most audio, but some 'lossless' audio uses up to 96kHz or even 192kHz. The higher the sample rate, the closer to the original the audio will be, up to a point. There is debate whether humans can tell the difference above 48kHz.
+
+βœ… Do some research: If you use a streaming music service, what sample rate and size does it use? If you use CDs, what is the sample rate and size of CD audio?
+
## Capture audio from your IoT device
Your IoT device can be connected to a microphone to capture audio, ready for conversion to text. It can also be connected to speakers to output audio. In later lessons this will be used to give audio feedback, but it is useful to set up speakers now to test the microphone.
@@ -56,17 +109,37 @@ Work through the relevant guide to capture audio on your IoT device:
## Speech to text
+Speech to text, or speech recognition, involves using AI to convert words in an audio signal to text.
+### Speech recognition models
+To convert speech to text, samples from the audio signal are grouped together and fed into a machine learning model based around a recurrent neural network (RNN). This is a type of machine learning model that can use previous data to make a decision about incoming data. For example, the RNN could detect one block of audio samples as the sound 'Hel', and when it receives another that it thinks is the sound 'lo', it can combine this with the previous sound, see that 'Hello' is a valid word and select that as the outcome.
-## Privacy
+ML models always accept data of the same size every time. The image classifier you built in an earlier lesson resizes images to a fixed size and processes them. The same is true of speech models - they have to process fixed-size audio chunks. The speech models need to be able to combine the outputs of multiple predictions to get the answer, to allow them to distinguish between 'Hi' and 'Highway', or 'flock' and 'floccinaucinihilipilification'.
+Speech models are also advanced enough to understand context, and can correct the words they detect as more sounds are processed. For example, if you say "I went to the shops to get two bananas and an apple too", you would use three words that sound the same, but are spelled differently - to, two and too. Speech models are able to understand the context and use the appropriate spelling of the word.
+> πŸ’ Some speech services allow customization to make them work better in noisy environments such as factories, or with industry-specific words such as chemical names. These customizations are trained by providing sample audio and a transcription, and work using transfer learning, the same as how you trained an image classifier using only a few images in an earlier lesson.
-Wake words. TinyML. Not a button - just to make it easier.
+### Privacy
+When using speech to text in a consumer IoT device, privacy is incredibly important. These devices listen to audio continuously, so as a consumer you don't want everything you say being sent to the cloud and converted to text.
Not only will this use a lot of Internet bandwidth, it also has massive privacy implications, especially when some smart device makers randomly select audio for [humans to validate against the text generated to help improve their model](https://www.theverge.com/2019/4/10/18305378/amazon-alexa-ai-voice-assistant-annotation-listen-private-recordings).
+You only want your smart device to send audio to the cloud for processing when you are using it, not when it hears audio in your home, audio that could include private meetings or intimate interactions. The way most smart devices work is with a *wake word*, a key phrase such as "Alexa", "Hey Siri", or "OK Google" that causes the device to 'wake up' and listen to what you are saying up until it detects a break in your speech, indicating you have finished talking to the device.
+> πŸŽ“ Wake word detection is also referred to as *Keyword spotting* or *Keyword recognition*.
+
+These wake words are detected on the device, not in the cloud. These smart devices have small AI models that run on the device and listen for the wake word, and when it is detected, start streaming the audio to the cloud for recognition. These models are very specialized, and just listen for the wake word.
+
+> πŸ’ Some tech companies are adding more privacy to their devices and doing some of the speech to text conversion on the device. Apple have announced that as part of their 2021 iOS and macOS updates they will support speech to text conversion on device, and be able to handle many requests without needing to use the cloud. This is thanks to having powerful processors in their devices that can run ML models.
+
+βœ… What do you think are the privacy and ethical implications of storing the audio sent to the cloud? Should this audio be stored, and if so, how? Do you think the use of recordings for law enforcement is a good trade off for the loss of privacy?
+
+Wake word detection usually uses a technique known as TinyML - converting ML models so they are able to run on microcontrollers. These models are small in size, and consume very little power to run.
+
+To avoid the complexity of training and using a wake word model, the smart timer you are building in this lesson will use a button to turn on the speech recognition.
+
+> πŸ’ If you want to try creating a wake word detection model to run on the Wio Terminal or Raspberry Pi, check out this [Responding to your voice tutorial by Edge Impulse](https://docs.edgeimpulse.com/docs/responding-to-your-voice). If you want to use your computer to do this, you can try the [Get started with Custom Keyword quickstart on the Microsoft docs](https://docs.microsoft.com/azure/cognitive-services/speech-service/keyword-recognition-overview?WT.mc_id=academic-17441-jabenn).
## Convert speech to text
@@ -125,16 +198,23 @@ To use the results of the speech to text conversion, you need to send it to the
1. Verify that messages are being sent by monitoring the Event Hub compatible endpoint using the `az iot hub monitor-events` command.
+> πŸ’ You can find this code in the [code-iot-hub/virtual-iot-device](code-iot-hub/virtual-iot-device), [code-iot-hub/pi](code-iot-hub/pi), or [code-iot-hub/wio-terminal](code-iot-hub/wio-terminal) folder.
+
---
## πŸš€ Challenge
+Speech recognition has been around for a long time, and is continuously improving. Research the current capabilities and see how these have evolved over time, including how accurate machine transcriptions are compared to humans.
+ +What do you think the future holds for speech recognition? + ## Post-lecture quiz [Post-lecture quiz](https://brave-island-0b7c7f50f.azurestaticapps.net/quiz/34) ## Review & Self Study +* Read about the different microphone types and how they work on the [What's the difference between dynamic and condenser microphones article on Musician's HQ](https://musicianshq.com/whats-the-difference-between-dynamic-and-condenser-microphones/). * Read more on the Cognitive Services speech service on the [Speech service documentation on Microsoft Docs](https://docs.microsoft.com/azure/cognitive-services/speech-service/?WT.mc_id=academic-17441-jabenn) * Read about keyword spotting on the [Keyword recognition documentation on Microsoft Docs](https://docs.microsoft.com/azure/cognitive-services/speech-service/keyword-recognition-overview?WT.mc_id=academic-17441-jabenn) diff --git a/6-consumer/lessons/1-speech-recognition/code-iot-hub/pi/smart-timer/app.py b/6-consumer/lessons/1-speech-recognition/code-iot-hub/pi/smart-timer/app.py new file mode 100644 index 00000000..b821a839 --- /dev/null +++ b/6-consumer/lessons/1-speech-recognition/code-iot-hub/pi/smart-timer/app.py @@ -0,0 +1,94 @@ +import io +import json +import pyaudio +import requests +import time +import wave + +from azure.iot.device import IoTHubDeviceClient, Message + +from grove.factory import Factory +button = Factory.getButton('GPIO-HIGH', 5) + +connection_string = '' + +device_client = IoTHubDeviceClient.create_from_connection_string(connection_string) + +print('Connecting') +device_client.connect() +print('Connected') + +audio = pyaudio.PyAudio() +microphone_card_number = 1 +speaker_card_number = 1 +rate = 48000 + +def capture_audio(): + stream = audio.open(format = pyaudio.paInt16, + rate = rate, + channels = 1, + input_device_index = microphone_card_number, + input = True, + frames_per_buffer = 4096) + + frames = [] + + while button.is_pressed(): + frames.append(stream.read(4096)) + + stream.stop_stream() + stream.close() + + wav_buffer = io.BytesIO() + with wave.open(wav_buffer, 'wb') as wavefile: + wavefile.setnchannels(1) + wavefile.setsampwidth(audio.get_sample_size(pyaudio.paInt16)) + wavefile.setframerate(rate) + wavefile.writeframes(b''.join(frames)) + wav_buffer.seek(0) + + return wav_buffer + +api_key = '' +location = '' +language = '' + +def get_access_token(): + headers = { + 'Ocp-Apim-Subscription-Key': api_key + } + + token_endpoint = f'https://{location}.api.cognitive.microsoft.com/sts/v1.0/issuetoken' + response = requests.post(token_endpoint, headers=headers) + return str(response.text) + +def convert_speech_to_text(buffer): + url = f'https://{location}.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1' + + headers = { + 'Authorization': 'Bearer ' + get_access_token(), + 'Content-Type': f'audio/wav; codecs=audio/pcm; samplerate={rate}', + 'Accept': 'application/json;text/xml' + } + + params = { + 'language': language + } + + response = requests.post(url, headers=headers, params=params, data=buffer) + response_json = json.loads(response.text) + + if response_json['RecognitionStatus'] == 'Success': + return response_json['DisplayText'] + else: + return '' + +while True: + while not button.is_pressed(): + time.sleep(.1) + + buffer = capture_audio() + text = convert_speech_to_text(buffer) + if len(text) > 0: + message = Message(json.dumps({ 'speech': text })) + device_client.send_message(message) \ No newline at end of file diff --git 
a/6-consumer/lessons/1-speech-recognition/code-iot-hub/virtual-iot-device/smart-timer/app.py b/6-consumer/lessons/1-speech-recognition/code-iot-hub/virtual-iot-device/smart-timer/app.py new file mode 100644 index 00000000..59df6635 --- /dev/null +++ b/6-consumer/lessons/1-speech-recognition/code-iot-hub/virtual-iot-device/smart-timer/app.py @@ -0,0 +1,33 @@ +import json +import time +from azure.cognitiveservices.speech import SpeechConfig, SpeechRecognizer +from azure.iot.device import IoTHubDeviceClient, Message + +api_key = '' +location = '' +language = '' +connection_string = '' + +device_client = IoTHubDeviceClient.create_from_connection_string(connection_string) + +print('Connecting') +device_client.connect() +print('Connected') + +speech_config = SpeechConfig(subscription=api_key, + region=location, + speech_recognition_language=language) + +recognizer = SpeechRecognizer(speech_config=speech_config) + +def recognized(args): + if len(args.result.text) > 0: + message = Message(json.dumps({ 'speech': args.result.text })) + device_client.send_message(message) + +recognizer.recognized.connect(recognized) + +recognizer.start_continuous_recognition() + +while True: + time.sleep(1) \ No newline at end of file diff --git a/6-consumer/lessons/1-speech-recognition/code-record/pi/smart-timer/app.py b/6-consumer/lessons/1-speech-recognition/code-record/pi/smart-timer/app.py index 61e30db6..8b5edd7c 100644 --- a/6-consumer/lessons/1-speech-recognition/code-record/pi/smart-timer/app.py +++ b/6-consumer/lessons/1-speech-recognition/code-record/pi/smart-timer/app.py @@ -4,7 +4,7 @@ import time import wave from grove.factory import Factory -button = Factory.getButton('GPIO-LOW', 17) +button = Factory.getButton('GPIO-HIGH', 5) audio = pyaudio.PyAudio() microphone_card_number = 1 diff --git a/6-consumer/lessons/1-speech-recognition/code-speech-to-text/pi/smart-timer/app.py b/6-consumer/lessons/1-speech-recognition/code-speech-to-text/pi/smart-timer/app.py index 534d5e80..64eb2990 100644 --- a/6-consumer/lessons/1-speech-recognition/code-speech-to-text/pi/smart-timer/app.py +++ b/6-consumer/lessons/1-speech-recognition/code-speech-to-text/pi/smart-timer/app.py @@ -6,7 +6,7 @@ import time import wave from grove.factory import Factory -button = Factory.getButton('GPIO-LOW', 17) +button = Factory.getButton('GPIO-HIGH', 5) audio = pyaudio.PyAudio() microphone_card_number = 1 diff --git a/6-consumer/lessons/1-speech-recognition/pi-audio.md b/6-consumer/lessons/1-speech-recognition/pi-audio.md index b76c094d..7e8f4163 100644 --- a/6-consumer/lessons/1-speech-recognition/pi-audio.md +++ b/6-consumer/lessons/1-speech-recognition/pi-audio.md @@ -200,6 +200,14 @@ You can capture audio from the microphone using Python code. ALSA lib pcm.c:2565:(snd_pcm_open_noupdate) Unknown PCM cards.pcm.side ``` + If you see the following error: + + ```output + OSError: [Errno -9997] Invalid sample rate + ``` + + then change the `rate` to either 44100 or 16000. + > πŸ’ You can find this code in the [code-record/pi](code-record/pi) folder. πŸ˜€ Your audio recording program was a success! diff --git a/6-consumer/lessons/1-speech-recognition/wio-terminal-audio.md b/6-consumer/lessons/1-speech-recognition/wio-terminal-audio.md new file mode 100644 index 00000000..9c643d65 --- /dev/null +++ b/6-consumer/lessons/1-speech-recognition/wio-terminal-audio.md @@ -0,0 +1,3 @@ +# Capture audio - Wio Terminal + +Coming soon! 
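Relating to the 'Invalid sample rate' troubleshooting added to pi-audio.md above, here is a minimal sketch (not part of the lesson code, and assuming the same PyAudio setup and microphone card number used there) of how you could check which sample rates a microphone will accept before opening a stream:

```python
import pyaudio

audio = pyaudio.PyAudio()
microphone_card_number = 1  # the card number used in the lesson code

# Ask PyAudio/ALSA what the device reports as its default sample rate
info = audio.get_device_info_by_index(microphone_card_number)
print('Default sample rate:', int(info['defaultSampleRate']))

# is_format_supported raises ValueError for unsupported combinations,
# so 48000, 44100 and 16000 can each be tested before recording
for rate in (48000, 44100, 16000):
    try:
        audio.is_format_supported(rate,
                                  input_device=microphone_card_number,
                                  input_channels=1,
                                  input_format=pyaudio.paInt16)
        print(rate, 'is supported')
    except ValueError:
        print(rate, 'is not supported')

audio.terminate()
```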
diff --git a/6-consumer/lessons/1-speech-recognition/wio-terminal-microphone.md b/6-consumer/lessons/1-speech-recognition/wio-terminal-microphone.md index 0a8118e6..1e9bb936 100644 --- a/6-consumer/lessons/1-speech-recognition/wio-terminal-microphone.md +++ b/6-consumer/lessons/1-speech-recognition/wio-terminal-microphone.md @@ -1,3 +1,3 @@ # Configure your microphone and speakers - Wio Terminal -Coming soon +Coming soon! diff --git a/6-consumer/lessons/1-speech-recognition/wio-terminal-speech-to-text.md b/6-consumer/lessons/1-speech-recognition/wio-terminal-speech-to-text.md new file mode 100644 index 00000000..e89f1caa --- /dev/null +++ b/6-consumer/lessons/1-speech-recognition/wio-terminal-speech-to-text.md @@ -0,0 +1,3 @@ +# Speech to text - Wio Terminal + +Coming soon! diff --git a/images/Diagrams.sketch b/images/Diagrams.sketch index ad785858..ac57b578 100644 Binary files a/images/Diagrams.sketch and b/images/Diagrams.sketch differ diff --git a/images/condenser-mic.jpg b/images/condenser-mic.jpg new file mode 100644 index 00000000..df407c82 Binary files /dev/null and b/images/condenser-mic.jpg differ diff --git a/images/dynamic-mic.jpg b/images/dynamic-mic.jpg new file mode 100644 index 00000000..e4961bd7 Binary files /dev/null and b/images/dynamic-mic.jpg differ diff --git a/images/mems-microphone.png b/images/mems-microphone.png new file mode 100644 index 00000000..2471d153 Binary files /dev/null and b/images/mems-microphone.png differ diff --git a/images/ribbon-mic.jpg b/images/ribbon-mic.jpg new file mode 100644 index 00000000..f6452e08 Binary files /dev/null and b/images/ribbon-mic.jpg differ diff --git a/images/sampling.png b/images/sampling.png new file mode 100644 index 00000000..8fa2f8a4 Binary files /dev/null and b/images/sampling.png differ