parent c38636e8aa
commit c9d4e9f24f

@ -0,0 +1,61 @@
import io
import pyaudio
import time
import wave

from grove.factory import Factory

# Button on the ReSpeaker 2-Mics Pi HAT - sends a low signal when pressed
button = Factory.getButton('GPIO-LOW', 17)

audio = pyaudio.PyAudio()

# ALSA card numbers for the microphone and speaker, from arecord -l and aplay -l
microphone_card_number = 1
speaker_card_number = 1

# Sample rate for capture and playback
rate = 48000

def capture_audio():
    # Open an input stream on the microphone
    stream = audio.open(format = pyaudio.paInt16,
                        rate = rate,
                        channels = 1,
                        input_device_index = microphone_card_number,
                        input = True,
                        frames_per_buffer = 4096)

    frames = []

    # Read 4096-byte buffers for as long as the button is held down
    while button.is_pressed():
        frames.append(stream.read(4096))

    stream.stop_stream()
    stream.close()

    # Write the captured frames to an in-memory WAV file
    wav_buffer = io.BytesIO()
    with wave.open(wav_buffer, 'wb') as wavefile:
        wavefile.setnchannels(1)
        wavefile.setsampwidth(audio.get_sample_size(pyaudio.paInt16))
        wavefile.setframerate(rate)
        wavefile.writeframes(b''.join(frames))
        wav_buffer.seek(0)

    return wav_buffer

def play_audio(buffer):
    # Open an output stream on the speaker using the same settings
    stream = audio.open(format = pyaudio.paInt16,
                        rate = rate,
                        channels = 1,
                        output_device_index = speaker_card_number,
                        output = True)

    # Play the WAV buffer back in 4096-byte chunks
    with wave.open(buffer, 'rb') as wf:
        data = wf.readframes(4096)

        while len(data) > 0:
            stream.write(data)
            data = wf.readframes(4096)

        stream.close()

# Wait for a button press, then record while it is held and play it back
while True:
    while not button.is_pressed():
        time.sleep(.1)

    buffer = capture_audio()
    play_audio(buffer)
@ -0,0 +1,82 @@
import io
import json
import pyaudio
import requests
import time
import wave

from grove.factory import Factory

# Button on the ReSpeaker 2-Mics Pi HAT - sends a low signal when pressed
button = Factory.getButton('GPIO-LOW', 17)

audio = pyaudio.PyAudio()

# ALSA card numbers for the microphone and speaker, from arecord -l and aplay -l
microphone_card_number = 1
speaker_card_number = 1

# Sample rate for capture and playback
rate = 48000

def capture_audio():
    # Open an input stream on the microphone
    stream = audio.open(format = pyaudio.paInt16,
                        rate = rate,
                        channels = 1,
                        input_device_index = microphone_card_number,
                        input = True,
                        frames_per_buffer = 4096)

    frames = []

    # Read 4096-byte buffers for as long as the button is held down
    while button.is_pressed():
        frames.append(stream.read(4096))

    stream.stop_stream()
    stream.close()

    # Write the captured frames to an in-memory WAV file
    wav_buffer = io.BytesIO()
    with wave.open(wav_buffer, 'wb') as wavefile:
        wavefile.setnchannels(1)
        wavefile.setsampwidth(audio.get_sample_size(pyaudio.paInt16))
        wavefile.setframerate(rate)
        wavefile.writeframes(b''.join(frames))
        wav_buffer.seek(0)

    return wav_buffer

# Speech service settings
api_key = '<key>'
location = '<location>'
language = '<language>'

def get_access_token():
    # Request a short-lived access token using the API key
    headers = {
        'Ocp-Apim-Subscription-Key': api_key
    }

    token_endpoint = f'https://{location}.api.cognitive.microsoft.com/sts/v1.0/issuetoken'
    response = requests.post(token_endpoint, headers=headers)
    return str(response.text)

def convert_speech_to_text(buffer):
    # Send the WAV buffer to the speech service REST API
    url = f'https://{location}.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1'

    headers = {
        'Authorization': 'Bearer ' + get_access_token(),
        'Content-Type': f'audio/wav; codecs=audio/pcm; samplerate={rate}',
        'Accept': 'application/json;text/xml'
    }

    params = {
        'language': language
    }

    response = requests.post(url, headers=headers, params=params, data=buffer)
    response_json = json.loads(response.text)

    # Return the recognized text, or an empty string if recognition failed
    if response_json['RecognitionStatus'] == 'Success':
        return response_json['DisplayText']
    else:
        return ''

# Wait for a button press, then record while it is held and convert to text
while True:
    while not button.is_pressed():
        time.sleep(.1)

    buffer = capture_audio()
    text = convert_speech_to_text(buffer)
    print(text)
@ -0,0 +1,205 @@

# Capture audio - Raspberry Pi

In this part of the lesson, you will write code to capture audio on your Raspberry Pi. Audio capture will be controlled by a button.

## Hardware

The Raspberry Pi needs a button to control the audio capture.

The button you will use is a Grove button. This is a digital sensor that turns a signal on or off. These buttons can be configured to send a high signal when the button is pressed and a low signal when it is not, or a low signal when pressed and a high signal when not.

If you are using a ReSpeaker 2-Mics Pi HAT as a microphone, then there is no need to connect a button, as this hat has one fitted already. Skip to the next section.

### Connect the button

The button can be connected to the Grove base hat.

#### Task - connect the button

1. Insert one end of a Grove cable into the socket on the button module. It will only go in one way round.

1. With the Raspberry Pi powered off, connect the other end of the Grove cable to the digital socket marked **D5** on the Grove Base hat attached to the Pi. This socket is the second from the left, on the row of sockets next to the GPIO pins.

## Capture audio

You can capture audio from the microphone using Python code.

### Task - capture audio

1. Power up the Pi and wait for it to boot.

1. Launch VS Code, either directly on the Pi, or connect via the Remote SSH extension.

1. The PyAudio Pip package has functions to record and play back audio. This package depends on some audio libraries that need to be installed first. Run the following commands in the terminal to install these:

    ```sh
    sudo apt update
    sudo apt install libportaudio0 libportaudio2 libportaudiocpp0 portaudio19-dev libasound2-plugins --yes
    ```

1. Install the PyAudio Pip package.

    ```sh
    pip3 install pyaudio
    ```

1. Create a new folder called `smart-timer` and add a file called `app.py` to this folder.

1. Add the following imports to the top of this file:

    ```python
    import io
    import pyaudio
    import time
    import wave

    from grove.factory import Factory
    ```

    This imports the `pyaudio` module, some standard Python modules to handle wave files, and the `grove.factory` module to import a `Factory` to create a button class.

1. Below this, add code to create a Grove button.

    If you are using the ReSpeaker 2-Mics Pi HAT, use the following code:

    ```python
    # The button on the ReSpeaker 2-Mics Pi HAT
    button = Factory.getButton("GPIO-LOW", 17)
    ```

    This creates a button on port **D17**, the port that the button on the ReSpeaker 2-Mics Pi HAT is connected to. This button is set to send a low signal when pressed.

    If you are not using the ReSpeaker 2-Mics Pi HAT, and are using a Grove button connected to the base hat, use this code:

    ```python
    button = Factory.getButton("GPIO-HIGH", 5)
    ```

    This creates a button on port **D5** that is set to send a high signal when pressed.

1. Below this, create an instance of the PyAudio class to handle audio:

    ```python
    audio = pyaudio.PyAudio()
    ```

1. Declare the hardware card numbers for the microphone and speaker. These will be the numbers of the cards you found by running `arecord -l` and `aplay -l` earlier in this lesson.

    ```python
    microphone_card_number = <microphone card number>
    speaker_card_number = <speaker card number>
    ```

    Replace `<microphone card number>` with the number of your microphone's card.

    Replace `<speaker card number>` with the number of your speaker's card, the same number you set in the `alsa.conf` file.
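    If you're not sure which numbers to use, PyAudio can list the audio devices it can see. This is an optional diagnostic sketch, not part of the lesson code - and note that PyAudio's device indices don't always match ALSA card numbers exactly, so treat the output as a cross-check against `arecord -l` and `aplay -l`:

    ```python
    # Optional: list the audio devices PyAudio can see, with their indices
    for index in range(audio.get_device_count()):
        info = audio.get_device_info_by_index(index)
        print(index, info['name'],
              'inputs:', info['maxInputChannels'],
              'outputs:', info['maxOutputChannels'])
    ```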
1. Below this, declare the sample rate to use for the audio capture and playback. You may need to change this depending on the hardware you are using.

    ```python
    rate = 48000 # 48kHz
    ```

    If you get sample rate errors when running this code later, change this value to `44100` or `16000`. The higher the value, the better the quality of the sound.
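    If you're not sure which rates your microphone supports, PyAudio can check a rate before you open a stream. A minimal diagnostic sketch, assuming the `audio` and `microphone_card_number` values declared above:

    ```python
    # Optional: check which common sample rates the microphone supports.
    # is_format_supported returns True, or raises ValueError if unsupported.
    for candidate_rate in (48000, 44100, 16000):
        try:
            audio.is_format_supported(candidate_rate,
                                      input_device = microphone_card_number,
                                      input_channels = 1,
                                      input_format = pyaudio.paInt16)
            print(candidate_rate, 'Hz is supported')
        except ValueError:
            print(candidate_rate, 'Hz is not supported')
    ```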
1. Below this, create a new function called `capture_audio`. This will be called to capture audio from the microphone:

    ```python
    def capture_audio():
    ```

1. Inside this function, add the following to capture the audio:

    ```python
    stream = audio.open(format = pyaudio.paInt16,
                        rate = rate,
                        channels = 1,
                        input_device_index = microphone_card_number,
                        input = True,
                        frames_per_buffer = 4096)

    frames = []

    while button.is_pressed():
        frames.append(stream.read(4096))

    stream.stop_stream()
    stream.close()
    ```

    This code opens an audio input stream using the PyAudio object. This stream will capture audio from the microphone at the sample rate set in the `rate` variable (48kHz by default), capturing it in buffers of 4096 bytes in size.

    The code then loops whilst the Grove button is pressed, reading these 4096-byte buffers into an array each time.

    > 💁 You can read more on the options passed to the `open` method in the [PyAudio documentation](https://people.csail.mit.edu/hubert/pyaudio/docs/).

    Once the button is released, the stream is stopped and closed.
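    On a Pi, `stream.read` can sometimes raise an input overflow `OSError` if audio arrives faster than your code reads it. If you hit this, PyAudio's `read` accepts an `exception_on_overflow` flag that returns what was captured instead of raising, at the cost of possibly dropping a few frames - a hedged tweak to the loop above:

    ```python
    # Ignore input overflows instead of raising an OSError -
    # a few frames may be dropped if the Pi can't keep up
    frames.append(stream.read(4096, exception_on_overflow = False))
    ```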
1. Add the following to the end of this function:

    ```python
    wav_buffer = io.BytesIO()
    with wave.open(wav_buffer, 'wb') as wavefile:
        wavefile.setnchannels(1)
        wavefile.setsampwidth(audio.get_sample_size(pyaudio.paInt16))
        wavefile.setframerate(rate)
        wavefile.writeframes(b''.join(frames))
        wav_buffer.seek(0)

    return wav_buffer
    ```

    This code creates a binary buffer, and writes all the captured audio to it as a [WAV file](https://wikipedia.org/wiki/WAV). This is a standard way to write uncompressed audio to a file. This buffer is then returned.
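    If you want to check what was captured, you can also dump the buffer to disk and play it back with `aplay` - an optional debugging sketch, not part of the lesson code:

    ```python
    # Optional: save the in-memory WAV buffer to a file for debugging.
    # getvalue() returns the contents without moving the read position.
    with open('debug.wav', 'wb') as debug_file:
        debug_file.write(wav_buffer.getvalue())
    ```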
1. Add the following `play_audio` function to play back the audio buffer:

    ```python
    def play_audio(buffer):
        stream = audio.open(format = pyaudio.paInt16,
                            rate = rate,
                            channels = 1,
                            output_device_index = speaker_card_number,
                            output = True)

        with wave.open(buffer, 'rb') as wf:
            data = wf.readframes(4096)

            while len(data) > 0:
                stream.write(data)
                data = wf.readframes(4096)

            stream.close()
    ```

    This function opens another audio stream, this time for output - to play the audio. It uses the same settings as the input stream. The buffer is then opened as a wave file and written to the output stream in 4096-byte chunks, playing the audio. The stream is then closed.

1. Add the following code below the `play_audio` function to loop until the button is pressed. Once the button is pressed, the audio is captured, then played.

    ```python
    while True:
        while not button.is_pressed():
            time.sleep(.1)

        buffer = capture_audio()
        play_audio(buffer)
    ```

1. Run the code. Press the button and speak into the microphone. Release the button when you are done, and you will hear the recording.

    You may see some ALSA errors when the PyAudio instance is created. This is due to configuration on the Pi for audio devices you don't have. You can ignore these errors.

    ```output
    pi@raspberrypi:~/smart-timer $ python3 app.py
    ALSA lib pcm.c:2565:(snd_pcm_open_noupdate) Unknown PCM cards.pcm.front
    ALSA lib pcm.c:2565:(snd_pcm_open_noupdate) Unknown PCM cards.pcm.rear
    ALSA lib pcm.c:2565:(snd_pcm_open_noupdate) Unknown PCM cards.pcm.center_lfe
    ALSA lib pcm.c:2565:(snd_pcm_open_noupdate) Unknown PCM cards.pcm.side
    ```
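    These warnings come from the ALSA C library itself, so they can't be filtered from Python's own output handling. If you want to silence them, one common workaround is to register a no-op ALSA error handler via `ctypes` before creating the PyAudio instance - an optional sketch, assuming `libasound.so.2` is present on your Pi:

    ```python
    # Optional: silence ALSA warnings by registering a no-op error handler.
    # Run this before creating the PyAudio instance.
    from ctypes import CFUNCTYPE, c_char_p, c_int, cdll

    ERROR_HANDLER_FUNC = CFUNCTYPE(None, c_char_p, c_int, c_char_p, c_int, c_char_p)

    def _ignore_alsa_error(filename, line, function, err, fmt):
        pass

    # Keep a reference to the handler so it isn't garbage collected
    _error_handler = ERROR_HANDLER_FUNC(_ignore_alsa_error)
    cdll.LoadLibrary('libasound.so.2').snd_lib_error_set_handler(_error_handler)
    ```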
> 💁 You can find this code in the [code-record/pi](code-record/pi) folder.

😀 Your audio recording program was a success!
@ -1,36 +0,0 @@

## find devices

aplay -l

arecord -l

## configure defaults

sudo nano /usr/share/alsa/alsa.conf

defaults.pcm.card 1

speaker-test -t wav -c 2

## test

arecord --format=S16_LE --duration=5 --rate=16000 --file-type=raw out.raw
aplay --format=S16_LE --rate=16000 out.raw

## Create service

az group create --name smart-timer --location westus2

az cognitiveservices account create \
    --name smart-timer \
    --resource-group smart-timer \
    --kind SpeechServices \
    --sku F0 \
    --location westus2 \
    --yes

copy endpoint for token issuer

az cognitiveservices account keys list \
    --name smart-timer \
    --resource-group smart-timer
@ -0,0 +1,143 @@

# Configure your microphone and speakers - Raspberry Pi

In this part of the lesson, you will add a microphone and speakers to your Raspberry Pi.

## Hardware

The Raspberry Pi needs a microphone.

The Pi doesn't have a microphone built in, so you will need to add an external microphone. There are multiple ways to do this:

* USB microphone
* USB headset
* USB all-in-one speakerphone
* USB audio adapter and microphone with a 3.5mm jack
* [ReSpeaker 2-Mics Pi HAT](https://www.seeedstudio.com/ReSpeaker-2-Mics-Pi-HAT.html)

> 💁 Not all Bluetooth microphones are supported on the Raspberry Pi, so if you have a Bluetooth microphone or headset, you may have issues pairing or capturing audio.

Raspberry Pis come with a 3.5mm headphone jack. You can use this to connect headphones, a headset or a speaker. You can also add speakers using:

* HDMI audio through a monitor or TV
* USB speakers
* USB headset
* USB all-in-one speakerphone
* [ReSpeaker 2-Mics Pi HAT](https://www.seeedstudio.com/ReSpeaker-2-Mics-Pi-HAT.html) with a speaker attached, either to the 3.5mm jack or to the JST port

## Connect and configure the microphone and speakers

The microphone and speakers need to be connected, and configured.

### Task - connect and configure the microphone

1. Connect the microphone using the appropriate method. For example, connect it via one of the USB ports.

1. If you are using the ReSpeaker 2-Mics Pi HAT, you can remove the Grove base hat, then fit the ReSpeaker hat in its place.

    You will need a Grove button later in this lesson, but one is built into this hat, so the Grove base hat is not needed.

    Once the hat is fitted, you will need to install some drivers. Refer to the [Seeed getting started instructions](https://wiki.seeedstudio.com/ReSpeaker_2_Mics_Pi_HAT_Raspberry/#getting-started) for driver installation instructions.

    > ⚠️ The instructions use `git` to clone a repository. If you don't have `git` installed on your Pi, you can install it by running the following command:
    >
    > ```sh
    > sudo apt install git --yes
    > ```

1. Run the following command in your Terminal, either on the Pi, or connected using VS Code and a remote SSH session, to see information about the connected microphone:

    ```sh
    arecord -l
    ```

    You will see a list of connected microphones. It will be something like the following:

    ```output
    pi@raspberrypi:~ $ arecord -l
    **** List of CAPTURE Hardware Devices ****
    card 1: M0 [eMeet M0], device 0: USB Audio [USB Audio]
      Subdevices: 1/1
      Subdevice #0: subdevice #0
    ```

    Assuming you only have one microphone, you should only see one entry. Configuration of microphones can be tricky on Linux, so it is easiest to only use one microphone and unplug any others.

    Note down the card number, as you will need this later. In the output above the card number is 1.

### Task - connect and configure the speaker

1. Connect the speakers using the appropriate method.

1. Run the following command in your Terminal, either on the Pi, or connected using VS Code and a remote SSH session, to see information about the connected speakers:

    ```sh
    aplay -l
    ```

    You will see a list of connected speakers. It will be something like the following:

    ```output
    pi@raspberrypi:~ $ aplay -l
    **** List of PLAYBACK Hardware Devices ****
    card 0: Headphones [bcm2835 Headphones], device 0: bcm2835 Headphones [bcm2835 Headphones]
      Subdevices: 8/8
      Subdevice #0: subdevice #0
      Subdevice #1: subdevice #1
      Subdevice #2: subdevice #2
      Subdevice #3: subdevice #3
      Subdevice #4: subdevice #4
      Subdevice #5: subdevice #5
      Subdevice #6: subdevice #6
      Subdevice #7: subdevice #7
    card 1: M0 [eMeet M0], device 0: USB Audio [USB Audio]
      Subdevices: 1/1
      Subdevice #0: subdevice #0
    ```

    You will always see `card 0: Headphones`, as this is the built-in headphone jack. If you have added additional speakers, such as a USB speaker, you will see these listed as well.

1. If you are using an additional speaker, and not a speaker or headphones connected to the built-in headphone jack, you need to configure it as the default. To do this run the following command:

    ```sh
    sudo nano /usr/share/alsa/alsa.conf
    ```

    This will open a configuration file in `nano`, a terminal-based text editor. Scroll down using the arrow keys on your keyboard until you find the following line:

    ```output
    defaults.pcm.card 0
    ```

    Change the value from `0` to the number of the card you want to use from the list that came back from the call to `aplay -l`. For example, in the output above there is a second sound card called `card 1: M0 [eMeet M0], device 0: USB Audio [USB Audio]`, using card 1. To use this, I would update the line to be:

    ```output
    defaults.pcm.card 1
    ```

    Set this value to the appropriate card number. You can navigate to the number using the arrow keys on your keyboard, then delete and type the new number as normal when editing text files.

1. Save the changes and close the file by pressing `Ctrl+x`. Press `y` to save the file, then `return` to select the file name.

### Task - test the microphone and speaker

1. Run the following command to record 5 seconds of audio through the microphone:

    ```sh
    arecord --format=S16_LE --duration=5 --rate=16000 --file-type=wav out.wav
    ```

    Whilst this command is running, make noise into the microphone, such as by speaking, singing, beat boxing, playing an instrument, or whatever takes your fancy.

1. After 5 seconds, the recording will stop. Run the following command to play back the audio:

    ```sh
    aplay --format=S16_LE --rate=16000 out.wav
    ```

    You will hear the audio being played back through the speakers. Adjust the output volume on your speaker as necessary.

1. If you need to adjust the volume of the built-in microphone port, or adjust the gain of the microphone, you can use the `alsamixer` utility. You can read more on this utility on the [Linux alsamixer man page](https://linux.die.net/man/1/alsamixer).

1. If you get errors playing back the audio, check the card you set as the `defaults.pcm.card` in the `alsa.conf` file.
@ -1,5 +1,106 @@

## capture audio

sudo apt-get install libportaudio0 libportaudio2 libportaudiocpp0 portaudio19-dev

pip3 install pyaudio

# Speech to text - Raspberry Pi

In this part of the lesson, you will write code to convert speech in the captured audio to text using the speech service.

## Send the audio to the speech service

The audio can be sent to the speech service using the REST API. To use the speech service, first you need to request an access token, then use that token to access the REST API. These access tokens expire after 10 minutes, so your code should request them on a regular basis to ensure they are always up to date.
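For example, one way to do this is to cache the token and refresh it just before the 10 minutes are up, rather than requesting a new one on every call. This is an optional sketch of the idea, assuming a `get_access_token` function like the one you will write below:

```python
import time

# Sketch: cache the access token and refresh it before it expires
cached_token = None
token_acquired_at = 0

def get_cached_access_token():
    global cached_token, token_acquired_at
    # Refresh after 9 minutes, comfortably inside the 10-minute lifetime
    if cached_token is None or time.time() - token_acquired_at > 9 * 60:
        cached_token = get_access_token()
        token_acquired_at = time.time()
    return cached_token
```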
### Task - get an access token

1. Open the `smart-timer` project on your Pi.

1. Remove the `play_audio` function. This is no longer needed, as you don't want a smart timer to repeat back to you what you said.

1. Add the following imports to the top of the `app.py` file:

    ```python
    import requests
    import json
    ```

1. Add the following code above the `while True` loop to declare some settings for the speech service:

    ```python
    api_key = '<key>'
    location = '<location>'
    language = '<language>'
    ```

    Replace `<key>` with the API key for your speech service. Replace `<location>` with the location you used when you created the speech service resource.

    Replace `<language>` with the locale name for the language you will be speaking in, for example `en-GB` for English, or `zh-HK` for Cantonese. You can find a list of the supported languages and their locale names in the [Language and voice support documentation on Microsoft docs](https://docs.microsoft.com/azure/cognitive-services/speech-service/language-support?WT.mc_id=academic-17441-jabenn#speech-to-text).

1. Below this, add the following function to get an access token:

    ```python
    def get_access_token():
        headers = {
            'Ocp-Apim-Subscription-Key': api_key
        }

        token_endpoint = f'https://{location}.api.cognitive.microsoft.com/sts/v1.0/issuetoken'
        response = requests.post(token_endpoint, headers=headers)
        return str(response.text)
    ```

    This calls a token issuing endpoint, passing the API key as a header. This call returns an access token that can be used to call the speech services.

1. Below this, declare a function to convert speech in the captured audio to text using the REST API:

    ```python
    def convert_speech_to_text(buffer):
    ```

1. Inside this function, set up the REST API URL and headers:

    ```python
    url = f'https://{location}.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1'

    headers = {
        'Authorization': 'Bearer ' + get_access_token(),
        'Content-Type': f'audio/wav; codecs=audio/pcm; samplerate={rate}',
        'Accept': 'application/json;text/xml'
    }

    params = {
        'language': language
    }
    ```

    This builds a URL using the location of the speech services resource. It then populates the headers with the access token from the `get_access_token` function, as well as the sample rate used to capture the audio. Finally it defines some parameters to be passed with the URL containing the language in the audio.

1. Below this, add the following code to call the REST API and get back the text:

    ```python
    response = requests.post(url, headers=headers, params=params, data=buffer)
    response_json = json.loads(response.text)

    if response_json['RecognitionStatus'] == 'Success':
        return response_json['DisplayText']
    else:
        return ''
    ```

    This calls the URL and decodes the JSON value that comes in the response. The `RecognitionStatus` value in the response indicates if the call was able to extract speech into text successfully. If this is `Success` then the text is returned from the function, otherwise an empty string is returned.
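    The code above assumes the HTTP call succeeds. If you want to guard against network or authorization failures as well, you can check the status code before decoding the JSON - a hedged variation:

    ```python
    # Variation: check the HTTP status before decoding, so an expired
    # token or a network error returns '' instead of raising an exception
    response = requests.post(url, headers=headers, params=params, data=buffer)

    if response.status_code != 200:
        print('Speech service error:', response.status_code, response.text)
        return ''

    response_json = response.json()

    if response_json['RecognitionStatus'] == 'Success':
        return response_json['DisplayText']
    else:
        return ''
    ```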
1. Finally replace the call to `play_audio` in the `while True` loop with a call to the `convert_speech_to_text` function, as well as printing the text to the console:

    ```python
    text = convert_speech_to_text(buffer)
    print(text)
    ```

1. Run the code. Press the button and speak into the microphone. Release the button when you are done, and you will see the audio converted to text in the output.

    ```output
    pi@raspberrypi:~/smart-timer $ python3 app.py
    Hello world.
    Welcome to IoT for beginners.
    ```

    Try different types of sentences, along with sentences where words sound the same but have different meanings. For example, if you are speaking in English, say 'I want to buy two bananas and an apple too', and notice how it will use the correct to, two and too based on the context of the word, not just its sound.

> 💁 You can find this code in the [code-speech-to-text/pi](code-speech-to-text/pi) folder.

😀 Your speech to text program was a success!
@ -0,0 +1,3 @@

# Capture audio - Virtual IoT device

The Python libraries that you will be using later in this lesson to convert speech to text have built-in audio capture on Windows, macOS and Linux. You don't need to do anything here.

@ -0,0 +1,12 @@

# Configure your microphone and speakers - Virtual IoT Hardware

The virtual IoT hardware will use a microphone and speakers attached to your computer.

If your computer doesn't have a microphone and speakers built in, you will need to attach these using hardware of your choice, such as:

* USB microphone
* USB speakers
* Speakers built into your monitor and connected over HDMI
* Bluetooth headset

Refer to your hardware manufacturer's instructions to install and configure this hardware.

@ -0,0 +1,3 @@

# Configure your microphone and speakers - Wio Terminal

Coming soon