Skip to main content

Section 3.10 Sound with I2S

Subsection 3.10.1 Playing sound with I2S

from machine import Pin, I2S
import math
import struct
from time import sleep

# SAMPLE_RATE: Number of audio samples to play per second
# This is a tradeoff between audio fidelity and file size.  The maximum frequency
# that can be played will be exactly half of the sample rate, so a sample rate of
# 8000 (Hz) will cut off everything above 4kHz.
# The file size is also proportional to the sample rate:
#     size = sample rate (samples/second) * duration (seconds) * 2 (bytes / sample)
SAMPLE_RATE = 8000
BYTES_PER_SAMPLE = 2 # Normally 2 (16-bit), but could be 4 (32-bit)

# struct format used to pack one sample: "<h" is a little-endian 2-byte signed
# value, "<i" is a little-endian 4-byte signed value.  This has to agree with
# BYTES_PER_SAMPLE, otherwise only part of each sample slot gets written.
SAMPLE_FORMAT = "<h" if BYTES_PER_SAMPLE == 2 else "<i"

sck_pin = Pin(14) # Serial clock (BCLK on breakout)
ws_pin = Pin(13) # Word select (LRCLK on breakout)
sd_pin = Pin(12) # Serial data (DIN on breakout)

# Open the audio channel using I2S (Inter-IC-Sound)
audio = I2S(0, # This must be either 0 or 1 for ESP32
            sck=sck_pin, ws=ws_pin, sd=sd_pin,
            mode=I2S.TX,
            bits=8*BYTES_PER_SAMPLE,
            format=I2S.MONO,
            rate=SAMPLE_RATE, # Must match the rate used to build the buffer below
            ibuf=10000)

# Let's generate a simple tone
TONE_FREQ = 440
AMPLITUDE = 3000 # For 16-bit, max of 32767 (it'll clip around 30k)

# The buffer holds exactly one period of the wave and is played over and over.
# Because this is an integer division, the tone actually produced is
# SAMPLE_RATE / n_samples, which is only approximately TONE_FREQ
# (8000 // 440 = 18 samples, so about 444 Hz).
n_samples = SAMPLE_RATE // TONE_FREQ
buffer_size = n_samples * BYTES_PER_SAMPLE

buf = bytearray(buffer_size)

# Fill the buffer with a sine wave
for i in range(n_samples):
    sample = int(AMPLITUDE * math.sin(2 * math.pi * i / n_samples))
    # Printing each sample is optional; remove this line if you don't want the output
    print(sample)
    # buf is an array of individual bytes, but we're working with values that
    # need to be stored in multi-byte groups.  Use Python's `struct` module to do
    # that packing.
    # SAMPLE_FORMAT picks the signed integer size; i*BYTES_PER_SAMPLE is the offset
    struct.pack_into(SAMPLE_FORMAT, buf, i*BYTES_PER_SAMPLE, sample)

# Wrap the sound-playing in a try-except-finally block.
# The loop below never ends on its own, so the only way out is an interruption
# (like the user pressing 'Stop').  The "finally" part makes sure the I2S device
# is always shut down on the way out instead of being left stuck playing.
try:
    while True:
        audio.write(buf)
except KeyboardInterrupt:
    pass
finally:
    audio.deinit()

Subsection 3.10.2 Playing sound files

If you have more complex sounds, you'll probably want to pre-record them and play the sound from a file. You can use Audacity or another sound editor to record or transcode the sounds into a mono (single-channel) WAV file with the following format:

  • Bit depth: 16 bits / sample

  • Sample rate: 8000 Hz

You can then play the file with the following code:

from machine import Pin, I2S
import math
import struct
from time import sleep

sck_pin = Pin(14) # Serial clock (BCLK on breakout)
ws_pin = Pin(13) # Word select (LRCLK on breakout)
sd_pin = Pin(12) # Serial data (DIN on breakout)

# Open the audio channel using I2S (Inter-IC-Sound)
audio = I2S(0, # This must be either 0 or 1 for ESP32
            sck=sck_pin, ws=ws_pin, sd=sd_pin,
            mode=I2S.TX,
            bits=16,
            format=I2S.MONO,
            rate=8000, # This must match the sample rate of your file!
            ibuf=10000)

# Let's play a clip in a .wav file
WAVFILE = "fuzzy.wav"
BUFFER_SIZE = 10000
# Number of bytes to skip before the sample data starts.
# NOTE: this assumes the file has a plain 44-byte WAV header; if your editor
# writes extra metadata, the leftover header bytes will be played as a short
# burst of noise at the start.
WAV_HEADER_SIZE = 44

# Create a memory buffer to store the samples
buf = bytearray(BUFFER_SIZE)
# And create a "memoryview" (which is basically another window into the same data),
# which will allow us to read the file directly into the buffer.
wav_samples_mv = memoryview(buf)

# Wrap the sound-playing in a try-except-finally block
# If something goes wrong in the middle (like the user pressing 'Stop'), we'll
# run the "except" part and then clean up. Otherwise, we can end up with the I2S
# device stuck playing, which is *really* annoying.
# The "finally" part runs no matter how we leave the block (normal end of file,
# 'Stop', or an error such as the file not existing), so the I2S device is
# always shut down.
try:
    # Open the file to read its bytes; "with" closes it again when we're done
    with open(WAVFILE, "rb") as wav:
        # Skip over the WAV header information and get to the data
        wav.seek(WAV_HEADER_SIZE)

        while True:
            # Try to read some bytes from the wave file into the buffer
            # The `readinto` function returns the number of bytes that it read
            bytes_read = wav.readinto(wav_samples_mv)

            # If the function didn't read anything, we must have reached the end of the file
            if not bytes_read:
                break # Quit the loop and stop playing

            # If we did read some bytes, send them to the speaker.  Only the
            # first `bytes_read` bytes are fresh; the rest of the buffer still
            # holds data from the previous pass.
            audio.write(wav_samples_mv[:bytes_read])
except KeyboardInterrupt:
    pass
finally:
    audio.deinit()