Commit 6d6785cf authored by Matěj Toul's avatar Matěj Toul
Browse files

audio: added SDL and minimp3 "adapters" to osi module

parent 5610eda9
Loading
Loading
Loading
Loading
+157 −0
Original line number Diff line number Diff line
#ifndef OSI_AUDIO_HPP_INCLUDED
#   define OSI_AUDIO_HPP_INCLUDED

#   include <cstdint>
#   include <vector>
#   include <string>

namespace osi {

// Sample formats understood by the osi audio layer.
enum class AudioFormat
{
    // 32-bit floating-point samples in the system's byte order.
    Float32
};

// Describes an audio stream configuration together with the callback that supplies the data.
// A default-constructed spec is 44100 Hz, stereo, Float32, 4096-frame buffer, with no callback.
struct AudioSpec
{
    // Sampling frequency in Hz.
    std::uint32_t sample_rate = 44100;
    // Number of interleaved channels (2 means stereo).
    std::uint32_t channels = 2;
    // Format of each individual sample.
    AudioFormat format = AudioFormat::Float32;
    // Size of the audio buffer, measured in frames.
    std::uint32_t buffer_size = 4096;
    // Opaque pointer handed back to `callback` as its first argument.
    void* userdata = nullptr;
    // Function called with a byte buffer `stream` of `len` bytes.
    void (*callback)(void* userdata, std::uint8_t* stream, int len) = nullptr;

    // All fields receive the defaults listed above.
    AudioSpec() {}
};

// Owns one opened audio device. The device is closed automatically on destruction.
// Instances can be neither copied nor moved.
class AudioDevice final
{
public:
    // Creates a device object in the closed state.
    AudioDevice();
    // Closes the device if it is still open.
    ~AudioDevice();

    AudioDevice(AudioDevice const&) = delete;
    AudioDevice(AudioDevice&&) = delete;
    // Assignment operators must not be const-qualified; they are deleted the same way as in MP3Decoder.
    AudioDevice& operator=(AudioDevice const&) = delete;
    AudioDevice& operator=(AudioDevice&&) = delete;

    // This function accepts the desired spec, but also returns the obtained spec back.
    // It is important that the audio system works with the obtained spec, as the audio device
    // may not support exactly what you asked for and you will get the closest match instead.
    // The device must not be open already.
    void open(AudioSpec const& desired, AudioSpec& obtained);
    // Closes the device; does nothing when the device is not open.
    void close();

    // Pauses (true) or resumes (false) the device. The device must be open.
    void pause(bool paused);
    // Returns true between a successful open() and the following close().
    bool is_open() const;

    // Sample rate obtained by open(); 0 while the device is closed.
    std::uint32_t sample_rate() const { return m_sample_rate; }
    // Channel count obtained by open(); 0 while the device is closed.
    std::uint32_t channels() const { return m_channels; }

private:
    std::uint32_t m_device_id;   // Id of the opened device; 0 means "not open"
    std::uint32_t m_sample_rate;
    std::uint32_t m_channels;
};

// Streaming converter between two audio configurations (format, channel count, sample rate).
// Data is pushed in with put() and the converted result is pulled out with get().
// Instances can be neither copied nor moved.
class AudioConverter final
{
public:
    // Creates an object without an underlying stream; call create() before use.
    AudioConverter();
    // Destroys the underlying stream, if any.
    ~AudioConverter();

    AudioConverter(AudioConverter const&) = delete;
    AudioConverter(AudioConverter&&) = delete;
    // Assignment operators must not be const-qualified; they are deleted the same way as in MP3Decoder.
    AudioConverter& operator=(AudioConverter const&) = delete;
    AudioConverter& operator=(AudioConverter&&) = delete;

    // Creates the underlying stream converting from the source to the destination configuration.
    // The converter must not be created already.
    void create(AudioFormat src_format, std::uint8_t src_channels, std::uint32_t src_rate,
                AudioFormat dst_format, std::uint8_t dst_channels, std::uint32_t dst_rate);

    // Releases the underlying stream; does nothing when no stream exists.
    void destroy();

    // Inputs `bytes` bytes of source data for conversion.
    void put(void const* data, std::uint32_t bytes);

    // Gets converted data: copies at most `bytes` bytes into `data` and returns the number of bytes copied.
    std::uint32_t get(void* data, std::uint32_t bytes);

    // Number of converted bytes that can currently be obtained by get().
    std::uint32_t available() const;

    // Asks the underlying stream to flush its pending data.
    void flush();
    // Asks the underlying stream to clear its data.
    void clear();

    // Returns true between create() and destroy().
    bool is_valid() const;

private:
    void* m_stream; // Opaque pointer to underlying stream
};

// Result of loading a WAV file: the decoded samples and the configuration they are stored in.
// A default-constructed instance has no samples, zero rate and channels, and Float32 format.
struct WavData
{
    // Decoded PCM samples.
    std::vector<float> samples;
    std::uint32_t sample_rate = 0;
    std::uint32_t channels = 0;
    AudioFormat format = AudioFormat::Float32;

    WavData() {}
};

// Stateless helper for loading audio files.
class AudioLoader final
{
public:
    // Decodes a WAV file held in the memory buffer `file_data` and converts it
    // to the requested sample rate, channel count and format.
    // The converted samples are returned directly in the result.
    static WavData load_wav(
        std::vector<std::uint8_t> const& file_data,
        std::uint32_t target_sample_rate,
        std::uint32_t target_channels,
        AudioFormat target_format);
};

// Streaming MP3 decoder producing float samples in a caller-chosen sample rate and channel count.
// Instances can be neither copied nor moved.
class MP3Decoder final
{
public:
    // Creates a decoder in the closed state.
    MP3Decoder();
    // Closes the decoder if it is still open.
    ~MP3Decoder();

    MP3Decoder(MP3Decoder const&) = delete;
    MP3Decoder(MP3Decoder&&) = delete;
    MP3Decoder& operator=(MP3Decoder const&) = delete;
    MP3Decoder& operator=(MP3Decoder&&) = delete;

    // Opens the decoder over raw MP3 file data and selects the output sample rate and channel count.
    // Only a pointer to `mp3_data` is stored, so the vector must remain valid (and unchanged)
    // for as long as the decoder is open.
    // Returns false when the MP3 data cannot be opened; the decoder must not be open already.
    bool open(std::vector<std::uint8_t> const& mp3_data, std::uint32_t target_sample_rate, std::uint8_t target_channels);
    // Releases the decoder and the converter; does nothing when the decoder is not open.
    void close();

    // Decodes up to `frame_count` frames of float samples in the target configuration into `output`.
    // Returns the number of frames actually written; 0 when the decoder is not open.
    std::uint32_t decode(float* output, std::uint32_t frame_count);

    // Seeks back to the beginning of the MP3 data and clears the converter, if one is in use.
    void reset();

    // Returns true between a successful open() and the following close().
    bool is_open() const;

    // Sample rate of the MP3 stream as read during open().
    std::uint32_t source_sample_rate() const { return m_source_sample_rate; }
    // Channel count of the MP3 stream as read during open().
    std::uint8_t source_channels() const { return m_source_channels; }

private:
    void* m_decoder; // Opaque pointer to underlying decoder
    std::vector<std::uint8_t> const* m_mp3_data; // Not owned; the buffer passed to open()
    std::uint32_t m_target_sample_rate;
    std::uint8_t m_target_channels;
    std::uint32_t m_source_sample_rate;
    std::uint8_t m_source_channels;
    AudioConverter m_converter; // Created only when source and target configurations differ
    std::vector<float> m_decode_buffer; // Scratch space for decoded samples fed to the converter
    bool m_is_open;
};

}

#endif

osi/src/audio.cpp

0 → 100644
+383 −0
Original line number Diff line number Diff line
#include <osi/audio.hpp>
#include <utils/assumptions.hpp>
#include <SDL2/SDL_audio.h>
#include <cstring>

// minimp3 configuration
#define MINIMP3_IMPLEMENTATION
#define MINIMP3_FLOAT_OUTPUT
#define MINIMP3_NO_STDIO
#include <minimp3/minimp3_ex.h>
#undef min
#undef max

namespace osi {

// Translates an engine sample format to the matching SDL format constant.
// Every value, including an unrecognised one, maps to AUDIO_F32SYS.
static SDL_AudioFormat to_sdl_format(AudioFormat format)
{
    if (format == AudioFormat::Float32)
        return AUDIO_F32SYS;

    // Fallback for any other value.
    return AUDIO_F32SYS;
}

// === AudioDevice Implementation ===
AudioDevice::AudioDevice()
    : m_device_id(0)
    , m_sample_rate(0)
    , m_channels(0)
{
}

// Makes sure the device does not stay open after the object is gone.
AudioDevice::~AudioDevice()
{
    this->close();
}

// Opens an audio device with the `desired` configuration and reports the configuration
// actually in effect through `obtained`.
// Preconditions (checked by ASSUMPTION): the device is not open yet and the desired values
// fit into the narrower fields of SDL_AudioSpec.
void AudioDevice::open(AudioSpec const& desired, AudioSpec& obtained)
{
    ASSUMPTION(!is_open() && "AudioDevice already open");

    // SDL_AudioSpec stores freq as int, channels as 8-bit and samples as 16-bit values.
    // Reject values that would otherwise be silently truncated by the casts below.
    ASSUMPTION(desired.sample_rate <= 0x7FFFFFFFu && "AudioSpec sample_rate out of range");
    ASSUMPTION(desired.channels <= 0xFFu && "AudioSpec channels out of range");
    ASSUMPTION(desired.buffer_size <= 0xFFFFu && "AudioSpec buffer_size out of range");

    SDL_AudioSpec desired_spec;
    SDL_zero(desired_spec);
    desired_spec.freq = static_cast<int>(desired.sample_rate);
    desired_spec.format = to_sdl_format(desired.format);
    desired_spec.channels = static_cast<std::uint8_t>(desired.channels);
    desired_spec.samples = static_cast<std::uint16_t>(desired.buffer_size);
    desired_spec.callback = desired.callback;
    desired_spec.userdata = desired.userdata;

    // Zeroed so that the fields read below are never indeterminate.
    SDL_AudioSpec obtained_spec;
    SDL_zero(obtained_spec);

    // nullptr device name and iscapture == 0; allowed_changes == 0 means no spec changes are permitted.
    m_device_id = SDL_OpenAudioDevice(
        nullptr, 0, &desired_spec, &obtained_spec, 0);

    ASSUMPTION(m_device_id != 0 && SDL_GetError());
    m_sample_rate = static_cast<std::uint32_t>(obtained_spec.freq);
    m_channels = obtained_spec.channels;

    obtained.sample_rate = m_sample_rate;
    obtained.channels = m_channels;
    obtained.format = desired.format; // Format doesn't change
    obtained.buffer_size = obtained_spec.samples;
    obtained.callback = obtained_spec.callback;
    obtained.userdata = obtained_spec.userdata;
}

// Closes the device and returns the object to its initial (closed) state.
// Calling it on a closed device is a no-op.
void AudioDevice::close()
{
    if (!is_open())
        return;

    SDL_CloseAudioDevice(m_device_id);

    // Forget everything that described the now-closed device.
    m_device_id = 0;
    m_sample_rate = 0;
    m_channels = 0;
}

// Pauses the device when `paused` is true and resumes it otherwise.
// The device must be open.
void AudioDevice::pause(bool paused)
{
    ASSUMPTION(is_open() && "AudioDevice not open");

    int const pause_on = paused ? 1 : 0;
    SDL_PauseAudioDevice(m_device_id, pause_on);
}

// A device id of 0 is used to mark "no device opened".
bool AudioDevice::is_open() const
{
    return !(m_device_id == 0);
}

// === AudioConverter Implementation ===
AudioConverter::AudioConverter()
    : m_stream(nullptr)
{
}

// Releases the underlying stream, if one was created.
AudioConverter::~AudioConverter()
{
    this->destroy();
}

// Creates the underlying SDL audio stream converting from the source configuration
// (format, channels, rate) to the destination one. Must not be called on an
// already created converter.
void AudioConverter::create(AudioFormat src_format, std::uint8_t src_channels, std::uint32_t src_rate,
                            AudioFormat dst_format, std::uint8_t dst_channels, std::uint32_t dst_rate)
{
    ASSUMPTION(!is_valid() && "AudioConverter already created");

    SDL_AudioFormat const sdl_src_format = to_sdl_format(src_format);
    SDL_AudioFormat const sdl_dst_format = to_sdl_format(dst_format);

    m_stream = SDL_NewAudioStream(sdl_src_format, src_channels, src_rate,
                                  sdl_dst_format, dst_channels, dst_rate);

    ASSUMPTION(m_stream != nullptr && SDL_GetError());
}

void AudioConverter::destroy()
{
    if (is_valid())
    {
        SDL_FreeAudioStream(static_cast<SDL_AudioStream*>(m_stream));
        m_stream = nullptr;
    }
}

void AudioConverter::put(void const* data, std::uint32_t bytes)
{
    ASSUMPTION(is_valid() && "AudioConverter not created");
    int result = SDL_AudioStreamPut(static_cast<SDL_AudioStream*>(m_stream), data, bytes);
    ASSUMPTION(result >= 0 && SDL_GetError());
}

std::uint32_t AudioConverter::get(void* data, std::uint32_t bytes)
{
    ASSUMPTION(is_valid() && "AudioConverter not created");
    int bytes_read = SDL_AudioStreamGet(static_cast<SDL_AudioStream*>(m_stream), data, bytes);
    ASSUMPTION(bytes_read >= 0 && SDL_GetError());
    return static_cast<std::uint32_t>(bytes_read);
}

std::uint32_t AudioConverter::available() const
{
    ASSUMPTION(is_valid() && "AudioConverter not created");
    int avail = SDL_AudioStreamAvailable(static_cast<SDL_AudioStream*>(m_stream));
    ASSUMPTION(avail >= 0 && SDL_GetError());
    return static_cast<std::uint32_t>(avail);
}

void AudioConverter::flush()
{
    ASSUMPTION(is_valid() && "AudioConverter not created");
    int result = SDL_AudioStreamFlush(static_cast<SDL_AudioStream*>(m_stream));
    ASSUMPTION(result >= 0 && SDL_GetError());
}

void AudioConverter::clear()
{
    ASSUMPTION(is_valid() && "AudioConverter not created");
    SDL_AudioStreamClear(static_cast<SDL_AudioStream*>(m_stream));
}

// The converter is valid exactly when it holds an underlying stream.
bool AudioConverter::is_valid() const
{
    return !(m_stream == nullptr);
}

// === AudioLoader Implementation ===
// Loads a WAV file from the memory buffer `file_data` and converts it to the requested
// sample rate, channel count and format. Returns the converted samples together with
// the target configuration. Any failure reported by SDL trips an ASSUMPTION.
WavData AudioLoader::load_wav(std::vector<std::uint8_t> const& file_data, std::uint32_t target_sample_rate, std::uint32_t target_channels, AudioFormat target_format)
{
    // Scope guards releasing the SDL resources on every path that leaves this function.
    struct WavBufferGuard
    {
        std::uint8_t* buffer;
        ~WavBufferGuard() { if (buffer != nullptr) SDL_FreeWAV(buffer); }
    };
    struct StreamGuard
    {
        SDL_AudioStream* stream;
        ~StreamGuard() { if (stream != nullptr) SDL_FreeAudioStream(stream); }
    };

    // Create RWops from memory
    SDL_RWops* rw = SDL_RWFromConstMem(file_data.data(), static_cast<int>(file_data.size()));
    ASSUMPTION(rw != nullptr && SDL_GetError());

    // Load WAV file. The second argument (freesrc == 1) makes SDL release `rw` itself.
    SDL_AudioSpec source_spec;
    std::uint8_t* wav_buffer = nullptr;
    std::uint32_t wav_length = 0;

    SDL_AudioSpec* loaded_spec = SDL_LoadWAV_RW(rw, 1, &source_spec, &wav_buffer, &wav_length);
    ASSUMPTION(loaded_spec != nullptr && SDL_GetError());
    WavBufferGuard const wav_guard{ wav_buffer };

    // Create audio stream for conversion
    // We can't use AudioConverter, because the source format can be arbitrary (non-engine)
    SDL_AudioStream* stream = SDL_NewAudioStream(
        source_spec.format, source_spec.channels, source_spec.freq,
        to_sdl_format(target_format), static_cast<std::uint8_t>(target_channels), target_sample_rate
    );
    ASSUMPTION(stream != nullptr && SDL_GetError());
    StreamGuard const stream_guard{ stream };

    // Convert audio data
    int const put_result = SDL_AudioStreamPut(stream, wav_buffer, wav_length);
    ASSUMPTION(put_result >= 0 && SDL_GetError());

    int const flush_result = SDL_AudioStreamFlush(stream);
    ASSUMPTION(flush_result >= 0 && SDL_GetError());

    int const available = SDL_AudioStreamAvailable(stream);
    ASSUMPTION(available > 0 && "No audio data available after conversion");

    // Read converted data
    WavData result;
    result.sample_rate = target_sample_rate;
    result.channels = target_channels;
    result.format = target_format;

    std::size_t const sample_count =
        available > 0 ? static_cast<std::size_t>(available) / sizeof(float) : 0;
    result.samples.resize(sample_count);

    if (sample_count > 0)
    {
        // Request exactly as many bytes as the vector can hold, never more.
        int const bytes_wanted = static_cast<int>(sample_count * sizeof(float));
        int const bytes_read = SDL_AudioStreamGet(stream, result.samples.data(), bytes_wanted);
        ASSUMPTION(bytes_read >= 0 && SDL_GetError());

        // Keep only the samples that were really delivered.
        result.samples.resize(bytes_read > 0 ? static_cast<std::size_t>(bytes_read) / sizeof(float) : 0);
    }

    return result;
}

// === MP3Decoder Implementation ===
MP3Decoder::MP3Decoder()
    : m_decoder(nullptr)
    , m_mp3_data(nullptr)
    , m_target_sample_rate(0)
    , m_target_channels(0)
    , m_source_sample_rate(0)
    , m_source_channels(0)
    , m_converter()
    , m_is_open(false)
{
}

// Releases the decoder and the converter if the decoder is still open.
MP3Decoder::~MP3Decoder()
{
    this->close();
}

bool MP3Decoder::open(std::vector<std::uint8_t> const& mp3_data, 
                       std::uint32_t target_sample_rate, 
                       std::uint8_t target_channels)
{
    ASSUMPTION(!is_open() && "MP3Decoder already open");
    
    m_mp3_data = &mp3_data;
    m_target_sample_rate = target_sample_rate;
    m_target_channels = target_channels;
    
    // Allocate decoder
    mp3dec_ex_t* dec = new mp3dec_ex_t();
    
    // Initialize mp3 decoder
    if (mp3dec_ex_open_buf(dec, m_mp3_data->data(), m_mp3_data->size(), MP3D_SEEK_TO_SAMPLE) != 0)
    {
        delete dec;
        return false;
    }
    
    m_decoder = dec;
    m_source_sample_rate = dec->info.hz;
    m_source_channels = static_cast<std::uint8_t>(dec->info.channels);
    
    // Create audio converter for format conversion if needed
    if (m_source_sample_rate != m_target_sample_rate || m_source_channels != m_target_channels)
    {
        m_converter.create(
            AudioFormat::Float32, m_source_channels, m_source_sample_rate,
            AudioFormat::Float32, m_target_channels, m_target_sample_rate
        );
    }
    
    m_decode_buffer.resize(MINIMP3_MAX_SAMPLES_PER_FRAME * 2); // Stereo float samples
    m_is_open = true;
    
    return true;
}

void MP3Decoder::close()
{
    if (!is_open())
        return;
    
    m_converter.destroy();
    
    if (m_decoder != nullptr)
    {
        mp3dec_ex_t* dec = static_cast<mp3dec_ex_t*>(m_decoder);
        mp3dec_ex_close(dec);
        delete dec;
        m_decoder = nullptr;
    }
    
    m_mp3_data = nullptr;
    m_is_open = false;
}

std::uint32_t MP3Decoder::decode(float* output, std::uint32_t frame_count)
{
    if (!is_open() || m_decoder == nullptr)
        return 0;
    
    mp3dec_ex_t* dec = static_cast<mp3dec_ex_t*>(m_decoder);
    std::uint32_t frames_decoded = 0;
    
    if (m_converter.is_valid())
    {
        // Need format conversion
        // Check if we have buffered data in the stream
        std::uint32_t available = m_converter.available();
        std::uint32_t frames_available = available / (sizeof(float) * m_target_channels);
        
        if (frames_available < frame_count)
        {
            // Decode more MP3 frames and feed to converter
            std::uint32_t frames_needed = frame_count - frames_available;
            std::uint32_t samples_to_decode = frames_needed * m_source_channels * 2; // Decode extra
            
            if (samples_to_decode > m_decode_buffer.size())
                samples_to_decode = static_cast<std::uint32_t>(m_decode_buffer.size());
            
            std::uint32_t samples_decoded = static_cast<std::uint32_t>(
                mp3dec_ex_read(dec, m_decode_buffer.data(), samples_to_decode)
            );
            
            if (samples_decoded > 0)
            {
                m_converter.put(m_decode_buffer.data(), samples_decoded * sizeof(float));
            }
        }
        
        // Read converted samples from stream
        std::uint32_t bytes_to_read = frame_count * m_target_channels * sizeof(float);
        std::uint32_t bytes_read = m_converter.get(output, bytes_to_read);
        
        if (bytes_read > 0)
        {
            frames_decoded = bytes_read / (sizeof(float) * m_target_channels);
        }
    }
    else
    {
        // Direct decode (no conversion needed)
        std::uint32_t samples_to_decode = frame_count * m_source_channels;
        std::uint32_t samples_decoded = static_cast<std::uint32_t>(
            mp3dec_ex_read(dec, output, samples_to_decode)
        );
        frames_decoded = samples_decoded / m_source_channels;
    }
    
    return frames_decoded;
}

void MP3Decoder::reset()
{
    if (!is_open() || m_decoder == nullptr)
        return;
    
    mp3dec_ex_t* dec = static_cast<mp3dec_ex_t*>(m_decoder);
    
    // Seek back to beginning
    mp3dec_ex_seek(dec, 0);
    
    // Clear audio converter if present
    if (m_converter.is_valid())
    {
        m_converter.clear();
    }
}

// Reports the flag set by a successful open() and cleared by close().
bool MP3Decoder::is_open() const
{
    bool const opened = m_is_open;
    return opened;
}

}