diff --git a/build/android/app/CMakeLists.txt b/build/android/app/CMakeLists.txt
index ee3e594..94345ef 100644
--- a/build/android/app/CMakeLists.txt
+++ b/build/android/app/CMakeLists.txt
@@ -49,6 +49,7 @@ project(kaliber)
 add_library(kaliber SHARED
   ../../../src/base/collusion_test.cc
   ../../../src/base/log.cc
+  ../../../src/base/mem.cc
   ../../../src/base/task_runner.cc
   ../../../src/base/thread_pool.cc
   ../../../src/demo/credits.cc
@@ -136,7 +137,6 @@ add_library(kaliber SHARED
   ../../../src/third_party/minizip/ioapi.c
   ../../../src/third_party/minizip/unzip.c
   ../../../src/third_party/spirv-reflect/spirv_reflect.c
-  ../../../src/third_party/stb/stb_image.c
   ../../../src/third_party/texture_compressor/dxt_encoder_internals.cc
   ../../../src/third_party/texture_compressor/dxt_encoder.cc
   ../../../src/third_party/texture_compressor/texture_compressor_etc1.cc
diff --git a/src/base/BUILD.gn b/src/base/BUILD.gn
index 38341ef..221cc9d 100644
--- a/src/base/BUILD.gn
+++ b/src/base/BUILD.gn
@@ -8,6 +8,7 @@ source_set("base") {
     "interpolation.h",
     "log.cc",
     "log.h",
+    "mem.cc",
     "mem.h",
     "misc.h",
     "random.h",
diff --git a/src/base/mem.cc b/src/base/mem.cc
new file mode 100644
index 0000000..4adfa51
--- /dev/null
+++ b/src/base/mem.cc
@@ -0,0 +1,55 @@
+#include "base/mem.h"
+
+#include
+
+#if defined(__ANDROID__)
+#include
+#endif
+
+namespace base {
+
+// Returns |size| bytes (must be non-zero) aligned to |alignment|, which must
+// be a power of two and a multiple of sizeof(void*). The result is released
+// with AlignedFree(). A failed allocation trips the CHECK below.
+void* AlignedAlloc(size_t size, size_t alignment) {
+  DCHECK(size > 0U);
+  DCHECK(IsPow2(alignment));
+  DCHECK((alignment % sizeof(void*)) == 0U);
+
+  void* ptr = nullptr;
+#if defined(_WIN32)
+  ptr = _aligned_malloc(size, alignment);
+#elif defined(__ANDROID__)
+  ptr = memalign(alignment, size);
+#else
+  int ret = posix_memalign(&ptr, alignment, size);
+  if (ret != 0) {
+    DLOG(0) << "posix_memalign() returned with error " << ret;
+    ptr = nullptr;
+  }
+#endif
+
+  // Aligned allocations may fail for non-memory related reasons.
+  CHECK(ptr) << "Aligned allocation failed. "
+             << "size=" << size << ", alignment=" << alignment;
+  DCHECK(IsAligned(ptr, alignment));
+  return ptr;
+}
+
+// Moves a block obtained from AlignedAlloc() into a fresh allocation of
+// |new_size| bytes and frees the old block. Only min(old_size, new_size)
+// bytes are carried over, so shrinking cannot write past the new block. A
+// null |ptr| makes this behave like AlignedAlloc().
+void* AlignedRealloc(void* ptr,
+                     size_t old_size,
+                     size_t new_size,
+                     size_t alignment) {
+  auto* new_ptr = AlignedAlloc(new_size, alignment);
+  if (ptr) {
+    memmove(new_ptr, ptr, old_size < new_size ? old_size : new_size);
+    AlignedFree(ptr);
+  }
+  return new_ptr;
+}
+
+}  // namespace base
diff --git a/src/base/mem.h b/src/base/mem.h
index 698a3fb..36bbafc 100644
--- a/src/base/mem.h
+++ b/src/base/mem.h
@@ -1,26 +1,33 @@
 #ifndef BASE_MEM_H
 #define BASE_MEM_H
 
-#include
+#include
+#include
 #include
 
-#if defined(__ANDROID__)
+#if defined(_WIN32)
 #include
+#else
+#include
 #endif
 
 #include "base/log.h"
-
-#define ALIGN_MEM(alignment) __attribute__((aligned(alignment)))
+#include "base/misc.h"
 
 namespace base {
 
+inline void AlignedFree(void* mem) {
+#if defined(_WIN32)
+  _aligned_free(mem);
+#else
+  free(mem);
+#endif
+}
+
 namespace internal {
 
 struct ScopedAlignedFree {
-  inline void operator()(void* x) const {
-    if (x)
-      free(x);
-  }
+  inline void operator()(void* x) const { AlignedFree(x); }
 };
 
 }  // namespace internal
@@ -28,27 +35,16 @@ struct ScopedAlignedFree {
 template
 using AlignedMemPtr = std::unique_ptr;
 
-template
-inline void* AlignedAlloc(size_t size) {
-  void* ptr = NULL;
-#if defined(__ANDROID__)
-  ptr = memalign(kAlignment, size);
-#else
-  if (posix_memalign(&ptr, kAlignment, size))
-    ptr = NULL;
-#endif
-  DCHECK(ptr);
-  // DCHECK(((unsigned)ptr & (kAlignment - 1)) == 0);
-  return ptr;
-}
+void* AlignedAlloc(size_t size, size_t alignment);
 
-inline void AlignedFree(void* mem) {
-  free(mem);
-}
+void* AlignedRealloc(void* ptr,
+                     size_t old_size,
+                     size_t new_size,
+                     size_t alignment);
 
-template
-inline bool IsAligned(void* ptr) {
-  return (reinterpret_cast(ptr) & (kAlignment - 1)) == 0U;
+inline bool IsAligned(const void* val, size_t alignment) {
+  DCHECK(IsPow2(alignment)) << alignment << " is not a power of 2";
+  return (reinterpret_cast(val) & (alignment - 1)) == 0;
 }
 
 }  // namespace base
diff --git a/src/engine/asset/image.cc b/src/engine/asset/image.cc
index f1ef622..dcf7f94 100644
--- a/src/engine/asset/image.cc
+++ b/src/engine/asset/image.cc
@@ -5,13 +5,19 @@
 
 #include "base/interpolation.h"
 #include "base/log.h"
+#include "base/mem.h"
 #include "base/misc.h"
 #include "engine/engine.h"
 #include "engine/platform/asset_file.h"
 #include "third_party/texture_compressor/texture_compressor.h"
 
-// This 3rd party library is written in C and uses malloc, which means that we
-// have to do the same.
+// Use aligned memory for SIMD in texture compressor.
+#define STB_IMAGE_IMPLEMENTATION
+#define STBI_NO_STDIO
+#define STBI_MALLOC(sz) base::AlignedAlloc(sz, 16)
+#define STBI_REALLOC_SIZED(p, oldsz, newsz) \
+  base::AlignedRealloc(p, oldsz, newsz, 16)
+#define STBI_FREE(p) base::AlignedFree(p)
 #include "third_party/stb/stb_image.h"
 
 using namespace base;
@@ -77,7 +83,7 @@ bool Image::Create(int w, int h) {
   width_ = w;
   height_ = h;
 
-  buffer_.reset((uint8_t*)AlignedAlloc<16>(w * h * 4 * sizeof(uint8_t)));
+  buffer_.reset((uint8_t*)AlignedAlloc(w * h * 4 * sizeof(uint8_t), 16));
 
   return true;
 }
@@ -85,7 +91,7 @@ bool Image::Create(int w, int h) {
 void Image::Copy(const Image& other) {
   if (other.buffer_) {
     int size = other.GetSize();
-    buffer_.reset((uint8_t*)AlignedAlloc<16>(size));
+    buffer_.reset((uint8_t*)AlignedAlloc(size, 16));
     memcpy(buffer_.get(), other.buffer_.get(), size);
   }
   width_ = other.width_;
@@ -101,7 +107,7 @@ bool Image::CreateMip(const Image& other) {
   width_ = std::max(other.width_ >> 1, 1);
   height_ = std::max(other.height_ >> 1, 1);
   format_ = kRGBA32;
-  buffer_.reset((uint8_t*)AlignedAlloc<16>(GetSize()));
+  buffer_.reset((uint8_t*)AlignedAlloc(GetSize(), 16));
 
   // If the width isn't perfectly divisable with two, then we end up skewing
   // the image because the source offset isn't updated properly.
@@ -158,7 +164,7 @@ bool Image::Load(const std::string& file_name) {
         // LOG(0)("Converting image from 1 to 4 channels.\n");
         // Assume it's an intensity, duplicate it to RGB and fill A with opaque.
         converted_buffer =
-            (uint8_t*)AlignedAlloc<16>(w * h * 4 * sizeof(uint8_t));
+            (uint8_t*)AlignedAlloc(w * h * 4 * sizeof(uint8_t), 16);
         for (int i = 0; i < w * h; ++i) {
           converted_buffer[i * 4 + 0] = buffer_[i];
           converted_buffer[i * 4 + 1] = buffer_[i];
@@ -171,7 +177,7 @@ bool Image::Load(const std::string& file_name) {
         // LOG(0)("Converting image from 3 to 4 channels.\n");
         // Add an opaque channel.
         converted_buffer =
-            (uint8_t*)AlignedAlloc<16>(w * h * 4 * sizeof(uint8_t));
+            (uint8_t*)AlignedAlloc(w * h * 4 * sizeof(uint8_t), 16);
         for (int i = 0; i < w * h; ++i) {
           converted_buffer[i * 4 + 0] = buffer_[i * 3 + 0];
           converted_buffer[i * 4 + 1] = buffer_[i * 3 + 1];
@@ -238,7 +244,7 @@ void Image::ConvertToPow2() {
          << new_width << ", " << new_height << ")";
 
   int bigger_size = new_width * new_height * 4 * sizeof(uint8_t);
-  uint8_t* bigger_buffer = (uint8_t*)AlignedAlloc<16>(bigger_size);
+  uint8_t* bigger_buffer = (uint8_t*)AlignedAlloc(bigger_size, 16);
 
   // Fill it with black.
   memset(bigger_buffer, 0, bigger_size);
@@ -296,7 +302,7 @@ bool Image::Compress() {
 
   unsigned compressedSize = GetSize();
   uint8_t* compressedBuffer =
-      (uint8_t*)AlignedAlloc<16>(compressedSize * sizeof(uint8_t));
+      (uint8_t*)AlignedAlloc(compressedSize * sizeof(uint8_t), 16);
 
   const uint8_t* src = buffer_.get();
   uint8_t* dst = compressedBuffer;
diff --git a/src/engine/audio/sinc_resampler.cc b/src/engine/audio/sinc_resampler.cc
index c8465db..0a0a303 100644
--- a/src/engine/audio/sinc_resampler.cc
+++ b/src/engine/audio/sinc_resampler.cc
@@ -192,13 +192,13 @@ SincResampler::SincResampler(double io_sample_rate_ratio, int request_frames)
       input_buffer_size_(request_frames_ + kernel_size_),
       // Create input buffers with a 32-byte alignment for SIMD optimizations.
       kernel_storage_(static_cast(
-          base::AlignedAlloc<32>(sizeof(float) * kernel_storage_size_))),
+          base::AlignedAlloc(sizeof(float) * kernel_storage_size_, 32))),
       kernel_pre_sinc_storage_(static_cast(
-          base::AlignedAlloc<32>(sizeof(float) * kernel_storage_size_))),
+          base::AlignedAlloc(sizeof(float) * kernel_storage_size_, 32))),
       kernel_window_storage_(static_cast(
-          base::AlignedAlloc<32>(sizeof(float) * kernel_storage_size_))),
+          base::AlignedAlloc(sizeof(float) * kernel_storage_size_, 32))),
       input_buffer_(static_cast(
-          base::AlignedAlloc<32>(sizeof(float) * input_buffer_size_))),
+          base::AlignedAlloc(sizeof(float) * input_buffer_size_, 32))),
       r1_(input_buffer_.get()),
       r2_(input_buffer_.get() + kernel_size_ / 2) {
   CHECK(request_frames > kernel_size_ * 3 / 2)
diff --git a/src/third_party/BUILD.gn b/src/third_party/BUILD.gn
index 689664e..4ad2dcf 100644
--- a/src/third_party/BUILD.gn
+++ b/src/third_party/BUILD.gn
@@ -59,7 +59,6 @@ source_set("third_party") {
     "minimp3/minimp3.h",
     "minimp3/minimp3_ex.h",
     "spirv-reflect/spirv_reflect.c",
-    "stb/stb_image.c",
     "stb/stb_image.h",
     "stb/stb_truetype.h",
     "texture_compressor/dxt_encoder.cc",
diff --git a/src/third_party/stb/stb_image.c b/src/third_party/stb/stb_image.c
deleted file mode 100644
index 83f11b2..0000000
--- a/src/third_party/stb/stb_image.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#define STB_IMAGE_IMPLEMENTATION
-#define STBI_NO_STDIO
-#include "stb_image.h"