Use aligned memory in stb_image

This commit is contained in:
Attila Uygun 2023-08-09 01:14:30 +02:00
parent 96d6a52a74
commit 8503c549d7
8 changed files with 90 additions and 45 deletions

View File

@ -49,6 +49,7 @@ project(kaliber)
add_library(kaliber SHARED add_library(kaliber SHARED
../../../src/base/collusion_test.cc ../../../src/base/collusion_test.cc
../../../src/base/log.cc ../../../src/base/log.cc
../../../src/base/mem.cc
../../../src/base/task_runner.cc ../../../src/base/task_runner.cc
../../../src/base/thread_pool.cc ../../../src/base/thread_pool.cc
../../../src/demo/credits.cc ../../../src/demo/credits.cc
@ -136,7 +137,6 @@ add_library(kaliber SHARED
../../../src/third_party/minizip/ioapi.c ../../../src/third_party/minizip/ioapi.c
../../../src/third_party/minizip/unzip.c ../../../src/third_party/minizip/unzip.c
../../../src/third_party/spirv-reflect/spirv_reflect.c ../../../src/third_party/spirv-reflect/spirv_reflect.c
../../../src/third_party/stb/stb_image.c
../../../src/third_party/texture_compressor/dxt_encoder_internals.cc ../../../src/third_party/texture_compressor/dxt_encoder_internals.cc
../../../src/third_party/texture_compressor/dxt_encoder.cc ../../../src/third_party/texture_compressor/dxt_encoder.cc
../../../src/third_party/texture_compressor/texture_compressor_etc1.cc ../../../src/third_party/texture_compressor/texture_compressor_etc1.cc

View File

@ -8,6 +8,7 @@ source_set("base") {
"interpolation.h", "interpolation.h",
"log.cc", "log.cc",
"log.h", "log.h",
"mem.cc",
"mem.h", "mem.h",
"misc.h", "misc.h",
"random.h", "random.h",

46
src/base/mem.cc Normal file
View File

@ -0,0 +1,46 @@
#include "base/mem.h"
#include <cstring>
#if defined(__ANDROID__)
#include <malloc.h>
#endif
namespace base {
void* AlignedAlloc(size_t size, size_t alignment) {
DCHECK(size > 0U);
DCHECK(IsPow2(alignment));
DCHECK((alignment % sizeof(void*)) == 0U);
void* ptr = nullptr;
#if defined(_WIN32)
ptr = _aligned_malloc(size, alignment);
#elif defined(__ANDROID__)
ptr = memalign(alignment, size);
#else
int ret = posix_memalign(&ptr, alignment, size);
if (ret != 0) {
DLOG(0) << "posix_memalign() returned with error " << ret;
ptr = nullptr;
}
#endif
// Aligned allocations may fail for non-memory related reasons.
CHECK(ptr) << "Aligned allocation failed. "
<< "size=" << size << ", alignment=" << alignment;
DCHECK(IsAligned(ptr, alignment));
return ptr;
}
// Resizes an aligned block by allocating a new one: obtains |new_size| bytes
// aligned to |alignment| from AlignedAlloc(), copies the surviving contents of
// |ptr| into it, releases |ptr| with AlignedFree() and returns the new block.
//
// |old_size| is the number of valid bytes currently stored in |ptr|. Only
// min(old_size, new_size) bytes are copied, so shrinking a block never writes
// past the end of the new allocation. A null |ptr| is treated as an empty
// block: nothing is copied or freed and the call behaves like AlignedAlloc().
void* AlignedRealloc(void* ptr,
                     size_t old_size,
                     size_t new_size,
                     size_t alignment) {
  auto* new_ptr = AlignedAlloc(new_size, alignment);
  if (ptr) {
    // Clamp the copy to the smaller of the two sizes; copying |old_size|
    // bytes unconditionally overflows |new_ptr| when the block shrinks.
    const size_t bytes_to_copy = old_size < new_size ? old_size : new_size;
    if (bytes_to_copy > 0U)
      memmove(new_ptr, ptr, bytes_to_copy);
    AlignedFree(ptr);
  }
  return new_ptr;
}
} // namespace base

View File

@ -1,26 +1,33 @@
#ifndef BASE_MEM_H #ifndef BASE_MEM_H
#define BASE_MEM_H #define BASE_MEM_H
#include <cstdlib> #include <stddef.h>
#include <stdint.h>
#include <memory> #include <memory>
#if defined(__ANDROID__) #if defined(_WIN32)
#include <malloc.h> #include <malloc.h>
#else
#include <stdlib.h>
#endif #endif
#include "base/log.h" #include "base/log.h"
#include "base/misc.h"
#define ALIGN_MEM(alignment) __attribute__((aligned(alignment)))
namespace base { namespace base {
inline void AlignedFree(void* mem) {
#if defined(_WIN32)
_aligned_free(mem);
#else
free(mem);
#endif
}
namespace internal { namespace internal {
struct ScopedAlignedFree { struct ScopedAlignedFree {
inline void operator()(void* x) const { inline void operator()(void* x) const { AlignedFree(x); }
if (x)
free(x);
}
}; };
} // namespace internal } // namespace internal
@ -28,27 +35,16 @@ struct ScopedAlignedFree {
template <typename T> template <typename T>
using AlignedMemPtr = std::unique_ptr<T, internal::ScopedAlignedFree>; using AlignedMemPtr = std::unique_ptr<T, internal::ScopedAlignedFree>;
template <int kAlignment> void* AlignedAlloc(size_t size, size_t alignment);
inline void* AlignedAlloc(size_t size) {
void* ptr = NULL;
#if defined(__ANDROID__)
ptr = memalign(kAlignment, size);
#else
if (posix_memalign(&ptr, kAlignment, size))
ptr = NULL;
#endif
DCHECK(ptr);
// DCHECK(((unsigned)ptr & (kAlignment - 1)) == 0);
return ptr;
}
inline void AlignedFree(void* mem) { void* AlignedRealloc(void* ptr,
free(mem); size_t old_size,
} size_t new_size,
size_t alignment);
template <int kAlignment> inline bool IsAligned(const void* val, size_t alignment) {
inline bool IsAligned(void* ptr) { DCHECK(IsPow2(alignment)) << alignment << " is not a power of 2";
return (reinterpret_cast<uintptr_t>(ptr) & (kAlignment - 1)) == 0U; return (reinterpret_cast<uintptr_t>(val) & (alignment - 1)) == 0;
} }
} // namespace base } // namespace base

View File

@ -5,13 +5,19 @@
#include "base/interpolation.h" #include "base/interpolation.h"
#include "base/log.h" #include "base/log.h"
#include "base/mem.h"
#include "base/misc.h" #include "base/misc.h"
#include "engine/engine.h" #include "engine/engine.h"
#include "engine/platform/asset_file.h" #include "engine/platform/asset_file.h"
#include "third_party/texture_compressor/texture_compressor.h" #include "third_party/texture_compressor/texture_compressor.h"
// This 3rd party library is written in C and uses malloc, which means that we // Use aligned memory for SIMD in texture compressor.
// have to do the same. #define STB_IMAGE_IMPLEMENTATION
#define STBI_NO_STDIO
#define STBI_MALLOC(sz) base::AlignedAlloc(sz, 16)
#define STBI_REALLOC_SIZED(p, oldsz, newsz) \
base::AlignedRealloc(p, oldsz, newsz, 16)
#define STBI_FREE(p) base::AlignedFree(p)
#include "third_party/stb/stb_image.h" #include "third_party/stb/stb_image.h"
using namespace base; using namespace base;
@ -77,7 +83,7 @@ bool Image::Create(int w, int h) {
width_ = w; width_ = w;
height_ = h; height_ = h;
buffer_.reset((uint8_t*)AlignedAlloc<16>(w * h * 4 * sizeof(uint8_t))); buffer_.reset((uint8_t*)AlignedAlloc(w * h * 4 * sizeof(uint8_t), 16));
return true; return true;
} }
@ -85,7 +91,7 @@ bool Image::Create(int w, int h) {
void Image::Copy(const Image& other) { void Image::Copy(const Image& other) {
if (other.buffer_) { if (other.buffer_) {
int size = other.GetSize(); int size = other.GetSize();
buffer_.reset((uint8_t*)AlignedAlloc<16>(size)); buffer_.reset((uint8_t*)AlignedAlloc(size, 16));
memcpy(buffer_.get(), other.buffer_.get(), size); memcpy(buffer_.get(), other.buffer_.get(), size);
} }
width_ = other.width_; width_ = other.width_;
@ -101,7 +107,7 @@ bool Image::CreateMip(const Image& other) {
width_ = std::max(other.width_ >> 1, 1); width_ = std::max(other.width_ >> 1, 1);
height_ = std::max(other.height_ >> 1, 1); height_ = std::max(other.height_ >> 1, 1);
format_ = kRGBA32; format_ = kRGBA32;
buffer_.reset((uint8_t*)AlignedAlloc<16>(GetSize())); buffer_.reset((uint8_t*)AlignedAlloc(GetSize(), 16));
// If the width isn't perfectly divisible by two, then we end up skewing // If the width isn't perfectly divisible by two, then we end up skewing
// the image because the source offset isn't updated properly. // the image because the source offset isn't updated properly.
@ -158,7 +164,7 @@ bool Image::Load(const std::string& file_name) {
// LOG(0)("Converting image from 1 to 4 channels.\n"); // LOG(0)("Converting image from 1 to 4 channels.\n");
// Assume it's an intensity, duplicate it to RGB and fill A with opaque. // Assume it's an intensity, duplicate it to RGB and fill A with opaque.
converted_buffer = converted_buffer =
(uint8_t*)AlignedAlloc<16>(w * h * 4 * sizeof(uint8_t)); (uint8_t*)AlignedAlloc(w * h * 4 * sizeof(uint8_t), 16);
for (int i = 0; i < w * h; ++i) { for (int i = 0; i < w * h; ++i) {
converted_buffer[i * 4 + 0] = buffer_[i]; converted_buffer[i * 4 + 0] = buffer_[i];
converted_buffer[i * 4 + 1] = buffer_[i]; converted_buffer[i * 4 + 1] = buffer_[i];
@ -171,7 +177,7 @@ bool Image::Load(const std::string& file_name) {
// LOG(0)("Converting image from 3 to 4 channels.\n"); // LOG(0)("Converting image from 3 to 4 channels.\n");
// Add an opaque channel. // Add an opaque channel.
converted_buffer = converted_buffer =
(uint8_t*)AlignedAlloc<16>(w * h * 4 * sizeof(uint8_t)); (uint8_t*)AlignedAlloc(w * h * 4 * sizeof(uint8_t), 16);
for (int i = 0; i < w * h; ++i) { for (int i = 0; i < w * h; ++i) {
converted_buffer[i * 4 + 0] = buffer_[i * 3 + 0]; converted_buffer[i * 4 + 0] = buffer_[i * 3 + 0];
converted_buffer[i * 4 + 1] = buffer_[i * 3 + 1]; converted_buffer[i * 4 + 1] = buffer_[i * 3 + 1];
@ -238,7 +244,7 @@ void Image::ConvertToPow2() {
<< new_width << ", " << new_height << ")"; << new_width << ", " << new_height << ")";
int bigger_size = new_width * new_height * 4 * sizeof(uint8_t); int bigger_size = new_width * new_height * 4 * sizeof(uint8_t);
uint8_t* bigger_buffer = (uint8_t*)AlignedAlloc<16>(bigger_size); uint8_t* bigger_buffer = (uint8_t*)AlignedAlloc(bigger_size, 16);
// Fill it with black. // Fill it with black.
memset(bigger_buffer, 0, bigger_size); memset(bigger_buffer, 0, bigger_size);
@ -296,7 +302,7 @@ bool Image::Compress() {
unsigned compressedSize = GetSize(); unsigned compressedSize = GetSize();
uint8_t* compressedBuffer = uint8_t* compressedBuffer =
(uint8_t*)AlignedAlloc<16>(compressedSize * sizeof(uint8_t)); (uint8_t*)AlignedAlloc(compressedSize * sizeof(uint8_t), 16);
const uint8_t* src = buffer_.get(); const uint8_t* src = buffer_.get();
uint8_t* dst = compressedBuffer; uint8_t* dst = compressedBuffer;

View File

@ -192,13 +192,13 @@ SincResampler::SincResampler(double io_sample_rate_ratio, int request_frames)
input_buffer_size_(request_frames_ + kernel_size_), input_buffer_size_(request_frames_ + kernel_size_),
// Create input buffers with a 32-byte alignment for SIMD optimizations. // Create input buffers with a 32-byte alignment for SIMD optimizations.
kernel_storage_(static_cast<float*>( kernel_storage_(static_cast<float*>(
base::AlignedAlloc<32>(sizeof(float) * kernel_storage_size_))), base::AlignedAlloc(sizeof(float) * kernel_storage_size_, 32))),
kernel_pre_sinc_storage_(static_cast<float*>( kernel_pre_sinc_storage_(static_cast<float*>(
base::AlignedAlloc<32>(sizeof(float) * kernel_storage_size_))), base::AlignedAlloc(sizeof(float) * kernel_storage_size_, 32))),
kernel_window_storage_(static_cast<float*>( kernel_window_storage_(static_cast<float*>(
base::AlignedAlloc<32>(sizeof(float) * kernel_storage_size_))), base::AlignedAlloc(sizeof(float) * kernel_storage_size_, 32))),
input_buffer_(static_cast<float*>( input_buffer_(static_cast<float*>(
base::AlignedAlloc<32>(sizeof(float) * input_buffer_size_))), base::AlignedAlloc(sizeof(float) * input_buffer_size_, 32))),
r1_(input_buffer_.get()), r1_(input_buffer_.get()),
r2_(input_buffer_.get() + kernel_size_ / 2) { r2_(input_buffer_.get() + kernel_size_ / 2) {
CHECK(request_frames > kernel_size_ * 3 / 2) CHECK(request_frames > kernel_size_ * 3 / 2)

View File

@ -59,7 +59,6 @@ source_set("third_party") {
"minimp3/minimp3.h", "minimp3/minimp3.h",
"minimp3/minimp3_ex.h", "minimp3/minimp3_ex.h",
"spirv-reflect/spirv_reflect.c", "spirv-reflect/spirv_reflect.c",
"stb/stb_image.c",
"stb/stb_image.h", "stb/stb_image.h",
"stb/stb_truetype.h", "stb/stb_truetype.h",
"texture_compressor/dxt_encoder.cc", "texture_compressor/dxt_encoder.cc",

View File

@ -1,3 +0,0 @@
#define STB_IMAGE_IMPLEMENTATION
#define STBI_NO_STDIO
#include "stb_image.h"