mirror of https://github.com/auygun/kaliber.git
Use aligned memory in stb_image
This commit is contained in:
parent
96d6a52a74
commit
8503c549d7
|
@ -49,6 +49,7 @@ project(kaliber)
|
|||
add_library(kaliber SHARED
|
||||
../../../src/base/collusion_test.cc
|
||||
../../../src/base/log.cc
|
||||
../../../src/base/mem.cc
|
||||
../../../src/base/task_runner.cc
|
||||
../../../src/base/thread_pool.cc
|
||||
../../../src/demo/credits.cc
|
||||
|
@ -136,7 +137,6 @@ add_library(kaliber SHARED
|
|||
../../../src/third_party/minizip/ioapi.c
|
||||
../../../src/third_party/minizip/unzip.c
|
||||
../../../src/third_party/spirv-reflect/spirv_reflect.c
|
||||
../../../src/third_party/stb/stb_image.c
|
||||
../../../src/third_party/texture_compressor/dxt_encoder_internals.cc
|
||||
../../../src/third_party/texture_compressor/dxt_encoder.cc
|
||||
../../../src/third_party/texture_compressor/texture_compressor_etc1.cc
|
||||
|
|
|
@ -8,6 +8,7 @@ source_set("base") {
|
|||
"interpolation.h",
|
||||
"log.cc",
|
||||
"log.h",
|
||||
"mem.cc",
|
||||
"mem.h",
|
||||
"misc.h",
|
||||
"random.h",
|
||||
|
|
|
@ -0,0 +1,46 @@
|
|||
#include "base/mem.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#if defined(__ANDROID__)
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
namespace base {
|
||||
|
||||
void* AlignedAlloc(size_t size, size_t alignment) {
|
||||
DCHECK(size > 0U);
|
||||
DCHECK(IsPow2(alignment));
|
||||
DCHECK((alignment % sizeof(void*)) == 0U);
|
||||
|
||||
void* ptr = nullptr;
|
||||
#if defined(_WIN32)
|
||||
ptr = _aligned_malloc(size, alignment);
|
||||
#elif defined(__ANDROID__)
|
||||
ptr = memalign(alignment, size);
|
||||
#else
|
||||
int ret = posix_memalign(&ptr, alignment, size);
|
||||
if (ret != 0) {
|
||||
DLOG(0) << "posix_memalign() returned with error " << ret;
|
||||
ptr = nullptr;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Aligned allocations may fail for non-memory related reasons.
|
||||
CHECK(ptr) << "Aligned allocation failed. "
|
||||
<< "size=" << size << ", alignment=" << alignment;
|
||||
DCHECK(IsAligned(ptr, alignment));
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void* AlignedRealloc(void* ptr,
|
||||
size_t old_size,
|
||||
size_t new_size,
|
||||
size_t alignment) {
|
||||
auto* new_ptr = AlignedAlloc(new_size, alignment);
|
||||
memmove(new_ptr, ptr, old_size);
|
||||
AlignedFree(ptr);
|
||||
return new_ptr;
|
||||
}
|
||||
|
||||
} // namespace base
|
|
@ -1,26 +1,33 @@
|
|||
#ifndef BASE_MEM_H
|
||||
#define BASE_MEM_H
|
||||
|
||||
#include <cstdlib>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <memory>
|
||||
|
||||
#if defined(__ANDROID__)
|
||||
#if defined(_WIN32)
|
||||
#include <malloc.h>
|
||||
#else
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
#include "base/log.h"
|
||||
|
||||
#define ALIGN_MEM(alignment) __attribute__((aligned(alignment)))
|
||||
#include "base/misc.h"
|
||||
|
||||
namespace base {
|
||||
|
||||
// Releases a block returned by AlignedAlloc() / AlignedRealloc().
// On Windows the block comes from _aligned_malloc and has to go back through
// _aligned_free; on every other platform plain free() is used.
inline void AlignedFree(void* mem) {
#if !defined(_WIN32)
  free(mem);
#else
  _aligned_free(mem);
#endif
}
|
||||
|
||||
namespace internal {
|
||||
|
||||
struct ScopedAlignedFree {
|
||||
inline void operator()(void* x) const {
|
||||
if (x)
|
||||
free(x);
|
||||
}
|
||||
inline void operator()(void* x) const { AlignedFree(x); }
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
|
@ -28,27 +35,16 @@ struct ScopedAlignedFree {
|
|||
template <typename T>
|
||||
using AlignedMemPtr = std::unique_ptr<T, internal::ScopedAlignedFree>;
|
||||
|
||||
template <int kAlignment>
|
||||
inline void* AlignedAlloc(size_t size) {
|
||||
void* ptr = NULL;
|
||||
#if defined(__ANDROID__)
|
||||
ptr = memalign(kAlignment, size);
|
||||
#else
|
||||
if (posix_memalign(&ptr, kAlignment, size))
|
||||
ptr = NULL;
|
||||
#endif
|
||||
DCHECK(ptr);
|
||||
// DCHECK(((unsigned)ptr & (kAlignment - 1)) == 0);
|
||||
return ptr;
|
||||
}
|
||||
void* AlignedAlloc(size_t size, size_t alignment);
|
||||
|
||||
inline void AlignedFree(void* mem) {
|
||||
free(mem);
|
||||
}
|
||||
void* AlignedRealloc(void* ptr,
|
||||
size_t old_size,
|
||||
size_t new_size,
|
||||
size_t alignment);
|
||||
|
||||
template <int kAlignment>
|
||||
inline bool IsAligned(void* ptr) {
|
||||
return (reinterpret_cast<uintptr_t>(ptr) & (kAlignment - 1)) == 0U;
|
||||
inline bool IsAligned(const void* val, size_t alignment) {
|
||||
DCHECK(IsPow2(alignment)) << alignment << " is not a power of 2";
|
||||
return (reinterpret_cast<uintptr_t>(val) & (alignment - 1)) == 0;
|
||||
}
|
||||
|
||||
} // namespace base
|
||||
|
|
|
@ -5,13 +5,19 @@
|
|||
|
||||
#include "base/interpolation.h"
|
||||
#include "base/log.h"
|
||||
#include "base/mem.h"
|
||||
#include "base/misc.h"
|
||||
#include "engine/engine.h"
|
||||
#include "engine/platform/asset_file.h"
|
||||
#include "third_party/texture_compressor/texture_compressor.h"
|
||||
|
||||
// This 3rd party library is written in C and uses malloc, which means that we
|
||||
// have to do the same.
|
||||
// Use aligned memory for SIMD in texture compressor.
|
||||
#define STB_IMAGE_IMPLEMENTATION
|
||||
#define STBI_NO_STDIO
|
||||
#define STBI_MALLOC(sz) base::AlignedAlloc(sz, 16)
|
||||
#define STBI_REALLOC_SIZED(p, oldsz, newsz) \
|
||||
base::AlignedRealloc(p, oldsz, newsz, 16)
|
||||
#define STBI_FREE(p) base::AlignedFree(p)
|
||||
#include "third_party/stb/stb_image.h"
|
||||
|
||||
using namespace base;
|
||||
|
@ -77,7 +83,7 @@ bool Image::Create(int w, int h) {
|
|||
width_ = w;
|
||||
height_ = h;
|
||||
|
||||
buffer_.reset((uint8_t*)AlignedAlloc<16>(w * h * 4 * sizeof(uint8_t)));
|
||||
buffer_.reset((uint8_t*)AlignedAlloc(w * h * 4 * sizeof(uint8_t), 16));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -85,7 +91,7 @@ bool Image::Create(int w, int h) {
|
|||
void Image::Copy(const Image& other) {
|
||||
if (other.buffer_) {
|
||||
int size = other.GetSize();
|
||||
buffer_.reset((uint8_t*)AlignedAlloc<16>(size));
|
||||
buffer_.reset((uint8_t*)AlignedAlloc(size, 16));
|
||||
memcpy(buffer_.get(), other.buffer_.get(), size);
|
||||
}
|
||||
width_ = other.width_;
|
||||
|
@ -101,7 +107,7 @@ bool Image::CreateMip(const Image& other) {
|
|||
width_ = std::max(other.width_ >> 1, 1);
|
||||
height_ = std::max(other.height_ >> 1, 1);
|
||||
format_ = kRGBA32;
|
||||
buffer_.reset((uint8_t*)AlignedAlloc<16>(GetSize()));
|
||||
buffer_.reset((uint8_t*)AlignedAlloc(GetSize(), 16));
|
||||
|
||||
// If the width isn't perfectly divisible by two, then we end up skewing
|
||||
// the image because the source offset isn't updated properly.
|
||||
|
@ -158,7 +164,7 @@ bool Image::Load(const std::string& file_name) {
|
|||
// LOG(0)("Converting image from 1 to 4 channels.\n");
|
||||
// Assume it's an intensity, duplicate it to RGB and fill A with opaque.
|
||||
converted_buffer =
|
||||
(uint8_t*)AlignedAlloc<16>(w * h * 4 * sizeof(uint8_t));
|
||||
(uint8_t*)AlignedAlloc(w * h * 4 * sizeof(uint8_t), 16);
|
||||
for (int i = 0; i < w * h; ++i) {
|
||||
converted_buffer[i * 4 + 0] = buffer_[i];
|
||||
converted_buffer[i * 4 + 1] = buffer_[i];
|
||||
|
@ -171,7 +177,7 @@ bool Image::Load(const std::string& file_name) {
|
|||
// LOG(0)("Converting image from 3 to 4 channels.\n");
|
||||
// Add an opaque channel.
|
||||
converted_buffer =
|
||||
(uint8_t*)AlignedAlloc<16>(w * h * 4 * sizeof(uint8_t));
|
||||
(uint8_t*)AlignedAlloc(w * h * 4 * sizeof(uint8_t), 16);
|
||||
for (int i = 0; i < w * h; ++i) {
|
||||
converted_buffer[i * 4 + 0] = buffer_[i * 3 + 0];
|
||||
converted_buffer[i * 4 + 1] = buffer_[i * 3 + 1];
|
||||
|
@ -238,7 +244,7 @@ void Image::ConvertToPow2() {
|
|||
<< new_width << ", " << new_height << ")";
|
||||
|
||||
int bigger_size = new_width * new_height * 4 * sizeof(uint8_t);
|
||||
uint8_t* bigger_buffer = (uint8_t*)AlignedAlloc<16>(bigger_size);
|
||||
uint8_t* bigger_buffer = (uint8_t*)AlignedAlloc(bigger_size, 16);
|
||||
|
||||
// Fill it with black.
|
||||
memset(bigger_buffer, 0, bigger_size);
|
||||
|
@ -296,7 +302,7 @@ bool Image::Compress() {
|
|||
|
||||
unsigned compressedSize = GetSize();
|
||||
uint8_t* compressedBuffer =
|
||||
(uint8_t*)AlignedAlloc<16>(compressedSize * sizeof(uint8_t));
|
||||
(uint8_t*)AlignedAlloc(compressedSize * sizeof(uint8_t), 16);
|
||||
|
||||
const uint8_t* src = buffer_.get();
|
||||
uint8_t* dst = compressedBuffer;
|
||||
|
|
|
@ -192,13 +192,13 @@ SincResampler::SincResampler(double io_sample_rate_ratio, int request_frames)
|
|||
input_buffer_size_(request_frames_ + kernel_size_),
|
||||
// Create input buffers with a 32-byte alignment for SIMD optimizations.
|
||||
kernel_storage_(static_cast<float*>(
|
||||
base::AlignedAlloc<32>(sizeof(float) * kernel_storage_size_))),
|
||||
base::AlignedAlloc(sizeof(float) * kernel_storage_size_, 32))),
|
||||
kernel_pre_sinc_storage_(static_cast<float*>(
|
||||
base::AlignedAlloc<32>(sizeof(float) * kernel_storage_size_))),
|
||||
base::AlignedAlloc(sizeof(float) * kernel_storage_size_, 32))),
|
||||
kernel_window_storage_(static_cast<float*>(
|
||||
base::AlignedAlloc<32>(sizeof(float) * kernel_storage_size_))),
|
||||
base::AlignedAlloc(sizeof(float) * kernel_storage_size_, 32))),
|
||||
input_buffer_(static_cast<float*>(
|
||||
base::AlignedAlloc<32>(sizeof(float) * input_buffer_size_))),
|
||||
base::AlignedAlloc(sizeof(float) * input_buffer_size_, 32))),
|
||||
r1_(input_buffer_.get()),
|
||||
r2_(input_buffer_.get() + kernel_size_ / 2) {
|
||||
CHECK(request_frames > kernel_size_ * 3 / 2)
|
||||
|
|
|
@ -59,7 +59,6 @@ source_set("third_party") {
|
|||
"minimp3/minimp3.h",
|
||||
"minimp3/minimp3_ex.h",
|
||||
"spirv-reflect/spirv_reflect.c",
|
||||
"stb/stb_image.c",
|
||||
"stb/stb_image.h",
|
||||
"stb/stb_truetype.h",
|
||||
"texture_compressor/dxt_encoder.cc",
|
||||
|
|
|
@ -1,3 +0,0 @@
|
|||
#define STB_IMAGE_IMPLEMENTATION
|
||||
#define STBI_NO_STDIO
|
||||
#include "stb_image.h"
|
Loading…
Reference in New Issue