Skip to content

Commit

Permalink
limit number of buckets for "overflow" + fix function names + clang f…
Browse files Browse the repository at this point in the history
…ormat
  • Loading branch information
SergeyMakeev committed Oct 8, 2023
1 parent 171e12d commit aa54e6a
Show file tree
Hide file tree
Showing 7 changed files with 48 additions and 42 deletions.
10 changes: 5 additions & 5 deletions SmMalloc/smmalloc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,8 @@ void TlsPoolBucket::Init(uint32_t* pCacheStack, uint32_t maxElementsNum, CacheWa
pBucketData = pBucket->pData;

// warmup cache
size_t elementSize = sm::getBucketSizeInBytesByIndex(bucketIndex);

if (warmupOptions == CACHE_COLD || !IsAligned(elementSize, 16))
size_t elementSize = sm::GetBucketSizeInBytesByIndex(bucketIndex);
if (warmupOptions == CACHE_COLD)
{
return;
}
Expand Down Expand Up @@ -129,6 +128,7 @@ uint32_t* TlsPoolBucket::Destroy()

void Allocator::CreateThreadCache(CacheWarmupOptions warmupOptions, std::initializer_list<uint32_t> options)
{
// sanity check: the thread cache configuration must not have more entries than there are buckets
SM_ASSERT(bucketsCount >= options.size());

size_t i = 0;
Expand All @@ -141,7 +141,7 @@ void Allocator::CreateThreadCache(CacheWarmupOptions warmupOptions, std::initial

uint32_t elementsNum = _elementsNum + SMM_MAX_CACHE_ITEMS_COUNT;

// allocate stack for cache
// allocate stack for cache indices
uint32_t* localStack = (uint32_t*)GenericAllocator::Alloc(gAllocator, elementsNum * sizeof(uint32_t), SMM_CACHE_LINE_SIZE);

// initialize
Expand Down Expand Up @@ -266,7 +266,7 @@ void Allocator::Init(uint32_t _bucketsCount, size_t _bucketSizeInBytes)
PoolBucket& bucket = buckets[i];
bucket.pData = pBuffer.get() + i * bucketSizeInBytes;
bucket.pBufferEnd = bucket.pData + bucketSizeInBytes;
size_t bucketSizeInBytes = getBucketSizeInBytesByIndex(i);
size_t bucketSizeInBytes = GetBucketSizeInBytesByIndex(i);
SM_ASSERT(IsAligned(bucketSizeInBytes, kMinValidAlignment));
SM_ASSERT(IsAligned(size_t(bucket.pData), alignmentMax) && "Incorrect alignment detected!");
bucket.Create(bucketSizeInBytes);
Expand Down
39 changes: 22 additions & 17 deletions SmMalloc/smmalloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@

//#define SMM_LINEAR_PARTITIONING
/*
Simple linear partitioning: every bucket size grows by 16 bytes
Note: fastest but can be wasteful because the bucket size distribution is not ideal (works pretty well for a small number of buckets)
Expand Down Expand Up @@ -104,7 +104,8 @@
13->160, 14->192, 15->224, 16->256, 17->320, 18->384, 19->448, 20->512, 21->640, 22->768, 23->896, 24->1024,
25->1280, 26->1536, 27->1792, 28->2048, 29->2560, 30->3072, 31->3584, 32->4096, 33->5120, 34->6144, 35->7168, 36->8192,
37->10240, 38->12288, 39->14336, 40->16384, 41->20480, 42->24576, 43->28672, 44->32768, 45->40960, 46->49152, 47->57344, 48->65536,
49->81920, 50->98304, 51->114688, 52->131072, 53->163840, 54->196608, 55->229376, 56->262144, 57->327680, 58->393216, 59->458752, 60->524288, 61->655360
49->81920, 50->98304, 51->114688, 52->131072, 53->163840, 54->196608, 55->229376, 56->262144, 57->327680, 58->393216, 59->458752,
60->524288, 61->655360
*/
#endif

Expand Down Expand Up @@ -152,7 +153,7 @@ struct GlobalStats
std::atomic<size_t> totalAllocationsRoutedToDefaultAllocator;
std::atomic<size_t> routingReasonBySize;
std::atomic<size_t> routingReasonSaturation;

GlobalStats()
{
totalNumAllocationAttempts.store(0);
Expand All @@ -169,7 +170,7 @@ struct BucketStats
std::atomic<size_t> hitCount;
std::atomic<size_t> missCount;
std::atomic<size_t> freeCount;

BucketStats()
{
cacheHitCount.store(0);
Expand Down Expand Up @@ -275,7 +276,7 @@ SMM_INLINE size_t Align(size_t val, size_t alignment)
return r;
}

SMM_INLINE size_t getBucketIndexBySize(size_t bytesCount)
SMM_INLINE size_t GetBucketIndexBySize(size_t bytesCount)
{
#if defined(SMM_LINEAR_PARTITIONING)
size_t bucketIndex = ((bytesCount - 1) >> 4);
Expand All @@ -295,11 +296,11 @@ SMM_INLINE size_t getBucketIndexBySize(size_t bytesCount)
size_t bucketIndex = (size <= 127) ? p0 : ((size > 1023) ? p2 : p1);
return bucketIndex;
#else
#error Unknown partitioning scheme!
#error Unknown partitioning scheme!
#endif
}

SMM_INLINE size_t getBucketSizeInBytesByIndex(size_t bucketIndex)
SMM_INLINE size_t GetBucketSizeInBytesByIndex(size_t bucketIndex)
{
#if defined(SMM_LINEAR_PARTITIONING)
size_t sizeInBytes = 16 + bucketIndex * 16;
Expand All @@ -320,6 +321,8 @@ SMM_INLINE size_t getBucketSizeInBytesByIndex(size_t bucketIndex)
#endif
}

// Returns the smaller of the two values (either one when they are equal).
SMM_INLINE size_t Min(size_t a, size_t b)
{
    if (b <= a)
    {
        return b;
    }
    return a;
}

struct GenericAllocator
{
typedef void* TInstance;
Expand Down Expand Up @@ -388,7 +391,7 @@ class Allocator
#ifdef SMMALLOC_STATS_SUPPORT
BucketStats bucketStats;
#endif

PoolBucket()
: head(TaggedIndex::Invalid)
, pData(nullptr)
Expand Down Expand Up @@ -570,12 +573,12 @@ class Allocator
#endif

size_t bytesCount = Align(_bytesCount, alignment);
size_t bucketIndex = getBucketIndexBySize(bytesCount);
size_t bucketIndex = GetBucketIndexBySize(bytesCount);

#ifdef SMMALLOC_STATS_SUPPORT
bool isValidBucket = false;
#endif

if (bucketIndex < bucketsCount)
{
#ifdef SMMALLOC_STATS_SUPPORT
Expand All @@ -596,7 +599,9 @@ class Allocator
}
}

while (bucketIndex < bucketsCount)
// never "overflow" allocation to more than 4 buckets (for performance reasons)
const size_t maxBucketIndex = Min(bucketsCount, bucketIndex + 4);
while (bucketIndex < maxBucketIndex)
{
void* pRes = buckets[bucketIndex].Alloc();
if (pRes)
Expand Down Expand Up @@ -624,7 +629,7 @@ class Allocator
do
{
bucketIndex++;
} while (!IsAligned(getBucketSizeInBytesByIndex(bucketIndex), alignment));
} while (!IsAligned(GetBucketSizeInBytesByIndex(bucketIndex), alignment));
}

#ifdef SMMALLOC_STATS_SUPPORT
Expand Down Expand Up @@ -698,7 +703,7 @@ class Allocator
size_t bucketIndex = FindBucket(p);
if (bucketIndex < bucketsCount)
{
size_t elementSize = getBucketSizeInBytesByIndex(bucketIndex);
size_t elementSize = GetBucketSizeInBytesByIndex(bucketIndex);
if (bytesCount <= elementSize)
{
// reuse existing memory
Expand Down Expand Up @@ -734,7 +739,7 @@ class Allocator

// check if we need to realloc from generic allocator to smmalloc
size_t __bytesCount = Align(bytesCount, alignment);
size_t __bucketIndex = getBucketIndexBySize(__bytesCount);
size_t __bucketIndex = GetBucketIndexBySize(__bytesCount);
if (__bucketIndex < bucketsCount)
{
void* p2 = Alloc(bytesCount, alignment);
Expand Down Expand Up @@ -765,7 +770,7 @@ class Allocator
size_t bucketIndex = FindBucket(p);
if (bucketIndex < bucketsCount)
{
size_t elementSize = getBucketSizeInBytesByIndex(bucketIndex);
size_t elementSize = GetBucketSizeInBytesByIndex(bucketIndex);
return elementSize;
}

Expand Down Expand Up @@ -799,7 +804,7 @@ class Allocator
return 0;
}

size_t oneElementSize = getBucketSizeInBytesByIndex(bucketIndex);
size_t oneElementSize = GetBucketSizeInBytesByIndex(bucketIndex);
return (uint32_t)(bucketSizeInBytes / oneElementSize);
}

Expand Down Expand Up @@ -1031,7 +1036,7 @@ extern "C"
}

SMMALLOC_API SMM_INLINE void _sm_allocator_thread_cache_create(sm_allocator allocator, sm::CacheWarmupOptions warmupOptions,
std::initializer_list<uint32_t> options)
std::initializer_list<uint32_t> options)
{
if (allocator == nullptr)
{
Expand Down
2 changes: 1 addition & 1 deletion smmalloc_perf01.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ void printDebug(sm_allocator heap)
for (size_t bucketIndex = 0; bucketIndex < bucketsCount; bucketIndex++)
{
uint32_t elementsCount = heap->GetBucketElementsCount(bucketIndex);
size_t elementsSize = sm::getBucketSizeInBytesByIndex(bucketIndex);
size_t elementsSize = sm::GetBucketSizeInBytesByIndex(bucketIndex);
printf("Bucket[%zu], Elements[%d], SizeOf[%zu] -----\n", bucketIndex, elementsCount, elementsSize);
const sm::BucketStats* stats = heap->GetBucketStats(bucketIndex);
if (!stats)
Expand Down
25 changes: 13 additions & 12 deletions smmalloc_perf02.cpp
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
#include <ubench.h>
#include <vector>
#include <smmalloc.h>
#include <dlmalloc.h>
#include <rpmalloc.h>
#include <smmalloc.h>
#include <ubench.h>
#include <vector>

#if defined(_WIN32)
#include <mimalloc.h>
#include <hoard.h>
#include <mimalloc.h>
#endif


struct UBenchGlobals
{
static const int kNumAllocations = 10000000;
Expand All @@ -20,14 +19,14 @@ struct UBenchGlobals

UBenchGlobals()
{
srand(2345);
srand(1306);

// num allocations
randomSequence.resize(kNumAllocations);
for (size_t i = 0; i < randomSequence.size(); i++)
{
// 16 - 80 bytes
size_t sz = 16 + (rand() % 64);
// 16 - 255 bytes
size_t sz = 16 + (rand() % 240);
randomSequence[i] = sz;
}

Expand All @@ -49,7 +48,10 @@ UBENCH_EX(PerfTest, smmalloc_10m)
UBenchGlobals& g = UBenchGlobals::get();
size_t wsSize = g.workingSet.size();

sm_allocator space = _sm_allocator_create(20, (48 * 1024 * 1024));
//
sm_allocator space = _sm_allocator_create(18, (48 * 1024 * 1024));
_sm_allocator_thread_cache_create(space, sm::CACHE_COLD,
{512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512});

UBENCH_DO_BENCHMARK()
{
Expand Down Expand Up @@ -94,7 +96,7 @@ UBENCH_EX(PerfTest, smmalloc_10m)
for (size_t bucketIndex = 0; bucketIndex < bucketsCount; bucketIndex++)
{
uint32_t elementsCount = space->GetBucketElementsCount(bucketIndex);
size_t elementsSize = sm::getBucketSizeInBytesByIndex(bucketIndex);
size_t elementsSize = sm::GetBucketSizeInBytesByIndex(bucketIndex);
printf("Bucket[%zu], Elements[%d], SizeOf[%zu] -----\n", bucketIndex, elementsCount, elementsSize);
const sm::BucketStats* stats = space->GetBucketStats(bucketIndex);
if (!stats)
Expand All @@ -109,6 +111,7 @@ UBENCH_EX(PerfTest, smmalloc_10m)
}
#endif

_sm_allocator_thread_cache_destroy(space);
_sm_allocator_destroy(space);
}

Expand Down Expand Up @@ -251,8 +254,6 @@ UBENCH_EX(PerfTest, mi_malloc_10m)
g.workingSet[i] = nullptr;
}
}

}

#endif

2 changes: 1 addition & 1 deletion smmalloc_perf_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ extern void compare_allocators();
UBENCH_STATE();
// Benchmark executable entry point: forwards the command-line arguments to
// ubench_main() and returns its result as the process exit status.
// NOTE(review): this listing comes from a diff view, so the two
// compare_allocators() calls below are most likely the old and the new position
// of a single call (before vs. after the benchmarks) - confirm against the real file.
int main(int argc, const char* const argv[])
{
compare_allocators();
int res = ubench_main(argc, argv);
compare_allocators();
return res;
}
8 changes: 4 additions & 4 deletions smmalloc_test01.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ TEST(SimpleTests, BucketSizeTest)
{
for (size_t elemSize = 1; elemSize < (1024 * 1024); elemSize++)
{
size_t bucketIndex = sm::getBucketIndexBySize(elemSize);
size_t bucketSizeInBytes = sm::getBucketSizeInBytesByIndex(bucketIndex);
size_t bucketIndex = sm::GetBucketIndexBySize(elemSize);
size_t bucketSizeInBytes = sm::GetBucketSizeInBytesByIndex(bucketIndex);
ASSERT_TRUE(elemSize <= bucketSizeInBytes);
}
}
Expand All @@ -82,7 +82,7 @@ TEST(SimpleTests, BucketSizeAlignment)
// make sure bucket sizes are at least aligned to kMinValidAlignment
for (size_t bucketIndex = 0; bucketIndex < SMM_MAX_BUCKET_COUNT; bucketIndex++)
{
size_t bucketSizeInBytes = sm::getBucketSizeInBytesByIndex(bucketIndex);
size_t bucketSizeInBytes = sm::GetBucketSizeInBytesByIndex(bucketIndex);
ASSERT_TRUE(_IsAligned(bucketSizeInBytes, sm::Allocator::kMinValidAlignment));
}
}
Expand Down Expand Up @@ -199,7 +199,7 @@ TEST(SimpleTests, MegaAlloc)
for (size_t bucketIndex = 0; bucketIndex < bucketsCount; bucketIndex++)
{
uint32_t elementsCount = heap->GetBucketElementsCount(bucketIndex);
size_t elementsSize = sm::getBucketSizeInBytesByIndex(bucketIndex);
size_t elementsSize = sm::GetBucketSizeInBytesByIndex(bucketIndex);
printf("Bucket[%zu], Elements[%d], SizeOf[%zu] -----\n", bucketIndex, elementsCount, elementsSize);
const sm::BucketStats* stats = heap->GetBucketStats(bucketIndex);
if (!stats)
Expand Down
4 changes: 2 additions & 2 deletions smmalloc_test02.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ TEST(MultithreadingTests, StressTest)

for (int32_t bucketIndex = 0; bucketIndex < (int32_t)bucketsCount; bucketIndex++)
{
size_t elementSize = sm::getBucketSizeInBytesByIndex(bucketIndex);
size_t elementSize = sm::GetBucketSizeInBytesByIndex(bucketIndex);

size_t maxCount = heap->GetBucketElementsCount(bucketIndex);

Expand Down Expand Up @@ -203,7 +203,7 @@ TEST(MultithreadingTests, MtPerformance)
for (size_t bucketIndex = 0; bucketIndex < bucketsCount; bucketIndex++)
{
uint32_t elementsCount = heap->GetBucketElementsCount(bucketIndex);
size_t elementsSize = sm::getBucketSizeInBytesByIndex(bucketIndex);
size_t elementsSize = sm::GetBucketSizeInBytesByIndex(bucketIndex);
printf("Bucket[%zu], Elements[%d], SizeOf[%zu] -----\n", bucketIndex, elementsCount, elementsSize);
const sm::BucketStats* stats = heap->GetBucketStats(bucketIndex);
if (!stats)
Expand Down

0 comments on commit aa54e6a

Please sign in to comment.