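From d84f2b23074dd6faf4a8e4c49e033204a6464808 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Branimir=20Karad=C5=BEi=C4=87?= Date: Thu, 15 Oct 2015 16:35:21 -0700 Subject: [PATCH] Added radix sort just for keys. --- include/bx/radixsort.h | 126 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 124 insertions(+), 2 deletions(-) diff --git a/include/bx/radixsort.h b/include/bx/radixsort.h index 9ed4011..d28f651 100644 --- a/include/bx/radixsort.h +++ b/include/bx/radixsort.h @@ -14,8 +14,69 @@ namespace bx #define BX_RADIXSORT_HISTOGRAM_SIZE (1<<BX_RADIXSORT_BITS) #define BX_RADIXSORT_BIT_MASK (BX_RADIXSORT_HISTOGRAM_SIZE-1) + inline void radixSort32(uint32_t* __restrict _keys, uint32_t* __restrict _tempKeys, uint32_t _size) + { + uint32_t* __restrict keys = _keys; + uint32_t* __restrict tempKeys = _tempKeys; + + uint32_t histogram[BX_RADIXSORT_HISTOGRAM_SIZE]; + uint16_t shift = 0; + uint32_t pass = 0; + for (; pass < 3; ++pass) + { + memset(histogram, 0, sizeof(uint32_t)*BX_RADIXSORT_HISTOGRAM_SIZE); + + bool sorted = true; + { + uint32_t key = keys[0]; + uint32_t prevKey = key; + for (uint32_t ii = 0; ii < _size; ++ii, prevKey = key) + { + key = keys[ii]; + uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK; + ++histogram[index]; + sorted &= prevKey <= key; + } + } + + if (sorted) + { + goto done; + } + + uint32_t offset = 0; + for (uint32_t ii = 0; ii < BX_RADIXSORT_HISTOGRAM_SIZE; ++ii) + { + uint32_t count = histogram[ii]; + histogram[ii] = offset; + offset += count; + } + + for (uint32_t ii = 0; ii < _size; ++ii) + { + uint32_t key = keys[ii]; + uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK; + uint32_t dest = histogram[index]++; + tempKeys[dest] = key; + } + + uint32_t* swapKeys = tempKeys; + tempKeys = keys; + keys = swapKeys; + + shift += BX_RADIXSORT_BITS; + } + +done: + if (0 != (pass&1) ) + { + // Odd number of passes needs to do copy to the destination. 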
+ memcpy(_keys, _tempKeys, _size*sizeof(uint32_t) ); + } + } + template <typename Ty> - void radixSort32(uint32_t* __restrict _keys, uint32_t* __restrict _tempKeys, Ty* __restrict _values, Ty* __restrict _tempValues, uint32_t _size) + inline void radixSort32(uint32_t* __restrict _keys, uint32_t* __restrict _tempKeys, Ty* __restrict _values, Ty* __restrict _tempValues, uint32_t _size) { uint32_t* __restrict keys = _keys; uint32_t* __restrict tempKeys = _tempKeys; @@ -87,8 +148,69 @@ done: } } + inline void radixSort64(uint64_t* __restrict _keys, uint64_t* __restrict _tempKeys, uint32_t _size) + { + uint64_t* __restrict keys = _keys; + uint64_t* __restrict tempKeys = _tempKeys; + + uint32_t histogram[BX_RADIXSORT_HISTOGRAM_SIZE]; + uint16_t shift = 0; + uint32_t pass = 0; + for (; pass < 6; ++pass) + { + memset(histogram, 0, sizeof(uint32_t)*BX_RADIXSORT_HISTOGRAM_SIZE); + + bool sorted = true; + { + uint64_t key = keys[0]; + uint64_t prevKey = key; + for (uint32_t ii = 0; ii < _size; ++ii, prevKey = key) + { + key = keys[ii]; + uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK; + ++histogram[index]; + sorted &= prevKey <= key; + } + } + + if (sorted) + { + goto done; + } + + uint32_t offset = 0; + for (uint32_t ii = 0; ii < BX_RADIXSORT_HISTOGRAM_SIZE; ++ii) + { + uint32_t count = histogram[ii]; + histogram[ii] = offset; + offset += count; + } + + for (uint32_t ii = 0; ii < _size; ++ii) + { + uint64_t key = keys[ii]; + uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK; + uint32_t dest = histogram[index]++; + tempKeys[dest] = key; + } + + uint64_t* swapKeys = tempKeys; + tempKeys = keys; + keys = swapKeys; + + shift += BX_RADIXSORT_BITS; + } + +done: + if (0 != (pass&1) ) + { + // Odd number of passes needs to do copy to the destination. 
+ memcpy(_keys, _tempKeys, _size*sizeof(uint64_t) ); + } + } + template <typename Ty> - void radixSort64(uint64_t* __restrict _keys, uint64_t* __restrict _tempKeys, Ty* __restrict _values, Ty* __restrict _tempValues, uint32_t _size) + inline void radixSort64(uint64_t* __restrict _keys, uint64_t* __restrict _tempKeys, Ty* __restrict _values, Ty* __restrict _tempValues, uint32_t _size) { uint64_t* __restrict keys = _keys; uint64_t* __restrict tempKeys = _tempKeys;