mirror of
https://github.com/bkaradzic/bx.git
synced 2026-02-17 20:52:37 +01:00
Added early-out if data is already sorted. Fixed radixSort32 outputting results into temp buffers.
This commit is contained in:
@@ -15,18 +15,34 @@ namespace bx
|
||||
#define BX_RADIXSORT_BIT_MASK (BX_RADIXSORT_HISTOGRAM_SIZE-1)
|
||||
|
||||
template <typename Ty>
|
||||
void radixSort32(uint32_t* _keys, uint32_t* _tempKeys, Ty* _values, Ty* _tempValues, uint32_t _size)
|
||||
void radixSort32(uint32_t* __restrict _keys, uint32_t* __restrict _tempKeys, Ty* __restrict _values, Ty* __restrict _tempValues, uint32_t _size)
|
||||
{
|
||||
uint32_t* __restrict keys = _keys;
|
||||
uint32_t* __restrict tempKeys = _tempKeys;
|
||||
Ty* __restrict values = _values;
|
||||
Ty* __restrict tempValues = _tempValues;
|
||||
|
||||
uint16_t histogram[BX_RADIXSORT_HISTOGRAM_SIZE];
|
||||
uint16_t shift = 0;
|
||||
for (uint32_t pass = 0; pass < 3; ++pass)
|
||||
uint32_t pass = 0;
|
||||
for (; pass < 3; ++pass)
|
||||
{
|
||||
memset(histogram, 0, sizeof(uint16_t)*BX_RADIXSORT_HISTOGRAM_SIZE);
|
||||
for (uint32_t ii = 0; ii < _size; ++ii)
|
||||
|
||||
bool sorted = true;
|
||||
uint32_t key = keys[0];
|
||||
uint32_t prevKey = key;
|
||||
for (uint32_t ii = 0; ii < _size; ++ii, prevKey = key)
|
||||
{
|
||||
uint32_t key = _keys[ii];
|
||||
key = keys[ii];
|
||||
uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;
|
||||
++histogram[index];
|
||||
sorted &= prevKey <= key;
|
||||
}
|
||||
|
||||
if (sorted)
|
||||
{
|
||||
goto done;
|
||||
}
|
||||
|
||||
uint16_t offset = 0;
|
||||
@@ -39,38 +55,65 @@ namespace bx
|
||||
|
||||
for (uint32_t ii = 0; ii < _size; ++ii)
|
||||
{
|
||||
uint32_t key = _keys[ii];
|
||||
uint32_t key = keys[ii];
|
||||
uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;
|
||||
uint16_t dest = histogram[index]++;
|
||||
_tempKeys[dest] = key;
|
||||
_tempValues[dest] = _values[ii];
|
||||
tempKeys[dest] = key;
|
||||
tempValues[dest] = values[ii];
|
||||
}
|
||||
|
||||
uint32_t* swapKeys = _tempKeys;
|
||||
_tempKeys = _keys;
|
||||
_keys = swapKeys;
|
||||
uint32_t* swapKeys = tempKeys;
|
||||
tempKeys = keys;
|
||||
keys = swapKeys;
|
||||
|
||||
Ty* swapValues = _tempValues;
|
||||
_tempValues = _values;
|
||||
_values = swapValues;
|
||||
Ty* swapValues = tempValues;
|
||||
tempValues = values;
|
||||
values = swapValues;
|
||||
|
||||
shift += BX_RADIXSORT_BITS;
|
||||
}
|
||||
|
||||
done:
|
||||
if (0 != (pass&1) )
|
||||
{
|
||||
// Odd number of passes needs to do copy to the destination.
|
||||
memcpy(_keys, _tempKeys, _size*sizeof(uint32_t) );
|
||||
for (uint32_t ii = 0; ii < _size; ++ii)
|
||||
{
|
||||
_values[ii] = _tempValues[ii];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Ty>
|
||||
void radixSort64(uint64_t* _keys, uint64_t* _tempKeys, Ty* _values, Ty* _tempValues, uint32_t _size)
|
||||
void radixSort64(uint64_t* __restrict _keys, uint64_t* __restrict _tempKeys, Ty* __restrict _values, Ty* __restrict _tempValues, uint32_t _size)
|
||||
{
|
||||
uint64_t* __restrict keys = _keys;
|
||||
uint64_t* __restrict tempKeys = _tempKeys;
|
||||
Ty* __restrict values = _values;
|
||||
Ty* __restrict tempValues = _tempValues;
|
||||
|
||||
uint16_t histogram[BX_RADIXSORT_HISTOGRAM_SIZE];
|
||||
uint16_t shift = 0;
|
||||
for (uint32_t pass = 0; pass < 6; ++pass)
|
||||
uint32_t pass = 0;
|
||||
for (; pass < 6; ++pass)
|
||||
{
|
||||
memset(histogram, 0, sizeof(uint16_t)*BX_RADIXSORT_HISTOGRAM_SIZE);
|
||||
for (uint32_t ii = 0; ii < _size; ++ii)
|
||||
|
||||
bool sorted = true;
|
||||
uint64_t key = keys[0];
|
||||
uint64_t prevKey = key;
|
||||
for (uint32_t ii = 0; ii < _size; ++ii, prevKey = key)
|
||||
{
|
||||
uint64_t key = _keys[ii];
|
||||
key = keys[ii];
|
||||
uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;
|
||||
++histogram[index];
|
||||
sorted &= prevKey <= key;
|
||||
}
|
||||
|
||||
if (sorted)
|
||||
{
|
||||
goto done;
|
||||
}
|
||||
|
||||
uint16_t offset = 0;
|
||||
@@ -83,23 +126,34 @@ namespace bx
|
||||
|
||||
for (uint32_t ii = 0; ii < _size; ++ii)
|
||||
{
|
||||
uint64_t key = _keys[ii];
|
||||
uint64_t key = keys[ii];
|
||||
uint16_t index = (key>>shift)&BX_RADIXSORT_BIT_MASK;
|
||||
uint16_t dest = histogram[index]++;
|
||||
_tempKeys[dest] = key;
|
||||
_tempValues[dest] = _values[ii];
|
||||
tempKeys[dest] = key;
|
||||
tempValues[dest] = values[ii];
|
||||
}
|
||||
|
||||
uint64_t* swapKeys = _tempKeys;
|
||||
_tempKeys = _keys;
|
||||
_keys = swapKeys;
|
||||
uint64_t* swapKeys = tempKeys;
|
||||
tempKeys = keys;
|
||||
keys = swapKeys;
|
||||
|
||||
Ty* swapValues = _tempValues;
|
||||
_tempValues = _values;
|
||||
_values = swapValues;
|
||||
Ty* swapValues = tempValues;
|
||||
tempValues = values;
|
||||
values = swapValues;
|
||||
|
||||
shift += BX_RADIXSORT_BITS;
|
||||
}
|
||||
|
||||
done:
|
||||
if (0 != (pass&1) )
|
||||
{
|
||||
// Odd number of passes needs to do copy to the destination.
|
||||
memcpy(_keys, _tempKeys, _size*sizeof(uint64_t) );
|
||||
for (uint32_t ii = 0; ii < _size; ++ii)
|
||||
{
|
||||
_values[ii] = _tempValues[ii];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#undef BX_RADIXSORT_BITS
|
||||
|
||||
Reference in New Issue
Block a user