#include <cudpp_globals.h>
#include "cudpp_radixsort.h"
#include "cta/scan_cta.cu"
#include <cudpp.h>
#include <stdio.h>
#include <cudpp_util.h>
#include <math.h>
#include "sharedmem.h"
| Radix Sort Functions | |
| typedef unsigned int | uint |
| template<bool doFlip> | |
| __device__ uint | floatFlip (uint f) |
| Flips the bits of a single-precision floating-point number (parameterized by doFlip). | |
| template<bool doFlip> | |
| __device__ uint | floatUnflip (uint f) |
| Reverses the bit-flip of a single-precision floating-point number (parameterized by doFlip). | |
| template<class T , int maxlevel> | |
| __device__ T | scanwarp (T val, T *sData) |
| Scans one warp quickly, optimized for 32-element warps, using shared memory. | |
| __device__ uint4 | scan4 (uint4 idata) |
| Scans 4*CTA_SIZE unsigned ints in a block. | |
| template<int ctasize> | |
| __device__ uint4 | rank4 (uint4 preds) |
| Computes the output position for each thread given a predicate; trues come first, then falses. | |
| template<uint nbits, uint startbit> | |
| __device__ void | radixSortBlock (uint4 &key, uint4 &value) |
| Sorts one block of keys and their associated values. | |
| template<uint nbits, uint startbit> | |
| __device__ void | radixSortBlockKeysOnly (uint4 &key) |
| Sorts one block. Key-only version. | |
sort_cta.cu
1.5.9