#include <cudpp_globals.h>
#include <math.h>
#include <cstdio>
Classes | |
| class | SegmentedScanTraits< T, oper, backward, exclusivity, unroll, fullBlock, sums > |
| Template class containing compile-time parameters to the segmented scan functions. More... | |
Segmented scan Functions | |
| #define | _SUM_SEGMENTED_STEP_NOSYNC(d) |
| #define | _SUM_SEGMENTED_STEP_SYNC(d) |
| #define | _SUM_SEGMENTED_STEP _SUM_SEGMENTED_STEP_NOSYNC |
| #define | SUM_SEGMENTED_STEP_SYNC(d) |
| #define | SUM_SEGMENTED_STEP(d) |
| #define | _SCAN_SEGMENTED_STEP_NOSYNC(d) |
| #define | _SCAN_SEGMENTED_STEP_SYNC(d) |
| #define | _SCAN_SEGMENTED_STEP _SCAN_SEGMENTED_STEP_NOSYNC |
| #define | SCAN_SEGMENTED_STEP_SYNC(d) |
| #define | SCAN_SEGMENTED_STEP(d) |
| template<class T, typename traits> | |
| __device__ void | loadForSegmentedScanSharedChunkFromMem4 (T *s_odata, T threadScan0[4], T threadScan1[4], unsigned int &threadFlag, unsigned int *s_oflags, unsigned int *s_otflags, unsigned int *s_oindices, const T *d_idata, const unsigned int *d_iflags, int numElements, int iDataOffset, int &ai, int &bi, int &aiDev, int &biDev, int &bankOffsetA, int &bankOffsetB) |
| Handles loading input s_data from global memory to shared memory (vec4 version). | |
| template<class T, unsigned int ctasize, class traits> | |
| __device__ unsigned int | buildSegmentedSumTree (T *s_data, unsigned int *s_indices, unsigned int *s_tflags) |
| Up-sweep (aka reduce) phase of CTA-level segmented scan in shared memory. | |
| template<class T, class traits> | |
| __device__ void | clearLastElementAndStoreSumFlagIndex (T *s_data, unsigned int *s_tflags, unsigned int *s_indices, T *d_blockSums, unsigned int *d_blockFlags, unsigned int *d_blockIndices) |
| Intermediate phase between the up-sweep (reduce) and down-sweep phases of CTA-level segmented scan. | |
| template<class T, unsigned int ctasize, class traits> | |
| __device__ void | segmentedScanRootToLeaves (T *s_data, unsigned int *s_flags, unsigned int *s_tflags, unsigned int stride) |
| Down-sweep phase of CTA-level segmented scan in shared memory. | |
| template<class T, class traits> | |
| __device__ void | storeForSegmentedScanSharedChunkToMem4 (T *d_odata, T threadScan0[4], T threadScan1[4], unsigned int threadFlag, T *s_idata, unsigned int numElements, int oDataOffset, int ai, int bi, int aiDev, int biDev, int bankOffsetA, int bankOffsetB) |
| Handles storing result s_data from shared memory to global memory (vec4 version). | |
| template<class T, class traits> | |
| __device__ void | segmentedScanCTA (T *s_data, unsigned int *s_flags, unsigned int *s_tflags, unsigned int *s_indices, T *d_blockSums=0, unsigned int *d_blockFlags=0, unsigned int *d_blockIndices=0) |
| CTA-level segmented scan routine. | |
1.5.5