segmented_scan_cta.cu File Reference

CUDPP CTA-level segmented scan routines. More...

#include <cudpp_globals.h>
#include <math.h>
#include <cstdio>

Classes

class  SegmentedScanTraits< T, oper, backward, exclusivity, unroll, fullBlock, sums >
 Template class containing compile-time parameters to the segmented scan functions. More...

Segmented scan Functions

#define _SUM_SEGMENTED_STEP_NOSYNC(d)
#define _SUM_SEGMENTED_STEP_SYNC(d)
#define _SUM_SEGMENTED_STEP   _SUM_SEGMENTED_STEP_NOSYNC
#define SUM_SEGMENTED_STEP_SYNC(d)
#define SUM_SEGMENTED_STEP(d)
#define _SCAN_SEGMENTED_STEP_NOSYNC(d)
#define _SCAN_SEGMENTED_STEP_SYNC(d)
#define _SCAN_SEGMENTED_STEP   _SCAN_SEGMENTED_STEP_NOSYNC
#define SCAN_SEGMENTED_STEP_SYNC(d)
#define SCAN_SEGMENTED_STEP(d)
template<class T, typename traits>
__device__ void loadForSegmentedScanSharedChunkFromMem4 (T *s_odata, T threadScan0[4], T threadScan1[4], unsigned int &threadFlag, unsigned int *s_oflags, unsigned int *s_otflags, unsigned int *s_oindices, const T *d_idata, const unsigned int *d_iflags, int numElements, int iDataOffset, int &ai, int &bi, int &aiDev, int &biDev, int &bankOffsetA, int &bankOffsetB)
 Handles loading input s_data from global memory to shared memory (vec4 version).
template<class T, unsigned int ctasize, class traits>
__device__ unsigned int buildSegmentedSumTree (T *s_data, unsigned int *s_indices, unsigned int *s_tflags)
 Up-sweep (aka reduce) phase of CTA-level segmented scan in shared memory.
template<class T, class traits>
__device__ void clearLastElementAndStoreSumFlagIndex (T *s_data, unsigned int *s_tflags, unsigned int *s_indices, T *d_blockSums, unsigned int *d_blockFlags, unsigned int *d_blockIndices)
 Intermediate phase between up- and down-sweep phases of CTA-level segmented scan.
template<class T, unsigned int ctasize, class traits>
__device__ void segmentedScanRootToLeaves (T *s_data, unsigned int *s_flags, unsigned int *s_tflags, unsigned int stride)
 Down-sweep phase of CTA-level segmented scan in shared memory.
template<class T, class traits>
__device__ void storeForSegmentedScanSharedChunkToMem4 (T *d_odata, T threadScan0[4], T threadScan1[4], unsigned int threadFlag, T *s_idata, unsigned int numElements, int oDataOffset, int ai, int bi, int aiDev, int biDev, int bankOffsetA, int bankOffsetB)
 Handles storing result s_data from shared memory to global memory (vec4 version).
template<class T, class traits>
__device__ void segmentedScanCTA (T *s_data, unsigned int *s_flags, unsigned int *s_tflags, unsigned int *s_indices, T *d_blockSums=0, unsigned int *d_blockFlags=0, unsigned int *d_blockIndices=0)
 CTA-level segmented scan routine.


Detailed Description

CUDPP CTA-level segmented scan routines.

segmented_scan_cta.cu


Generated on Sun Apr 20 19:51:33 2008 for CUDPP by  doxygen 1.5.5