Defines | |
| #define | AVOID_BANK_CONFLICTS |
Variables | |
| const int | NUM_BANKS = 16 |
| const int | LOG_NUM_BANKS = 4 |
| const int | CTA_SIZE = 128 |
| const int | WARP_SIZE = 32 |
| const int | LOG_CTA_SIZE = 7 |
| const int | LOG_WARP_SIZE = 5 |
| const int | LOG_SIZEOF_FLOAT = 2 |
| const int | SCAN_ELTS_PER_THREAD = 8 |
| const int | SEGSCAN_ELTS_PER_THREAD = 8 |
| const int | maxSharedMemoryPerBlock = 16384 |
| const int | maxThreadsPerBlock = CTA_SIZE |
| #define AVOID_BANK_CONFLICTS |
Define this macro if, by default, shared memory allocation should perform additional computation to avoid bank conflicts.
| const int NUM_BANKS = 16 |
Number of shared memory banks
| const int LOG_NUM_BANKS = 4 |
log_2(NUM_BANKS)
| const int CTA_SIZE = 128 |
Number of threads in a CTA
| const int WARP_SIZE = 32 |
Number of threads in a warp
| const int LOG_CTA_SIZE = 7 |
log_2(CTA_SIZE)
| const int LOG_WARP_SIZE = 5 |
log_2(WARP_SIZE)
| const int LOG_SIZEOF_FLOAT = 2 |
log_2(sizeof(float))
| const int SCAN_ELTS_PER_THREAD = 8 |
Number of elements per scan thread
| const int SEGSCAN_ELTS_PER_THREAD = 8 |
Number of elements per segmented scan thread
| const int maxSharedMemoryPerBlock = 16384 |
Number of bytes of shared memory in each block
| const int maxThreadsPerBlock = CTA_SIZE |
Maximum number of threads in a CTA
1.5.5