#ifndef TRIPLETS_BUNCH_CTX_H_ #define TRIPLETS_BUNCH_CTX_H_ /** @file bunch-ctx.h per-bunch context definition */ #include using namespace std; #include "data.h" #include "layout-ptr.h" #include "track.h" #include "triplet.h" #include "util.h" #include "vec.h" /** whether or not to bunch & sort the hits on host */ //#define BUNCH_HOST 1 #define BUNCH_HOST 0 // forward definitions class GlobalCtx; /** a kernel that does reconstruction for all bunches, 1 thread/bunch using dynamic parallelism */ __global__ void reconstruct_dyn_k(void); /** a kernel that does reconstruction for all bunches, 1 thread block/bunch */ __global__ void reconstruct_tblock_k(void); /** a function for reconstruction with host streams, 1 host stream / bunch */ void reconstruct_hstream_fun(GlobalCtx *g); /** a function for reconstruction with host streams and threads, 1 host stream / thread, bunches distributed evenly */ void reconstruct_hthread_fun(GlobalCtx *g); /** a function for a kernel which computes circles for tracks */ __device__ void compute_circles_tkfun (layptr cand_track, int ncand_tracks, int itrack); /** per-bunch context; all data here are stored on device */ class BunchCtx { public: /** static initializer for BunchCtx, guaranteed to be called before any other static function or creation of any bunchctx object */ static void initialize(void); /** dummy default constructor for host and device */ __host__ __device__ BunchCtx() {} /** bunch initialization */ BunchCtx(GlobalCtx *global_ctx, BunchCtx *d_this, int bunch_id, float tstart, float dtcore); //@region function to be called from host //@{ /** allocates memory needed for arrays of this bunch context, mostly on device */ void alloc_memory(void); /** bunch hits on host, take hits from the global context */ void bunch_hits_host(void); /** find tube hits on the host (except for pivot/skewed hits and non-zero tubes list) */ void find_tube_hits_host(void); /** sync arrays to device (not the bunch context itself) */ void sync_arrays_to_device(void); /** frees all memory associated with the context */ void free_memory(void); /** collect all tracks back on CPU; bunch contexts must be synchronized before that */ void collect_tracks(vector &tracks); //@} #if WITH_DYN_PAR /** does the entire job of track reconstruction with dynamic parallelism */ __device__ void reconstruct_dyn(void); #endif /** does the entire job of track reconstruction with single thread block */ __device__ void reconstruct_tblock(void); //@region thread functions to call from reconstructions kernels //@{ /** function for kernel to compute tube histogram */ __device__ void tube_hit_histo_tkfun(int ihit); /** function for kernel to group hits by tubes */ __device__ void tube_hit_group_tkfun(int ihit); /** function for kernel to find tube with non-zero hits */ __device__ void nz_tubes_tkfun(int itube); /** function for kernel to find triplets */ __device__ void find_triplets_tkfun(int ihit); /** function for kernel to find half-skewlets */ __device__ void find_half_skewlets_tkfun(int ihit); /** function for kernel to combine half-skewlets without poca test */ __device__ void combine_half_skewlets_tkfun (int irange, int ihalf, int jhalf); /** function for kernel to perform poca test */ __device__ void poca_test_tkfun(int irange, int ipair, bool with_bins); /** function for kernel to combine two layers of triplets */ __device__ void combine_triplets_tkfun (int inner_id, int outer_id, float max_phi, int tinner, int touter); /** function for kernel to evaluate tracks against tubes using shared memory without rasterization */ __device__ void eval_tracks_tubes_shared_tkfun(int itrack); /** function for kernel to evaluate tracks against tubes using simple //sector-layer "rasterization" and possibly shared memory */ __device__ void eval_tracks_tubes_raster_tkfun(int itrack); /** function for kernel to evaluate tracks against skewed layers, where each track is evaluated against all skewlets */ __device__ void eval_tracks_skewlets_all_tkfun(int itrack); /** function for kernel to evaluate tracks against skewed layers, where each track is evaluated only against bins it crosses */ __device__ void eval_tracks_skewlets_bins_tkfun(int itrack); //@} //@region reconstruction functions with dynamic parallelism //@{ /** construct a map of tubes into hits; initial hits are distributed bewteen bunches using */ #if WITH_DYN_PAR __device__ void find_tube_hits_dyn(cudaStream_t s); /** find triplets, also group them by pivot ranges */ __device__ void find_triplets_dyn(cudaStream_t s); /** finds candidate tracks and evalutates circles for them */ __device__ void find_cand_tracks_dyn(cudaStream_t s); /** finds skewlet halfs, grouped by skewed ranges */ __device__ void find_half_skewlets_dyn(cudaStream_t s); /** finds skewlets, grouped by skewed ranges (pairs of skewed ranges) */ __device__ void find_skewlets_dyn(cudaStream_t s); /** evaluate track candidates and leave only those which correspond to valid tracks */ __device__ void eval_cand_tracks_dyn(cudaStream_t s); #endif //@} //@region reconstruction functions with single thread block per bunch //@{ /** construct a map of tubes into hits; initial hits are distributed bewteen bunches using */ __device__ void find_tube_hits_tblock(void); /** find triplets, also group them by pivot ranges */ __device__ void find_triplets_tblock(void); /** finds candidate tracks and evalutates circles for them */ __device__ void find_cand_tracks_tblock(void); /** finds skewlet halfs, grouped by skewed ranges */ __device__ void find_half_skewlets_tblock(void); /** finds skewlets, grouped by skewed ranges (pairs of skewed ranges) */ __device__ void find_skewlets_tblock(void); /** evaluate track candidates and leave only those which correspond to valid tracks */ __device__ void eval_cand_tracks_tblock(void); //@} //@region reconstruction functions called from host, with single stream per //bunch //@{ /** sync the bunch back to host on a specific stream; it is assumed that bunch memory has been allocated by cudaMallocHost */ void sync_back(cudaStream_t s); /** construct a map of tubes into hits; initial hits are distributed bewteen bunches using */ void find_tube_hits_hstream(cudaStream_t s); /** find triplets, also group them by pivot ranges */ void find_triplets_hstream(cudaStream_t s); /** finds candidate tracks and evalutates circles for them */ void find_cand_tracks_hstream(cudaStream_t s); /** finds skewlet halfs, grouped by skewed ranges */ void find_half_skewlets_hstream(cudaStream_t s); /** finds skewlets, do everything except poca test */ void find_skewlets_pre_poca_hstream(cudaStream_t s); /** find skewlets, do poca test */ void find_skewlets_poca_hstream(cudaStream_t s); /** compute circles for the tracks */ void compute_track_circles(cudaStream_t s); /** evaluate track candidates and leave only those which correspond to valid tracks */ void eval_cand_tracks_hstream(cudaStream_t s); //@} /** global contexts for host and device */ GlobalCtx *h_global_ctx, *d_global_ctx; /** the device pointer to this bunch context */ BunchCtx *d_this; /** the id of the bunch */ int bunch_id; /** starting time of the bunch, inclusive */ float tstart; /** ending core time of the bunch, exclusive */ float tend_core; /** ending time of the bunch, exclusive, = core_time + DRIFT_TIME_PEPS */ float tend; /** hits on host and device side, [nhits] */ Hit *hits, *h_hits; /** the number of hits in the bunch */ int nhits; // FINISHED HERE // TODO: define hits with layouts, modify the code as necessary /** hits grouped by tubes (non-empty ranges only for pivot neighbors, which includes pivot tubes) */ //Hit *tube_hits, *h_tube_hits; layptr tube_hits, h_tube_hits; /** starts of hit ranges belonging to tubes (ranges non-empty only for pivot tubes and their neighbors; currently computed for all tubes) [ntubes + 1] */ int *tube_hit_starts, *h_tube_hit_starts; /** counts of the hits belonging to each tube [ntubes + 1] */ int *tube_hit_counts, *h_tube_hit_counts; /** scratch memory for doing prefix sums on GPU */ int *tube_hit_scratch; /** indices of tubes with non-zero hit counts */ int *itubes, *h_itubes; /** number of indices of tubes with non-zero counts */ int nitubes; /** hits belonging to pivot tubes, [MAX_NTRIPLETS, only npivot_hits occupied] */ layptr pivot_hits; //Hit *pivot_hits; /** total number of pivot hits */ int npivot_hits; /** hits belonging to skewed tubes, [MAX_NSKEWLETS, only nskewed_hits occupied] */ layptr skewed_hits; //Hit *skewed_hits; /** total number of skewed hits */ int nskewed_hits; /** triplets by range [npivot_ranges * MAX_NTRIPLETS] */ //Triplet *triplets; layptr triplets; /** triplet counts in ranges, for host and device, [npivot_ranges] */ int *h_ntriplets, *ntriplets; /** full array of half-skewlets, [nskewed_ranges * MAX_NSKEWLETS] */ //HalfSkewlet *half_skewlets; layptr half_skewlets; /** half-skewlet counts [nskewed_ranges] (correspond to skewed tube ranges) */ int *h_nhalf_skewlets, *nhalf_skewlets; /** pre-poca half-skewlet pairs, by range, [nskewlet_ranges * MAX_NSKEWLETS * 2] */ int *pre_poca_pairs; /** number of pre-poca pairs, by range, [nskewlet_ranges]*/ int *h_npre_poca_pairs, *npre_poca_pairs; /** full array of skewlets, [nskewlet_ranges * MAX_NSKEWLETS] */ layptr skewlets; /** array of individual skewlet components [nskewlet_ranges * MAX_NSKEWLETS] */ float *poca_xs, *poca_ys, *min_t0s; /** number of skewlets found per range, [nskewlet_ranges] */ int *h_nskewlets, *nskewlets; /** skewlet bins [nskewlet_ranges * nsectors * NSL_SKEWLET_BINS] */ int *skewlet_bins; /** candidate tracks [MAX_NTRACKS] */ layptr cand_tracks; /** number of candidte tracks (updated on device with atomics) */ int ncand_tracks; /** tracks found during reconstruction [MAX_NTRACKS] */ Track *found_tracks, *h_found_tracks; /** found track count (updated on device with atomics) */ int nfound_tracks; // global context can acccess data of bunch context friend class GlobalCtx; }; // class BunchCtx #endif