/** @file riemann track finder implementation */ #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace std; #include "riemann-track-finder.h" #include "util.h" #include "eigen_solver.h" #include "track.h" #define THREADS_PER_BLOCK 512 #define MAX_TRACK_LENGTH 20 /* maximum track growth size */ #define MAX_NUM_TRACKS 2048 /* maximum number of track candidates */ #define TRACK_SAVE_SPACE 32768 /* maximum space available for track candidate saving */ #define MIN_TRACK_LENGTH 4 /* minimum length of a track candidate */ //some original parameters //#define MAX_SZ_CHI_SQUARE .05 //#define MAX_DIST_FROM_PLANE .4 //#define MAX_DIST_FROM_SZ_LINE .3 //#define MIN_TRACK_LENGTH 4 //#define MAX_SZ_CHI_SQUARE .05 //#define MAX_DIST_FROM_PLANE .05 //#define MAX_DIST_FROM_SZ_LINE .05 //#define MIN_TRACK_LENGTH 5 RiemannTrackFinder::RiemannTrackFinder(const RiemannTrackFinderOptions &opts) { // zero out memset(this, 0, sizeof(*this)); this->opts = opts; } // RiemannTrackFinder RiemannTrackFinder::~RiemannTrackFinder() { // TODO: free all that memory //cucheck(cudaFree(d_hits)); } // ~RiemannTrackFinder /** * Runs the RiemannTrackFinder. * * @return the track candidates */ vector RiemannTrackFinder::find_tracks(){ if(read_hits() != 0){ cout << "ERROR READING HITS" << endl; return tracks; } if(opts.verbosity > 1){ for(int i = 0; i < hits.size(); i++){ cout << hits[i].x << endl; } } //execute all steps sort_hits_by_layer(); cudaDeviceReset(); initialize_layer_offset_and_num_hits_in_layer_and_layer_of_hit(); initialize_hits(); project_onto_paraboloid(); generate_seeds(); grow_seeds(); transfer_tracks_to_host(); write_tracks(); compare_tracks_to_true_tracks(); return tracks; } /* utility for sort_hits_by_layer */ bool hit_smaller_layer(Hit a, Hit b){ return a.layer < b.layer; } void RiemannTrackFinder::sort_hits_by_layer(){ sort(hits.begin(), hits.end(), hit_smaller_layer); for(int i = 0; i < hits.size(); i++){ id_of_sorted_hit.push_back(hits[i].id); } if(opts.verbosity > 1){ cout << "id_of_sorted_hit: "; for(int i = 0; i < id_of_sorted_hit.size(); i++){ cout << id_of_sorted_hit[i] << ", "; } cout << endl; } if(opts.verbosity > 1){ for(int i = 0; i < hits.size(); i++){ cout << hits[i].x << endl; } } } /** * Reads hits from input file, filling the vector hits. * @return 0 if successful */ int RiemannTrackFinder::read_hits(){ hits.clear(); const char* path = opts.hits_path; FILE *f = fopen(path, "r"); if(!f) { fprintf(stderr, "cannot read hits file %s\n", path); exit(-1); return 1; } int id, track_id, layer; float x, y, z; int nread_pos; const int GOOD_NREAD_POS = 2; int line = 1; while((nread_pos = fscanf(f, "%d %d %d %f %f %f %*[\n]", &id, &track_id, &layer, &x, &y, &z)) > 0) { if(nread_pos < GOOD_NREAD_POS) { fprintf(stderr, "cannot read hit line %d from file %s\n", line, path); exit(-1); return 1; } hits.push_back(Hit(id, track_id, layer, x, y, z)); line++; }// while() fclose(f); return 0; } /** * Initializes several useful maps in device memory. */ void RiemannTrackFinder::initialize_layer_offset_and_num_hits_in_layer_and_layer_of_hit(){ vector layer_starts; vector num_hits_in_each_layer; vector layer_of_hit; int layer_count = 0; int last_layer = -1; for(int i = 0; i < hits.size(); i++){ int current_layer = hits[i].layer; if(current_layer > last_layer){ last_layer = current_layer; layer_starts.push_back(i); layer_count++; } layer_of_hit.push_back(current_layer); } for(int i = 0; i < layer_starts.size() - 1; i++){ num_hits_in_each_layer.push_back(layer_starts[i+1] - layer_starts[i]); } num_hits_in_each_layer.push_back(hits.size() - layer_starts[layer_starts.size() - 1]); // update fields num_layers = layer_count; if(opts.verbosity > 99){ cout << "num_layers = " << num_layers << endl; } cudaMalloc(&d_layer_offset, layer_count * sizeof(int)); cudaMemcpy(d_layer_offset, &layer_starts[0], layer_count * sizeof(int), cudaMemcpyHostToDevice); cudaMalloc(&d_num_hits_in_layer, layer_count * sizeof(int)); cudaMemcpy(d_num_hits_in_layer, &num_hits_in_each_layer[0], layer_count * sizeof(int), cudaMemcpyHostToDevice); cudaMalloc(&d_layer_of_hit, hits.size() * sizeof(int)); cudaMemcpy(d_layer_of_hit, &layer_of_hit[0], hits.size() * sizeof(int), cudaMemcpyHostToDevice); if(opts.verbosity > 4){ cout << "layer offsets" << endl; for(int i = 0; i < layer_starts.size(); i++){ cout << layer_starts[i] << endl; } cout << "number of hits in each layer" << endl; for(int i = 0; i < num_hits_in_each_layer.size(); i++){ cout << num_hits_in_each_layer[i] << endl; } cout << "layer of hit" << endl; for(int i = 0; i < hits.size(); i++){ cout << layer_of_hit[i] << endl; } } } /** * Initializes hit information on the device. */ void RiemannTrackFinder::initialize_hits(){ vector hit_information; for(int i = 0; i < hits.size(); i++){ hit_information.push_back(hits[i].x); hit_information.push_back(hits[i].y); hit_information.push_back(0.0); //placeholder for w hit_information.push_back(hits[i].z); } // update_fields cudaMalloc(&d_hits, hits.size() * 4 * sizeof(float)); cudaMemcpy(d_hits, &hit_information[0], hits.size() * 4 * sizeof(float), cudaMemcpyHostToDevice); } /* Each thread computes the w coordinate of a hit point. */ __global__ void project_onto_paraboloid_k(float* d_hits, int num_hits){ int idx = threadIdx.x + blockIdx.x * blockDim.x; if(idx < num_hits){ d_hits[idx * 4 + 2] = d_hits[idx * 4] * d_hits[idx * 4] + d_hits[idx * 4 + 1] * d_hits[idx * 4 + 1]; } } /** * Computes the w coordinate for hit points on the device. */ void RiemannTrackFinder::project_onto_paraboloid(){ int num_hits = hits.size(); project_onto_paraboloid_k<<>>(d_hits, num_hits); if(opts.verbosity > 7){ float hit_information[num_hits * 4]; cudaMemcpy(&hit_information[0], d_hits, num_hits * 4 * sizeof(float), cudaMemcpyDeviceToHost); cout << "hit points after projection (x, y, w, z)" << endl; for(int i = 0; i < num_hits * 4; i += 4){ cout << hit_information[i] << ", " << hit_information[i+1] << ", " << hit_information[i+2] << ", " << hit_information[i+3] << endl; } } } /* Each thread writes a unique layer triplet (along with its number of hit triplets). */ /* Fills layer_triplets and num_hit_triplets_in_layer_triplet. */ __global__ void generate_layer_triplets(int* d_layer_triplets, int* d_layer_triplet_write_location, int* d_num_hit_triplets_in_layer_triplet, int* d_num_hits_in_layer, int num_layers, int num_layer_triplets){ int idx = blockIdx.x * blockDim.x + threadIdx.x; if(idx < num_layer_triplets) { int a, b, c; /* the 3 layers of the layer triplet */ int num_hit_triplets; /* number of hit triplets in the layer triplet */ /* idx-based determination of a, b, c, and num_hit_triplets */ float fudge = .0001; // TODO perhaps a more elegant solution exists float g = cbrtf(sqrtf(3.) * sqrtf(243 * powf(idx, 2.) - 1.) + 27. * idx); float n = g / cbrtf(9.) + 1 / (cbrtf(3.) * g) - 1.; int triangle_number = (int)floorf(n + fudge); float triangle_start = powf(triangle_number, 3.) / 6. + powf(triangle_number, 2.) / 2. + triangle_number / 3. - fudge; int triangle_pos = idx - triangle_start; int column_number = (int)(.5 * (sqrtf(8. * triangle_pos + 1.) - 1.) + fudge); float column_start = powf(column_number, 2.) / 2. + column_number / 2. - fudge; int column_pos = triangle_pos - column_start; a = num_layers - 3 - triangle_number; b = num_layers - 2 - column_number; c = num_layers - 1 - column_pos; num_hit_triplets = d_num_hits_in_layer[a] * d_num_hits_in_layer[b] * d_num_hits_in_layer[c]; /* atomic allocate/write of layer triplet and number of hit triplets in layer triplet */ int current_layer_triplet_write_location = atomicAdd(d_layer_triplet_write_location, 3); d_layer_triplets[current_layer_triplet_write_location + 0] = a; d_layer_triplets[current_layer_triplet_write_location + 1] = b; d_layer_triplets[current_layer_triplet_write_location + 2] = c; int num_hit_triplets_write_location = current_layer_triplet_write_location / 3; d_num_hit_triplets_in_layer_triplet[num_hit_triplets_write_location] = num_hit_triplets; //printf("num = %d, pos = %d\n", num_hit_triplets, num_hit_triplets_write_location); } } // generate_layer_triplets /* Each thread writes a unique hit triplet. */ /* Fills hit_triplets. */ __global__ void generate_hit_triplets(int* d_layer_triplets, int* d_num_hit_triplets_in_layer_triplet, int* d_hit_triplets, int* d_hit_triplet_write_location, int* d_num_hits_in_layer, int* d_layer_offset, int num_layer_triplets){ int idx = blockIdx.x * blockDim.x + threadIdx.x; if(idx < d_num_hit_triplets_in_layer_triplet[num_layer_triplets]) { /* perform binary search for idx */ int max = num_layer_triplets; int min = 0; int range = max - min; int mid = range / 2 + min; while(!(d_num_hit_triplets_in_layer_triplet[mid] <= idx && d_num_hit_triplets_in_layer_triplet[mid + 1] > idx)){ if(d_num_hit_triplets_in_layer_triplet[mid] > idx){ max = mid - 1; } else{ min = mid + 1; } range = max - min; mid = range / 2 + min; } /* determination of hit triplet */ int layer_triplet_start_index = mid * 3; int a = d_layer_triplets[layer_triplet_start_index + 0]; int b = d_layer_triplets[layer_triplet_start_index + 1]; int c = d_layer_triplets[layer_triplet_start_index + 2]; int num_hits_in_a = d_num_hits_in_layer[a]; int num_hits_in_b = d_num_hits_in_layer[b]; int num_hits_in_c = d_num_hits_in_layer[c]; int pos = idx - d_num_hit_triplets_in_layer_triplet[mid]; int version = pos; int hit_in_a = pos % num_hits_in_a + d_layer_offset[a]; pos /= num_hits_in_a; int hit_in_b = pos % num_hits_in_b + d_layer_offset[b]; pos /= num_hits_in_b; int hit_in_c = pos % num_hits_in_c + d_layer_offset[c]; /* atomic allocate/write of hit triplet */ int current_hit_triplet_write_location = atomicAdd(d_hit_triplet_write_location, 3); d_hit_triplets[current_hit_triplet_write_location + 0] = hit_in_a; d_hit_triplets[current_hit_triplet_write_location + 1] = hit_in_b; d_hit_triplets[current_hit_triplet_write_location + 2] = hit_in_c; //if(mid > 0){ // printf("hit triplet %d: (%d, %d, %d) from version %d of %d from layer triplet (%d, %d, %d) with num hits in each layer %d, %d, %d\n", idx, hit_in_a, hit_in_b, hit_in_c, version, d_num_hit_triplets_in_layer_triplet[mid] - d_num_hit_triplets_in_layer_triplet[mid - 1], a, b, c, num_hits_in_a, num_hits_in_b, num_hits_in_c); //} } } // generate_hit_triplets /** * Generates all unique hit triplets filling d_hit_triplets. Performs some verification of generation correctness on host provided sufficient verbosity. */ void RiemannTrackFinder::generate_seeds(){ int num_layer_triplets = num_layers * (num_layers - 1) * (num_layers - 2) / 6; //prepare device fields cudaMalloc(&d_layer_triplet_write_location, sizeof(int)); cudaMemset(d_layer_triplet_write_location, 0, sizeof(int)); cudaMalloc(&d_hit_triplet_write_location, sizeof(int)); cudaMemset(d_hit_triplet_write_location, 0, sizeof(int)); cudaMalloc(&d_layer_triplets, (num_layer_triplets + 1) * 3 * sizeof(int)); cudaMalloc(&d_num_hit_triplets_in_layer_triplet, (num_layer_triplets + 1) * sizeof(int)); generate_layer_triplets<<>>(d_layer_triplets, d_layer_triplet_write_location, d_num_hit_triplets_in_layer_triplet, d_num_hits_in_layer, num_layers, num_layer_triplets); /* perform scan on number of hit triplets for each layer triplet */ thrust::device_ptr dev_ptr(d_num_hit_triplets_in_layer_triplet); thrust::exclusive_scan(dev_ptr, dev_ptr + (num_layer_triplets + 1), dev_ptr ); //updates the field num_hit_triplets num_hit_triplets = dev_ptr[num_layer_triplets]; /* total number of hit triplets is last element */ /* allocate device memory for hit_triplets */ cudaMalloc(&d_hit_triplets, 3 * num_hit_triplets * sizeof(int)); generate_hit_triplets<<>>(d_layer_triplets, d_num_hit_triplets_in_layer_triplet, d_hit_triplets, d_hit_triplet_write_location, d_num_hits_in_layer, d_layer_offset, num_layer_triplets); if(opts.verbosity > 99){ int h_layer_offset[num_layers]; int h_num_hits_in_layer[num_layers]; cudaMemcpy(&h_layer_offset[0], d_layer_offset, num_layers * sizeof(int), cudaMemcpyDeviceToHost); cudaMemcpy(&h_num_hits_in_layer[0], d_num_hits_in_layer, num_layers * sizeof(int), cudaMemcpyDeviceToHost); /* layer triplet generation correctness check */ int h_layer_triplets[3 * num_layer_triplets]; int correct_layer_triplets[3 * num_layer_triplets]; cudaMemcpy(&h_layer_triplets[0], d_layer_triplets, num_layer_triplets * 3 * sizeof(int), cudaMemcpyDeviceToHost); for(int i = 0; i < 3 * num_layer_triplets; i += 3){ int a = h_layer_triplets[i + 0]; int b = h_layer_triplets[i + 1]; int c = h_layer_triplets[i + 2]; cout << a << ", " << b << ", " << c << endl; } int correct_layer_triplets_write_location = 0; for(int i = 0; i < num_layers - 2; i++) for(int j = i + 1; j < num_layers - 1; j++) for(int k = j + 1; k < num_layers; k++) { correct_layer_triplets[correct_layer_triplets_write_location + 0] = i; correct_layer_triplets[correct_layer_triplets_write_location + 1] = j; correct_layer_triplets[correct_layer_triplets_write_location + 2] = k; correct_layer_triplets_write_location += 3; } bool layer_triplets_correct = true; for(int i = 0; i < 3 * num_layer_triplets; i += 3) { int matches_found = 0; for(int j = 0; j < 3 * num_layer_triplets; j += 3) { bool match = true; for(int k = 0; k < 3; k++){ if(h_layer_triplets[k + j] != correct_layer_triplets[k + i]) match = false; } if(match) matches_found++; } if(matches_found != 1) { cout << "FAILURE: " << correct_layer_triplets[i + 0] << ", " << correct_layer_triplets[i + 1] << ", " << correct_layer_triplets[i + 2] << "| " << matches_found << endl; layer_triplets_correct = false; } } if(layer_triplets_correct) cout << "SUCCESS: correct layer triplets generated" << endl; /* hit triplet generation correctness check */ int h_num_hit_triplets_in_layer_triplet[num_layer_triplets + 1]; cudaMemcpy(&h_num_hit_triplets_in_layer_triplet[0], d_num_hit_triplets_in_layer_triplet, (num_layer_triplets + 1) * sizeof(int), cudaMemcpyDeviceToHost); int h_hit_triplets[3 * num_hit_triplets]; cudaMemcpy(&h_hit_triplets[0], d_hit_triplets, num_hit_triplets * 3 * sizeof(int), cudaMemcpyDeviceToHost); int correct_hit_triplets[3 * num_hit_triplets]; for(int i = 0; i < 3 * num_hit_triplets; i += 3){ int hit_1 = h_hit_triplets[i + 0]; int hit_2 = h_hit_triplets[i + 1]; int hit_3 = h_hit_triplets[i + 2]; cout << hit_1 << ", " << hit_2 << ", " << hit_3 << endl; } int correct_hit_triplets_write_location = 0; for(int i = 0; i < num_layers; i++){ for(int j = i + 1; j < num_layers; j++){ for(int k = j + 1; k < num_layers; k++){ for(int hit_1 = h_layer_offset[i]; hit_1 < h_layer_offset[i] + h_num_hits_in_layer[i]; hit_1++){ for(int hit_2 = h_layer_offset[j]; hit_2 < h_layer_offset[j] + h_num_hits_in_layer[j]; hit_2++){ for(int hit_3 = h_layer_offset[k]; hit_3 < h_layer_offset[k] + h_num_hits_in_layer[k]; hit_3++){ correct_hit_triplets[correct_hit_triplets_write_location + 0] = hit_1; correct_hit_triplets[correct_hit_triplets_write_location + 1] = hit_2; correct_hit_triplets[correct_hit_triplets_write_location + 2] = hit_3; correct_hit_triplets_write_location += 3; } } } } } } bool hit_triplets_correct = true; for(int i = 0; i < 3 * num_hit_triplets; i += 3) { int matches_found = 0; for(int j = 0; j < 3 * num_hit_triplets; j += 3) { bool match = true; for(int k = 0; k < 3; k++){ if(h_hit_triplets[k + j] != correct_hit_triplets[k + i]) match = false; } if(match) matches_found++; } if(matches_found != 1) { cout << "FAILURE: " << correct_hit_triplets[i + 0] << ", " << correct_hit_triplets[i + 1] << ", " << correct_hit_triplets[i + 2] << "| " << matches_found << endl; hit_triplets_correct = false; } } if(hit_triplets_correct) cout << "SUCCESS: correct hit triplets generated" << endl; } } /** Generates r_cg and n for a track candidate. @param d_hits the hit point data (x_1, y_1, w_1, z_1, x_2, ...) on the device @param hit_list the hits in this track candidate @param hit_list_length the number of hits in this track candidate @param r_cg the "center of gravity" of (x, y, w) of hits in this track candidate (a 3 vector) @param n the normalized normal vector of the plane from the plane fit (a 3 vector) */ __device__ void plane_fit(float* d_hits, int* hit_list, int hit_list_length, float* r_cg, float* n){ float A[3][3] = {{0., 0., 0.},{0., 0., 0.},{0., 0., 0.}}; for(int i = 0; i < 3; i++){ r_cg[i] = 0.; } for(int i = 0; i < hit_list_length; i++){ for(int j = 0; j < 3; j++){ r_cg[j] += d_hits[4 * hit_list[i] + j]; } } for(int i = 0; i < 3; i++){ r_cg[i] /= hit_list_length; } for(int i = 0; i < hit_list_length; i++){ float r_i_minus_r_cg[3]; for(int j = 0; j < 3; j++){ r_i_minus_r_cg[j] = d_hits[4 * hit_list[i] + j] - r_cg[j]; } A[0][0] += r_i_minus_r_cg[0] * r_i_minus_r_cg[0]; A[1][1] += r_i_minus_r_cg[1] * r_i_minus_r_cg[1]; A[2][2] += r_i_minus_r_cg[2] * r_i_minus_r_cg[2]; A[0][1] += r_i_minus_r_cg[0] * r_i_minus_r_cg[1]; A[0][2] += r_i_minus_r_cg[0] * r_i_minus_r_cg[2]; A[1][2] += r_i_minus_r_cg[1] * r_i_minus_r_cg[2]; //filling the rest of A is unnecessary } min_eigenvalue_eigenvector(A, n); } /** Performs sz fit for a track candidate. Updates alpha and beta accordingly. @param d_hits the hit point data (x_1, y_1, w_1, z_1, x_2, ...) on the device @param hit_list the hits in this track candidate @param hit_list_length the number of hits in this track candidate @param s the arc lengths on the fitted circle of the hits in this track candidate relative to the first hit of hit_list (only in x-y plane) @param alpha the intercept of fitted sz line @param beta the slope of fitted sz line */ __device__ void sz_fit(float* d_hits, int* hit_list, int hit_list_length, float* s, float &alpha, float &beta) { float s_ave = 0, z_ave = 0; for(int i = 0; i < hit_list_length; i++){ s_ave += s[i]; z_ave += d_hits[4 * hit_list[i] + 3]; } s_ave /= hit_list_length; z_ave /= hit_list_length; float numerator = 0, denominator = 0; for(int i = 0; i < hit_list_length; i++){ numerator += (d_hits[4 * hit_list[i] + 3] - z_ave) * (s[i] - s_ave); denominator += SQR((d_hits[4 * hit_list[i] + 3] - z_ave)); } beta = numerator / denominator; alpha = s_ave - beta * z_ave; } /** Performs helix fit for a track candidate. Updates s, r_cg, n, alpha, and beta. @param d_hits the hit point data (x_1, y_1, w_1, z_1, x_2, ...) on the device @param hit_list the hits in this track candidate @param s the arc lengths on the fitted circle of the hits in this track candidate relative to the first hit of hit_list (only in x-y plane) @param hit_list_length the number of hits in this track candidate @param r_cg the "center of gravity" of (x, y, w) of hits in this track candidate (a 3 vector) @param n the normalized normal vector of the plane from the plane fit (a 3 vector) @param alpha the intercept of fitted sz line @param beta the slope of fitted sz line */ __device__ void fit(float* d_hits, int* hit_list, float* s, int hit_list_length, float* r_cg, float* n, float &alpha, float &beta){ /* perform plane fit to get r_cg and n */ plane_fit(d_hits, hit_list, hit_list_length, r_cg, n); //printf("hits %d, %d, %d : (%f, %f, %f)\n", hit_list[0], hit_list[1], hit_list[2], n[0], n[1], n[2]); //printf("hits %d, %d, %d : (%f, %f, %f)\n", hit_list[0], hit_list[1], hit_list[2], r_cg[0], r_cg[1], r_cg[2]); // TODO: encapsulate parameter derivation float c = -1. * (n[0] * r_cg[0] + n[1] * r_cg[1] + n[2] * r_cg[2]); float x0 = -1. * n[0] / (2. * n[2]); float y0 = -1. * n[1] / (2. * n[2]); float r = sqrt((1. - n[2] * n[2] - 4. * c * n[2]) / (4. * n[2] * n[2])); float reference_x = d_hits[4 * hit_list[0] + 0] - x0; float reference_y = d_hits[4 * hit_list[0] + 1] - y0; float reference_z = d_hits[4 * hit_list[0] + 3]; //printf("x, y : (%f, %f)\n", d_hits[4*hit_list[0] + 0], d_hits[4*hit_list[0] + 1]); float reference_mag = sqrt(reference_x * reference_x + reference_y * reference_y); reference_x /= reference_mag; reference_y /= reference_mag; //printf("ref : (%f, %f)\n", reference_x, reference_y); s[0] = 0.; for(int i = 1; i < hit_list_length; i++){ float other_x = d_hits[4 * hit_list[i] + 0] - x0; float other_y = d_hits[4 * hit_list[i] + 1] - y0; float other_z = d_hits[4 * hit_list[i] + 3]; float other_mag = sqrt(other_x * other_x + other_y * other_y); other_x /= other_mag; other_y /= other_mag; float delta_phi = acos(reference_x * other_x + reference_y * other_y); float z_difference = other_z - reference_z; if(z_difference < 0){ delta_phi *= -1.; } s[i] = delta_phi * r; } //printf("hits %d, %d, %d : (%f, %f, %f)\n", hit_list[0], hit_list[1], hit_list[2], x0, y0, r); //printf("hits %d, %d, %d : (%f, %f, %f)\n", hit_list[0], hit_list[1], hit_list[2], s[0], s[1], s[2]); sz_fit(d_hits, hit_list, hit_list_length, s, alpha, beta); } /* Each thread computes the sz chi square for a unique hit triplet. Hit triplets that pass the sz chi square cut are written in d_cut_hit_triplets. */ __global__ void cut_hit_triplets_k(int* d_cut_hit_triplets, int* d_cut_hit_triplet_write_location, float* d_hits, int* d_hit_triplets, int* d_layer_of_hit, int num_hit_triplets, float max_sz_chi_square){ int idx = threadIdx.x + blockIdx.x * blockDim.x; if(idx < num_hit_triplets){ int hit_list[MAX_TRACK_LENGTH]; /* list of hits */ int hit_list_length = 3; /* number of hits */ float s[MAX_TRACK_LENGTH]; /* arc lengths of hits */ float r_cg[3]; /* "center of gravity" vector of (x, y, w) of hits in this track candidate */ float n[3]; /* normalized normal vector of the plane from the plane fit */ float alpha, beta; /* parameters of sz line fit */ /* initialize hit list from a hit triplet */ for(int i = 0; i < hit_list_length; i++){ hit_list[i] = d_hit_triplets[idx * 3 + i]; } /* do plane fit and sz fit */ fit(d_hits, hit_list, s, hit_list_length, r_cg, n, alpha, beta); //printf("hits %d, %d, %d : alpha: %f beta: %f\n", hit_list[0], hit_list[1], hit_list[2], alpha, beta); float chi_square = 0.; for(int i = 0; i < hit_list_length; i++){ float expected_s_i = alpha + beta * d_hits[4 * hit_list[i] + 3]; float chi_term = (expected_s_i - s[i]) * (expected_s_i - s[i]); chi_square += chi_term; } //printf("hits %d, %d, %d : chi_square: %f\n", hit_list[0], hit_list[1], hit_list[2], chi_square); /* cut on sz fit chi-square, record seeds that succeed (TODO: and their parameters) */ if(chi_square < max_sz_chi_square){ /* atomic allocate/write of valid seeds (cut hit triplets) */ int current_cut_hit_triplet_write_location = atomicAdd(d_cut_hit_triplet_write_location, 3); d_cut_hit_triplets[current_cut_hit_triplet_write_location + 0] = hit_list[0]; d_cut_hit_triplets[current_cut_hit_triplet_write_location + 1] = hit_list[1]; d_cut_hit_triplets[current_cut_hit_triplet_write_location + 2] = hit_list[2]; } /* float c; float x0, y0, r; // track parameters c = -1. * (n[0] * r_cg[0] + n[1] * r_cg[1] + n[2] * r_cg[2]); x0 = -1. * n[0] / (2. * n[2]); y0 = -1. * n[1] / (2. * n[2]); r = sqrt((1. - n[2] * n[2] - 4. * c * n[2]) / (4. * n[2] * n[2])); */ } } /* Each thread attempts to extend a seed (a cut hit triplet) and saves the resulting track's information in d_tracks and d_track_parameters (provided it meets the minimum length requirement). */ __global__ void extend_cut_hit_triplets_k(int* d_cut_hit_triplets, float* d_hits, int* d_layer_of_hit, int num_cut_hit_triplets, int num_hits, int* d_tracks, int* d_tracks_write_location, float* d_track_parameters, int* d_track_parameters_write_location, float max_dist_from_plane, float max_dist_from_sz_line){ int idx = threadIdx.x + blockIdx.x * blockDim.x; if(idx < num_cut_hit_triplets){ int hit_list[MAX_TRACK_LENGTH]; /* list of hits */ int hit_list_length = 3; /* number of hits */ float s[MAX_TRACK_LENGTH]; /* arc lengths of hits */ float r_cg[3]; /* "center of gravity" vector of (x, y, w) of hits in this track candidate */ float n[3]; /* normalized normal vector of the plane from the plane fit */ float alpha, beta; /* parameters of sz line fit */ float c; //final output parameters float r, x0, y0, dip; /* initialize hit list from a hit triplet */ for(int i = 0; i < hit_list_length; i++){ hit_list[i] = d_cut_hit_triplets[idx * 3 + i]; } /* do plane fit and sz fit */ fit(d_hits, hit_list, s, hit_list_length, r_cg, n, alpha, beta); /* explore possible hits to add */ for(int next_hit = hit_list[hit_list_length - 1] + 1; next_hit < num_hits; next_hit++){ int last_hit = hit_list[hit_list_length - 1]; int layer_of_last_hit = d_layer_of_hit[last_hit]; /* no two hits of a track may be in the same layer */ int layer_of_next_hit = d_layer_of_hit[next_hit]; if(layer_of_next_hit == layer_of_last_hit){ continue; } c = -1. * (n[0] * r_cg[0] + n[1] * r_cg[1] + n[2] * r_cg[2]); x0 = -1. * n[0] / (2. * n[2]); y0 = -1. * n[1] / (2. * n[2]); r = sqrt((1. - n[2] * n[2] - 4. * c * n[2]) / (4. * n[2] * n[2])); /* distance from plane must be small */ float dist_from_plane_of_next_hit = c; for(int i = 0; i < 3; i++){ dist_from_plane_of_next_hit += d_hits[4 * next_hit + i] * n[i]; } if(dist_from_plane_of_next_hit > max_dist_from_plane){ continue; } /* compute s for possibly new hit */ float s_of_next_hit; float reference_x = d_hits[4 * hit_list[0] + 0] - x0; float reference_y = d_hits[4 * hit_list[0] + 1] - y0; float reference_z = d_hits[4 * hit_list[0] + 3]; float reference_mag = sqrt(reference_x * reference_x + reference_y * reference_y); reference_x /= reference_mag; reference_y /= reference_mag; float other_x = d_hits[4 * next_hit + 0] - x0; float other_y = d_hits[4 * next_hit + 1] - y0; float other_z = d_hits[4 * next_hit + 3]; float other_mag = sqrt(other_x * other_x + other_y * other_y); other_x /= other_mag; other_y /= other_mag; float delta_phi = acos(reference_x * other_x + reference_y * other_y); float z_difference = other_z - reference_z; if(z_difference < 0){ delta_phi *= -1.; } s_of_next_hit = delta_phi * r; /* deviation from sz line must be small */ float dist_from_sz_line_of_next_hit = fabs(s_of_next_hit - (alpha + beta * d_hits[4 * next_hit + 3])); if(dist_from_sz_line_of_next_hit > max_dist_from_sz_line){ continue; } /* limited track storage space */ if(hit_list_length >= MAX_TRACK_LENGTH){ continue; } /* add hit */ hit_list[hit_list_length] = next_hit; hit_list_length++; /* refit */ fit(d_hits, hit_list, s, hit_list_length, r_cg, n, alpha, beta); } /* all hit additions have been considered */ /* if track long enough, save */ if(hit_list_length >= MIN_TRACK_LENGTH){ x0 = -1. * n[0] / (2. * n[2]); y0 = -1. * n[1] / (2. * n[2]); r = sqrt((1. - n[2] * n[2] - 4. * c * n[2]) / (4. * n[2] * n[2])); dip = atan(1. / beta); //printf("d_track_starts_write_location: %d\n", *d_track_starts_write_location); int current_track_write_location = atomicAdd(d_tracks_write_location, hit_list_length + 2); int current_track_parameters_location = atomicAdd(d_track_parameters_write_location, 4); d_tracks[current_track_write_location + 0] = current_track_parameters_location; d_tracks[current_track_write_location + 1] = hit_list_length; d_track_parameters[current_track_parameters_location + 0] = r; d_track_parameters[current_track_parameters_location + 1] = x0; d_track_parameters[current_track_parameters_location + 2] = y0; d_track_parameters[current_track_parameters_location + 3] = dip; for(int i = 0; i < hit_list_length; i++){ d_tracks[current_track_write_location + i + 2] = hit_list[i]; } /* switch(hit_list_length){ case 3: printf("track: %d, %d, %d\n", hit_list[0], hit_list[1], hit_list[2]); break; case 4: printf("track: %d, %d, %d, %d\n", hit_list[0], hit_list[1], hit_list[2], hit_list[3]); break; case 5: printf("track: %d, %d, %d, %d, %d\n", hit_list[0], hit_list[1], hit_list[2], hit_list[3], hit_list[4]); break; case 6: printf("track: %d, %d, %d, %d, %d, %d\n", hit_list[0], hit_list[1], hit_list[2], hit_list[3], hit_list[4], hit_list[5]); break; case 7: printf("track: %d, %d, %d, %d, %d, %d, %d\n", hit_list[0], hit_list[1], hit_list[2], hit_list[3], hit_list[4], hit_list[5], hit_list[6]); break; case 8: printf("track: %d, %d, %d, %d, %d, %d, %d, %d\n", hit_list[0], hit_list[1], hit_list[2], hit_list[3], hit_list[4], hit_list[5], hit_list[6], hit_list[7]); break; default: printf("track: very long\n"); } */ } } } /** Extends hit triplets to track candidates. */ void RiemannTrackFinder::grow_seeds(){ if(opts.verbosity > 1){ cout<<"growing seeds"<>>(d_cut_hit_triplets, d_cut_hit_triplet_write_location, d_hits, d_hit_triplets, d_layer_of_hit, num_hit_triplets, opts.max_sz_chi_square); int num_cut_hit_triplets; cudaMemcpy(&num_cut_hit_triplets, d_cut_hit_triplet_write_location, sizeof(int), cudaMemcpyDeviceToHost); num_cut_hit_triplets /= 3; if(opts.verbosity > 8){ int h_cut_hit_triplets[num_cut_hit_triplets * 3]; cudaMemcpy(&h_cut_hit_triplets[0], d_cut_hit_triplets, num_cut_hit_triplets * 3 * sizeof(int), cudaMemcpyDeviceToHost); cout << "HIT TRIPLETS THAT PASSED SZ CUT:" << endl; for(int i = 0; i < num_cut_hit_triplets; i++){ cout << h_cut_hit_triplets[i * 3 + 0] << ", " << h_cut_hit_triplets[i * 3 + 1] << ", " << h_cut_hit_triplets[i * 3 + 2] << endl; } } extend_cut_hit_triplets_k<<>>(d_cut_hit_triplets, d_hits, d_layer_of_hit, num_cut_hit_triplets, hits.size(), d_tracks, d_tracks_write_location, d_track_parameters, d_track_parameters_write_location, opts.max_dist_from_plane, opts.max_dist_from_sz_line); } /** Transfers track information from device to host. */ void RiemannTrackFinder::transfer_tracks_to_host(){ int h_tracks[TRACK_SAVE_SPACE]; float h_track_parameters[MAX_NUM_TRACKS * 4]; cudaMemcpy(&h_tracks[0], d_tracks, TRACK_SAVE_SPACE * sizeof(int), cudaMemcpyDeviceToHost); cudaMemcpy(&h_track_parameters[0], d_track_parameters, MAX_NUM_TRACKS * 4 * sizeof(float), cudaMemcpyDeviceToHost); //convert to original hit id's //for(int i = 0; i < h_track_starts[num_tracks]; i++){ //h_tracks[i] = id_of_sorted_hit[h_tracks[i]]; //} /* int max_track_id = -1; for(int i = 0; i < hits.size(); i++){ cout << "hit " << i << " track id: " << hits[i].track_id << endl; if(hits[i].track_id > max_track_id){ max_track_id = hits[i].track_id; } } num_true_tracks = max_track_id + 1; */ unique_track_ids.clear(); for(int i = 0; i < hits.size(); i++){ if(hits.at(i).track_id == -1){ continue; } bool already_contains = false; for(int j = 0; j < unique_track_ids.size(); j++){ if(unique_track_ids.at(j) == hits.at(i).track_id){ already_contains = true; } } if(!already_contains){ unique_track_ids.push_back(hits.at(i).track_id); } } if(opts.verbosity > 1){ cout << "unique track id mapping" << endl; for(int i = 0; i < unique_track_ids.size(); i++){ cout << i << " --> " << unique_track_ids.at(i) << endl; } //num_true_tracks = unique_track_ids.size(); cout << "num_true_tracks = " << unique_track_ids.size() << endl; } int pos = 0; while(h_tracks[pos + 1] != 0){ int parameter_loc = h_tracks[pos + 0]; int track_length = h_tracks[pos + 1]; int hit_list_start = pos + 2; if(opts.verbosity > 1){ cout << "Track of length " << track_length << ": "; } Track track; for(int j = 0; j < unique_track_ids.size(); j++){ track.num_hits_from_true_track.push_back(0); } for(int j = 0; j < track_length; j++){ int hit_id = h_tracks[hit_list_start + j]; if(opts.verbosity > 1){ cout << hit_id << ", "; } track.hits.push_back(hit_id); int track_id = hits[hit_id].track_id; int reduced_track_id = -1; for(int k = 0; k < unique_track_ids.size(); k++){ if(track_id == unique_track_ids.at(k)){ reduced_track_id = k; } } if(reduced_track_id >= 0){ track.num_hits_from_true_track[reduced_track_id]++; } } track.r = h_track_parameters[parameter_loc + 0]; track.x0 = h_track_parameters[parameter_loc + 1]; track.y0 = h_track_parameters[parameter_loc + 2]; track.dip = h_track_parameters[parameter_loc + 3]; tracks.push_back(track); if(opts.verbosity > 1){ cout << endl; } pos += track_length + 2; } } /** Evaluates track finding performance and outputs several text file summaries. */ void RiemannTrackFinder::compare_tracks_to_true_tracks(){ //float fullness = .5; //float purity = .6; float fullness = .7; float purity = .7; if(opts.verbosity > 1){ cout << "num_true_tracks: " << unique_track_ids.size() << endl; for(int i = 0; i < tracks.size(); i++){ cout << "track " << i << " num_hits_from_true_track: "; for(int j = 0; j < unique_track_ids.size(); j++){ cout << tracks[i].num_hits_from_true_track[j] << ", "; } cout << endl; } cout << "Hit track id's:" << endl; for(int i = 0; i < hits.size(); i++){ cout << "Hit " << i << " track_id = " < 1){ cout << "true track " << i << " length: " << true_track_length << endl; } if(true_track_length >= MIN_TRACK_LENGTH){ num_min_length_true_tracks++; } bool found = false; for(int j = 0; j < tracks.size(); j++){ if(tracks[j].num_hits_from_true_track[i] * 1. / true_track_length > fullness && tracks[j].num_hits_from_true_track[i] * 1. / tracks[j].hits.size() > purity){ found = true; } } if(found){ num_true_tracks_found++; } } ostringstream builder; ofstream out; builder << opts.output_file << "_performance.csv"; out.open(builder.str().c_str()); ostringstream builder_2; ofstream out_2; builder_2 << opts.output_file << "_possible.csv"; out_2.open(builder_2.str().c_str()); ostringstream builder_3; ofstream out_3; builder_3 << opts.output_file << "_num_tracks.csv"; out_3.open(builder_3.str().c_str()); out_3 << num_true_tracks_found << " " << num_min_length_true_tracks << endl; float percent_found = num_true_tracks_found * 1. / unique_track_ids.size(); float percent_found_possible = num_true_tracks_found * 1. / num_min_length_true_tracks; if(unique_track_ids.size() == 0){ percent_found = -1; } if(num_min_length_true_tracks == 0){ percent_found_possible = -1; } ofstream count_check; count_check.open("count_check.txt", ios::app); if(percent_found_possible == -1){ count_check << 1 << endl; } count_check.close(); out << percent_found << " " << percent_found_possible << endl; out_2 << percent_found_possible << endl; cout << "===== Performance Report =====" << endl; cout << "(# true tracks found) / (# true tracks) = " << percent_found << endl; cout << "(# true tracks found) / (# true tracks of min length) = " << percent_found_possible << endl; float good_track_purity = .7; int num_good_tracks = 0; cout << "tracks.size() == " << tracks.size() << endl; for(int i = 0; i < tracks.size(); i++){ //for(int j = 0; j < tracks[i].num_hits_from_true_track.begin() if(tracks[i].num_hits_from_true_track.size() > 0){ if(*max_element(tracks[i].num_hits_from_true_track.begin(), tracks[i].num_hits_from_true_track.end()) * 1. / tracks[i].hits.size() > good_track_purity){ num_good_tracks++; } } } out_3 << tracks.size() << " " << num_good_tracks << endl; float percent_good = num_good_tracks * 1. / tracks.size(); if(tracks.size() == 0){ percent_good = -1; } out << percent_good << endl; cout << "(# good track candidates) / (# track candidates) = " << percent_good << endl; out.close(); out_2.close(); out_3.close(); } /** Writes track candidates' information to a file. */ void RiemannTrackFinder::write_tracks(){ ofstream out; ostringstream builder; builder << opts.output_file << "_tracks.csv"; out.open(builder.str().c_str()); for(int i = 0; i < tracks.size(); i++){ Track track = tracks.at(i); out << track.r << " " << track.x0 << " " << track.y0 << " " << track.dip << " " << track.hits.size() << " "; for(int j = 0; j < track.hits.size(); j++){ out << track.hits.at(j); if(j != track.hits.size() - 1){ out << " "; } } out << endl; } out.close(); }