#include #include #include #include #include using namespace std; #ifdef _WIN32 #include // _finite #define finite _finite #endif #define TOTALTIME #include "Stopwatch.h" #include #ifdef DEBUG_THREADS // const int coeff = 1; const int MaxNTracks = 20000;//20000 const int NFits = 1000;//10; const int Ntimes = 1;//int(3000.*coeff);//100; #else // if DEBUG_THREADS const int NFits = 10; #if defined(TBB) || defined(OMP) const int MaxNTracks = 80000; const int Ntimes = 100; #else const int MaxNTracks = 20000; const int Ntimes = 100;//int(3000.*coeff);//100; #endif // TBB #endif // ifndef DEBUG_THREADS #if defined(CONVENTIONAL) #ifdef DAF #include "DAFC.h" typedef DAFC FitInterface; #else #include "FitC.h" typedef FitC FitInterface; #endif #elif defined(SQUARE_ROOT1S) || defined(SQUARE_ROOT1) || defined(SQUARE_ROOT2) #ifdef DAF #include "DAFSR.h" typedef DAFSR FitInterface; #else #include "FitSR.h" typedef FitSR FitInterface; #endif #elif defined(UD_FILTERING_V) || defined(UD_FILTERING_S) #ifdef DAF #include "DAFUD.h" typedef DAFUD FitInterface; #else #include "FitUD.h" typedef FitUD FitInterface; #endif #endif /* Use Intel TBB for multithreading */ #ifdef TBB // #define DEBUG_THREADS // tbb with output #include "openlab_mod/multithread.h" #include "openlab_mod/parallel_for_simpleEqNThr.h" #include "openlab_mod/parallel_for_simpleHT.h" #include "openlab_mod/parallel_for_simple.h" #include "tbb/task_scheduler_init.h" #include "tbb/parallel_for.h" #include "tbb/blocked_range.h" using namespace tbb; #endif // TBB // default #if !(defined(ANALYTIC_EXTRAPOLATION) || defined(RK4_EXTRAPOLATION)) #define ANALYTIC_EXTRAPOLATION #endif #if !(defined(CONVENTIONAL) || defined(SQUARE_ROOT1) || defined(SQUARE_ROOT1S) || defined(SQUARE_ROOT2) || defined(UD_FILTERING_V) || defined(UD_FILTERING_S)) #define CONVENTIONAL #endif #define MUTE #if defined(TBB) || defined(OMP) #define COPY // copy NTracksPerThread tracks as more times as it's needed const int NTracksPerThread = 1000; #endif #ifdef OMP #include #include "pthread.h" #include "errno.h" #define handle_error_en(en, msg) do { errno = en; perror(msg); exit(EXIT_FAILURE); } while (0) #endif #include #include string dataDir = "./data/"; int tasks = 16; /* #threads <= #tasks */ #ifdef TBB const bool useObserver = 1; // 1 for use thread-cpu correspondent const float speedUpHT = 1.04; // = time1logCore / time2logCoreOn1PhysCore. Need for optimize proc. usage. #endif // TBB Station* vStations; Track vTracks[MaxNTracks]; MCTrack vMCTracks[MaxNTracks]; int NStations = 0; int NTracks = 0; int NTracksV = 0; FieldRegion field0; FitInterface fitter; void ReadInput(){ fstream FileGeo, FileTracks, FileMCTracks; FileGeo.open( (dataDir+"geo.dat").c_str(), ios::in ); FileTracks.open( (dataDir+"tracks.dat").c_str(), ios::in ); FileMCTracks.open( (dataDir+"mctracksin.dat").c_str(), ios::in ); { FieldVector H[3]; Fvec_t Hz[3]; for( int i=0; i<3; i++){ Single_t Bx, By, Bz, z; FileGeo >> z >> Bx >> By >> Bz; Hz[i] = z; H[i].X = Bx; H[i].Y = By; H[i].Z = Bz; #ifndef MUTE cout<<"Input Magnetic field:"<> NStations; #ifndef MUTE cout<<"Input "<> ist; if( ist!=i ) break; Station &st = vStations[i]; FileGeo >> st.z >> st.thick >> st.RL >> st.UInfo.sigma2 >> st.VInfo.sigma2; st.UInfo.sigma2 *= st.UInfo.sigma2; st.VInfo.sigma2 *= st.VInfo.sigma2; st.UInfo.sigma216 = st.UInfo.sigma2*16.f; st.VInfo.sigma216 = st.VInfo.sigma2*16.f; if( i < 2 ){ // mvd //TODO From Geo File!!! st.UInfo.cos_phi = 1.f; st.UInfo.sin_phi = 0.f; st.VInfo.cos_phi = 0.f; st.VInfo.sin_phi = 1.f; } else{ st.UInfo.cos_phi = 1.f; // 0 degree st.UInfo.sin_phi = 0.f; st.VInfo.cos_phi = 0.9659258244f; // 15 degree st.VInfo.sin_phi = 0.2588190521f; } Fvec_t idet = st.UInfo.cos_phi*st.VInfo.sin_phi - st.UInfo.sin_phi*st.VInfo.cos_phi; idet = 1.f/(idet*idet); st.XYInfo.C00 = ( st.VInfo.sin_phi*st.VInfo.sin_phi*st.UInfo.sigma2 + st.UInfo.sin_phi*st.UInfo.sin_phi*st.VInfo.sigma2 )*idet; st.XYInfo.C10 =-( st.VInfo.sin_phi*st.VInfo.cos_phi*st.UInfo.sigma2 + st.UInfo.sin_phi*st.UInfo.cos_phi*st.VInfo.sigma2 )*idet; st.XYInfo.C11 = ( st.VInfo.cos_phi*st.VInfo.cos_phi*st.UInfo.sigma2 + st.UInfo.cos_phi*st.UInfo.cos_phi*st.VInfo.sigma2 )*idet; #ifndef MUTE #ifdef X87 cout<<" "<> N; #ifndef MUTE cout<> st.Map.X[i]; for( int i=0; i> st.Map.Y[i]; for( int i=0; i> st.Map.Z[i]; } { // field intergals with respect to Last station Fvec_t z0 = vStations[NStations-1].z; Fvec_t sy = 0.f, Sy = 0.f; for( int i=NStations-1; i>=0; i-- ){ Station &st = vStations[i]; Fvec_t dz = st.z-z0; Fvec_t Hy = vStations[i].Map.Y[0]; Sy += dz*sy + dz*dz*Hy/2.f; sy += dz*Hy; st.SyL = Sy; z0 = st.z; } // field intergals with respect to First station z0 = vStations[0].z; sy = 0.f, Sy = 0.f; for( int i=0; i>itr; //if( itr!=NTracks ) break; if( NTracks>=MaxNTracks ) break; Track &t = vTracks[NTracks]; MCTrack &mc = vMCTracks[NTracks]; FileTracks >> mc.MC_x >> mc.MC_y >> mc.MC_z >> mc.MC_px >> mc.MC_py >> mc.MC_pz >> mc.MC_q >> t.NHits; for( int i=0; i> ist; t.vHits[i].ista = ist; FileTracks >> t.vHits[i].x >> t.vHits[i].y; } TrackIndex[NTracks] = itr; if( t.NHits==NStations ) NTracks++; } int NMCTracks=0; int iPoint = 0; while( !FileMCTracks.eof() ){ int itr; FileMCTracks>>itr; //if( itr!=NTracks ) break; if( NMCTracks>=MaxNTracks ) break; MCTrack &mc = vMCTracks[NMCTracks]; Single_t temp; int NMCPoints; FileMCTracks >> temp >> temp >> temp >> temp >> temp >> temp >> temp >> NMCPoints; mc.NMCPoints = NMCPoints; for( int i=0; i> ist; mc.vPoints[i].ista = ist; FileMCTracks >> mc.vPoints[i].x >> mc.vPoints[i].y>>mc.vPoints[i].z>>mc.vPoints[i].px>>mc.vPoints[i].py>>mc.vPoints[i].pz; #ifdef DAF // if(ist == 5) { // shift for DAF // const float coeff = 10.f; // // vTracks[NMCTracks].vHits[ist].x = mc.vPoints[i].x + sqrt(vStations[ist].UInfo.sigma2[0]) * coeff; // // cout << sqrt(vStations[ist].UInfo.sigma2[0]) << " U " << sqrt(vStations[ist].VInfo.sigma2[0]) << endl; // // cout << sqrt(vStations[ist].XYInfo.C00[0]) << " X " << sqrt(vStations[ist].XYInfo.C11[0]) << endl; // vTracks[NMCTracks].vHits[ist].x = mc.vPoints[i].x + sqrt(vStations[ist].XYInfo.C00[0]) * coeff; // vTracks[NMCTracks].vHits[ist].y = mc.vPoints[i].y + sqrt(vStations[ist].XYInfo.C11[0]) * coeff; // } #endif } #ifdef DAF // Track &t = vTracks[NMCTracks]; // for( int i=0; i MaxNTracks ) { continue; } vTracks[j * NTracksPerThread + i] = vTracks[i]; vMCTracks[j * NTracksPerThread + i] = vMCTracks[i]; #pragma omp atomic NTracks++; } } } #endif // COPY NTracksV = NTracks/vecN; NTracks = NTracksV*vecN; } void WriteOutput(){ fstream Out, Diff; Out.open( (dataDir + "fit.dat").c_str(), ios::out ); #ifdef DAF Out << "Smoother" << endl; #else Out << "Fitter" << endl; #endif // DAF Out << MaxNTracks << endl; for( int it=0, itt=0; itt fitSpeedCPU(tasks,0); std::vector fitSpeedReal(tasks,0); std::vector nFittedTracks(tasks,0); Stopwatch timerLocal; Stopwatch timer; Stopwatch timer2; // Stopwatch timer_test; timer.Start(); for( int times=0; times 2 // use affinity_partitioner // static simple_partitioner ap; // static affinity_partitioner ap; // standart // parallel_for(blocked_range(0, NTracksV, NTracksV / tasks), ApplyFit(TracksV, vStations, NStations, NFits), ap); // use auto partitioning. Good then tasks != 2^N // static auto_partitioner ap; // parallel_for(blocked_range(0, NTracksV), ApplyFit(TracksV, vStations, NStations, NFits), ap); { // use special partitioning. Good used with fixed thread-cpu correspondent by observer // if (speedUpHT >= 0){ // parallel_for_simpleHT(NTracksV, tasks, ApplyFit(TracksV, vStations, NStations, NFits),speedUpHT); // } // else{ // parallel_for_simple(NTracksV, 10, tasks, ApplyFit(TracksV, vStations, NStations, NFits)); // }; { // fix nTask on thread const int nTracksVOnThread = NTracksPerThread/vecN; parallel_for_simpleEqNThr(nTracksVOnThread, tasks, ApplyFit(TracksV, vStations, NStations, NFits, &fitSpeedCPU, &nFittedTracks)); NTracks = nTracksVOnThread*vecN*tasks; // for time counters } } #else int ifit; int iV; int nFittedTracks = 0; #ifndef OMP tasks = 1; #endif #pragma omp parallel num_threads(tasks) private(iV,ifit) firstprivate(NTracksV,NStations, nFittedTracks,timerLocal) { timerLocal.Start(); #pragma omp for nowait //schedule(static) for( iV=0; iV 0) fitSpeedCPU[curThredNum] = nFittedTracks/cpuTimeLocal; else fitSpeedCPU[curThredNum] = 0; if(realTimeLocal > 0) fitSpeedReal[curThredNum] = nFittedTracks/realTimeLocal; else fitSpeedReal[curThredNum] = 0; } #endif timer2.Stop(); TimeTable[times]=timer2.RealTime(); } timer.Stop(); #ifdef DAF // print results int NP[MaxNStations] = {0,0,0,0,0,0,0,0,0,0}; // TODO move out for(int iSt=0; iSt < NStations; iSt++) { for( int iV=0; iV