// @(#)root/tmva $Id: TMVAClassification.C 44112 2012-05-04 10:00:41Z evt $ /********************************************************************************** * Project : TMVA - a ROOT-integrated toolkit for multivariate data analysis * * Package : TMVA * * Root Macro: TMVAClassification * * * * This macro provides examples for the training and testing of the * * TMVA classifiers. * * * * As input data is used a toy-MC sample consisting of four Gaussian-distributed * * and linearly correlated input variables. * * * * The methods to be used can be switched on and off by means of booleans, or * * via the prompt command, for example: * * * * root -l ./TMVAClassification.C\(\"Fisher,Likelihood\"\) * * * * (note that the backslashes are mandatory) * * If no method given, a default set of classifiers is used. * * * * The output file "TMVA.root" can be analysed with the use of dedicated * * macros (simply say: root -l ), which can be conveniently * * invoked through a GUI that will appear at the end of the run of this macro. * * Launch the GUI via the command: * * * * root -l ./TMVAGui.C * * * **********************************************************************************/ #include #include #include #include #include "TChain.h" #include "TFile.h" #include "TTree.h" #include "TString.h" #include "TObjString.h" #include "TSystem.h" #include "TROOT.h" #if not defined(__CINT__) || defined(__MAKECINT__) // needs to be included when makecint runs (ACLIC) #include "TMVA/Factory.h" #include "TMVA/Tools.h" #endif void myTMVAClassification( TString myMethodList = "" ) { // The explicit loading of the shared libTMVA is done in TMVAlogon.C, defined in .rootrc // if you use your private .rootrc, or run from a different directory, please copy the // corresponding lines from .rootrc // methods to be processed can be given as an argument; use format: // // mylinux~> root -l TMVAClassification.C\(\"myMethod1,myMethod2,myMethod3\"\) // // if you like to use a method via the plugin mechanism, we recommend using // // mylinux~> root -l TMVAClassification.C\(\"P_myMethod\"\) // (an example is given for using the BDT as plugin (see below), // but of course the real application is when you write your own // method based) //--------------------------------------------------------------- // This loads the library TMVA::Tools::Instance(); // to get access to the GUI and all tmva macros TString tmva_dir(TString(gRootDir) + "/tmva"); if(gSystem->Getenv("TMVASYS")) tmva_dir = TString(gSystem->Getenv("TMVASYS")); gROOT->SetMacroPath(tmva_dir + "/test/:" + gROOT->GetMacroPath() ); gROOT->ProcessLine(".L TMVAGui.C"); // Default MVA methods to be trained + tested std::map Use; // --- Cut optimisation Use["Cuts"] = 0; Use["CutsGA"] = 0; Use["CutsSA"] = 0; // --- 1-dimensional likelihood ("naive Bayes estimator") Use["LikelihoodMIX"] = 0; // --- Mutidimensional likelihood and Nearest-Neighbour methods Use["PDEFoam"] = 0; // --- Linear Discriminant Analysis Use["Fisher"] = 0; // --- Neural Networks (all are feed-forward Multilayer Perceptrons) Use["MLP"] = 0; // Recommended ANN // --- Boosted Decision Trees Use["BDT"] = 1; // uses Adaptive Boost // --------------------------------------------------------------- std::cout << std::endl; std::cout << "==> Start TMVAClassification" << std::endl; // Select methods (don't look at this code - not of interest) if (myMethodList != "") { for (std::map::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0; std::vector mlist = TMVA::gTools().SplitString( myMethodList, ',' ); for (UInt_t i=0; i::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " "; std::cout << std::endl; return; } Use[regMethod] = 1; } } // -------------------------------------------------------------------------------------------------- // --- Here the preparation phase begins // Create a ROOT output file where TMVA will store ntuples, histograms, etc. TString outfileName( "TMVA.root" ); TFile* outputFile = TFile::Open( outfileName, "RECREATE" ); // Create the factory object. Later you can choose the methods // whose performance you'd like to investigate. The factory is // the only TMVA object you have to interact with // // The first argument is the base of the name of all the // weightfiles in the directory weight/ // // The second argument is the output file for the training results // All TMVA output can be suppressed by removing the "!" (not) in // front of the "Silent" argument in the option string TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ); // If you wish to modify default settings // (please check "src/Config.h" to see all available global options) // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0; // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"; // Define the input variables that shall be used for the MVA training // note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" // [all types of expressions that can also be parsed by TTree::Draw( "expression" )] // factory->AddVariable( "myvar1 := var1+var2", 'F' ); // factory->AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' ); // factory->AddVariable( "var3", "Variable 3", "units", 'F' ); // factory->AddVariable( "var4", "Variable 4", "units", 'F' ); factory->AddVariable( "LMDTrackQ.fThetarecLMD", 'F' ); factory->AddVariable( "LMDTrackQ.fPhirecLMD", 'F' ); factory->AddVariable( "LMDTrackQ.fXrecLMD", 'F' ); factory->AddVariable( "LMDTrackQ.fYrecLMD", 'F' ); // // You can add so-called "Spectator variables", which are not used in the MVA training, // // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the // // input variables, the response values of all trained MVAs, and the spectator variables // factory->AddSpectator( "spec1 := aphrec","phi", "rad", 'F' ); // factory->AddSpectator( "spec2 := athrec", "theta", "mrad", 'F' ); // factory->AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' ); // Read training and test data // (it is also possible to use ASCII format as input -> see TMVA Users Guide) // TString fname_bkg = "/run/media/karavdina/STORAGE/NOISE_hits/mom_15/NOISEbkg_th430/Lumi_TrksQA_0.root"; TString fname_bkg = "/run/media/karavdina/STORAGE/NOISE_hits/mom_1_5/NOISEbkg_th430_3frames/Lumi_TrksQA_0.root"; TFile *input_bkg = TFile::Open(fname_bkg); std::cout << "--- TMVAClassification : Using BKG input file for training: " << input_bkg->GetName() << std::endl; TTree *backgroundTrain = (TTree*)input_bkg->Get("cbmsim"); // input_bkg->Close(); TString fname_sig = "/run/media/karavdina/STORAGE/NOISE_hits/mom_1_5/DPMsig_AllCuts/Lumi_TrksQA_0.root"; TFile *input_sig = TFile::Open(fname_sig); std::cout << "--- TMVAClassification : Using SIG input file for training: " << input_sig->GetName() << std::endl; // TTree *backgroundTrain = (TTree*)input_bkg->Get("cbmsim"); TTree *signalTrain = (TTree*)input_sig->Get("cbmsim"); // input_sig->Close(); Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; factory->AddSignalTree ( signalTrain, signalWeight); factory->AddBackgroundTree( backgroundTrain, backgroundWeight); // --- Register the training and test trees // Apply additional cuts on the signal and background samples (can be different) // TCut mycuts = "abs(azrec)<10. && abs(ayrec)<100. && abs(axrec)<100."; // TCut mycutb = "abs(azrec)<10. && abs(ayrec)<100. && abs(axrec)<100."; TCut mycuts = "LMDTrackQ.fTrkRecStatus>=0 && abs(LMDTrackQ.fThetamc-LMDTrackQ.fThetarec)<3*7.46691e-04"; //@1.5 GEV //TCut mycuts = "LMDTrackQ.fTrkRecStatus>=0 && abs(LMDTrackQ.fThetamc-LMDTrackQ.fThetarec)<3.*1.01792e-04"; //@15 GEV TCut mycutb = "LMDTrackQ.fTrkRecStatus>=0"; // Tell the factory how to use the training and testing events // // If no numbers of events are given, half of the events in the tree are used // for training, and the other half for testing: // factory->PrepareTrainingAndTestTree( mycuts,mycutb,"NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ); // factory->PrepareTrainingAndTestTree(mycuts, mycutb,"nTrain_Signal=100000:nTrain_Background=100000:nTest_Signal=100000:nTest_Background=100000:SplitMode=Random:NormMode=NumEvents:!V" ); //use small for training and all for test factory->PrepareTrainingAndTestTree(mycuts, mycutb,"nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ); //use all // ---- Book MVA methods // // Please lookup the various method configuration options in the corresponding cxx files, eg: // src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html // it is possible to preset ranges in the option string in which the cut optimisation should be done: // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable if (Use["Cuts"]) factory->BookMethod( TMVA::Types::kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ); if (Use["CutsGA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsGA", "!H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=100:SC_steps=10:SC_rate=5:SC_factor=0.95" ); if (Use["CutsSA"]) factory->BookMethod( TMVA::Types::kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); // Use a variable-dependent mix of splines and kernel density estimator if (Use["LikelihoodMIX"]) factory->BookMethod( TMVA::Types::kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ); // Multi-dimensional likelihood estimator using self-adapting phase-space binning if (Use["PDEFoam"]) factory->BookMethod( TMVA::Types::kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ); // Fisher discriminant (same as LD) if (Use["Fisher"]) factory->BookMethod( TMVA::Types::kFisher, "Fisher", "!H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=100:NsmoothMVAPdf=10" ); // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if (Use["MLP"]) factory->BookMethod( TMVA::Types::kMLP, "MLP", "!H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ); if (Use["BDT"]) // Adaptive Boost // factory->BookMethod( TMVA::Types::kBDT, "BDT", // "!H:!V:CreateMVAPdfs:NTrees=850:MaxDepth=4:BoostType=AdaBoost:AdaBoostBeta=0.5:nCuts=2000:PruneMethod=NoPruning"); factory->BookMethod( TMVA::Types::kBDT, "BDT", "!H:!V:CreateMVAPdfs:NTrees=850:MaxDepth=4:BoostType=Grad:nCuts=100:PruneMethod=NoPruning:SeparationType=MisClassificationError:DoBoostMonitor=True:UseRandomisedTrees=True:UseNvars=3"); // -------------------------------------------------------------------------------------------------- // ---- Now you can optimize the setting (configuration) of the MVAs using the set of training events // factory->OptimizeAllMethods("SigEffAt001","Scan"); // factory->OptimizeAllMethods("ROCIntegral","GA"); // -------------------------------------------------------------------------------------------------- // ---- Now you can tell the factory to train, test, and evaluate the MVAs // Train MVAs using the set of training events factory->TrainAllMethods(); // ---- Evaluate all MVAs using the set of test events factory->TestAllMethods(); // ----- Evaluate and compare performance of all configured MVAs factory->EvaluateAllMethods(); // -------------------------------------------------------------- // Save the output outputFile->Close(); std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl; std::cout << "==> TMVAClassification is done!" << std::endl; delete factory; // Launch the GUI for the root macros if (!gROOT->IsBatch()) TMVAGui( outfileName ); }