/*
 * S.Vanniarajan        V.Suyam.Jothi@kvi.nl
 *
 * This is the multivariate event classification tool designed for the
 * PANDAROOT analysis package of the PANDA experiment.
 * TMVA (Toolkit for MultiVariate Analysis) is a two-class classifier
 * (signal and background) available with the ROOT package, mainly used
 * for event classification in High Energy Physics experiments.
 *
 * This tool builds on TMVA to provide multi-class classification.
 *
 * S.Vanniarajan 01-08-08
 * Modified:
 * M. Babai
 */
#include "PndMultiClassMVA.h"

// Default constructor with default parameters for the training.
PndMultiClassMVA::PndMultiClassMVA() {
  fNCLASS = 0;
  fNVAR = 0;
  fNSigTrain = "100";
  fNSigTest = "100";
  fNBkgTrain = "100";
  fNBkgTest = "100";
  fPruneStrengthBDT = "4.0";
  fBoostTypeBDT = "AdaBoost";
  fNCutsBDT = "10";
  fNTreeBDT = "100";
  fNKNN = "40";
}

// Default destructor.
PndMultiClassMVA::~PndMultiClassMVA() {
  fVarNameArray.clear();
  fClassNameArray.clear();
}

// Add a variable to be used in the classification.
void PndMultiClassMVA::AddVar(string varName) {
  fVarNameArray.push_back(varName);
  fNVAR = fNVAR + 1;
}

// Add a class name to be used in the classification.
void PndMultiClassMVA::AddClass(string className) {
  fClassNameArray.push_back(className);
  fNCLASS = fNCLASS + 1;
}

// Create a configuration file which identifies itself with the
// application in which the classifier is used.
void PndMultiClassMVA::WriteConfigFile() {
  TString fileName = fAPPNAME + ".dat";
  TString varString = ":";
  TString classString = ":";

  // Collect the class and variable names as ":"-separated lists.
  for (int i = 0; i < fNCLASS; i++) {
    classString += fClassNameArray.at(i).c_str();
    classString += ":";
  }
  for (int i = 0; i < fNVAR; i++) {
    varString += fVarNameArray.at(i).c_str();
    varString += ":";
  }

  fstream config;
  config.open(fileName, fstream::out);
  config << fAPPNAME << std::endl
         << classString << std::endl
         << varString << std::endl;
  config.close();
}

// Train the selected classifier. For the TMVA-based methods the option
// string is composed here and the work is delegated to TrainTestTM().
void PndMultiClassMVA::Train() {
  PndLVQTrain* lvq;
  TString MvaConfig;

  switch (fMVATYPE) {
  case KNN: // Multi class KNN
    std::cout << " => Current implementation of the MultiClsKNN" << std::endl
              << "\tcreates root trees to store the event parameters and" << std::endl
              << "\tperforms no computations. If you want to use this," << std::endl;
    break;
  case LVQ: // Multi class LVQ
    lvq = new PndLVQTrain(fINFILENAME, fClassNameArray, fVarNameArray);
    // Set learning parameters
    lvq->SetLearnPrameters(m_initConst, m_ethaZero, m_ethaFinal, m_NumSweep);
    lvq->Train(m_numLVQProto, m_OutFileName);
    delete lvq;
    break;
  case LVQ21: // Multi class LVQ2.1
    lvq = new PndLVQTrain(fINFILENAME, fClassNameArray, fVarNameArray);
    // Set learning parameters
    //lvq->SetLearnPrameters(0.8, 0.1, 0.00001, 1000);
    lvq->SetLearnPrameters(m_initConst, m_ethaZero, m_ethaFinal, m_NumSweep);
    lvq->Train21(m_numLVQProto, m_OutFileName);
    delete lvq;
    break;
  case TMBDT: // BDT from TMVA
    MvaConfig = "!H:!V:NTrees=" + fNTreeBDT
      + ":BoostType=" + fBoostTypeBDT
      + ":SeparationType=GiniIndex:nCuts=" + fNCutsBDT
      + ":PruneMethod=NoPruning:PruneStrength=" + fPruneStrengthBDT;
    TrainTestTM(TMBDT, MvaConfig);
    break;
  case TMMLP: // MLP from TMVA
    MvaConfig = "Normalise:H:!V:NeuronType=" + mlpNeuTyp
      + ":NCycles=" + mlpCycle
      + ":HiddenLayers=" + mlpNumHidden
      + ":TestRate=" + mlpTestRate;
    TrainTestTM(TMMLP, MvaConfig);
    break;
  case TMKNN: // KNN from TMVA
    MvaConfig = "nkNN=" + fNKNN
      + ":V:TreeOptDepth=" + mKnnDepth
      + ":ScaleFrac=" + mKnnscalefrac
      + ":!UseKernel:" + mKnnSelOpt;
    TrainTestTM(TMKNN, MvaConfig);
    break;
  default:
    std::cout << " NO classifier was selected." << std::endl;
    break;
  }
}
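/*
 * Example (illustrative): with the defaults set in the constructor
 * (NTrees = 100, BoostType = AdaBoost, nCuts = 10, PruneStrength = 4.0),
 * the TMBDT branch above hands the following option string to TMVA:
 *
 *   "!H:!V:NTrees=100:BoostType=AdaBoost:SeparationType=GiniIndex:
 *    nCuts=10:PruneMethod=NoPruning:PruneStrength=4.0"
 *
 * (wrapped over two lines here; the actual string has no line break)
 */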
"< you need atleast two" <<" variables for the Multivariate Analysis. "<AccessPathName( fINFILENAME )) { std::cout << "--- BDTAnalysis : accessing " << fINFILENAME << std::endl; input = TFile::Open( fINFILENAME ); } else if (!input) { std::cout << " could not open data file" << std::endl; return; } std::vector TreeArray; for (int i = 0; i < fNCLASS ; i++){ TString s,treeName; treeName = fClassNameArray.at(i); std::cout << (TTree*)input->Get(treeName) << std::endl; TreeArray.push_back( (TTree*)input->Get(treeName) ); } for (int i = 0 ; i < fNCLASS ; i++ ){ TString s,OutFileName,anaName; anaName = fAPPNAME +fClassNameArray.at(i); s = s +".root"; OutFileName = fClassNameArray.at(i) + ".root"; TFile* outputFile = TFile::Open( OutFileName, "RECREATE" ); TMVA::Factory *factory = new TMVA::Factory( anaName, outputFile, Form("!V:%sColor", 0?"!":"")); Double_t signalWeight = 1.0; Double_t backgroundWeight = 1.0; for (int j = 0 ; j < fNCLASS ; j++ ){ if (i == j ){ factory->AddSignalTree(TreeArray[j],signalWeight); } else{ factory->AddBackgroundTree(TreeArray[j],backgroundWeight); } } for (int k = 0 ; k < fNVAR ; k++ ){ TString varName; varName = fVarNameArray.at(k); factory->AddVariable(varName,'F'); } // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1"; TCut mycuts = fVarCuts; //"p<100&&emc<7"; // for example: TCut mycutb = "abs(var1)<0.5"; TCut mycutb = fVarCuts; //"p<100&&emc<7"; TString trainConfig = "NSigTrain=" + fNSigTrain + ":NBkgTrain=" + fNBkgTrain + ":NSigTest=" + fNSigTest + ":NBkgTest=" +fNBkgTest + ":SplitMode=Random:!V"; cout<PrepareTrainingAndTestTree( mycuts, mycutb, trainConfig ); // Select which classifier to use. switch(mva){ case TMBDT: factory->BookMethod( TMVA::Types::kBDT, "BDT", config); break; case TMMLP: factory->BookMethod( TMVA::Types::kMLP, "MLP", config); break; case TMKNN: factory->BookMethod( TMVA::Types::kKNN, "KNN", config); break; default: std::cout << " NO TMVA classifier was selected." << std::endl; break; } factory->TrainAllMethods(); factory->TestAllMethods(); // factory->EvaluateAllMethods(); outputFile->Close(); delete factory; } for(unsigned int k = 0; k < TreeArray.size(); k++) { delete TreeArray[k]; } TreeArray.clear(); WriteConfigFile(); } ClassImp(PndMultiClassMVA);