/* ************************************************ *
 * MVA classifiers trainers interface.              *
 * Author:  M. Babai                                *
 *          M.Babai@rug.nl                          *
 * Version:                                         *
 * LICENSE:                                         *
 * ************************************************ */
#include "PndMvaTrainer.h"

// Standard library and ROOT headers used in this translation unit; some of
// these may already be available through PndMvaTrainer.h.
#include <iostream>
#include <fstream>
#include <cassert>
#include <ctime>
#include <typeinfo>

#include "TRandom3.h"
#include "TFile.h"
#include "TTree.h"
#include "TObjArray.h"
#include "TObjString.h"
#include "TVectorD.h"
#include "TMatrixD.h"

using namespace std;

//! Constructor
PndMvaTrainer::PndMvaTrainer(std::string const& InPut,
                             std::vector<std::string> const& ClassNames,
                             std::vector<std::string> const& VarNames,
                             bool trim)
  : m_dataSets(InPut, ClassNames, VarNames, TRAIN),
    m_trim(trim)
{}

//! Destructor
PndMvaTrainer::~PndMvaTrainer()
{
  m_testSet_indices.clear();
  m_StepErro.clear();
}

void PndMvaTrainer::Initialize()
{
  m_dataSets.Initialize();

  // Trim data set.
  if(m_trim)
  {
    m_dataSets.Trim();
  }

  // Initialize class conditional means.
  m_dataSets.InitClsCondMeans();

  // Init random seed for this run.
  time_t seconds = time(NULL);
  m_RND_seed = seconds;
}

/**
 * Creates test and train data sets.
 * @param percent Percentage of the data set to be used for testing and
 * cross-validation.
 */
void PndMvaTrainer::splitTetsSet(int percent)
{
  TRandom3 rndIndx(m_RND_seed);

  std::vector< std::pair<std::string, std::vector<float>*> > const& events = m_dataSets.GetData();
  assert(events.size() != 0);

  unsigned int TestEvtCnt = (percent * events.size()) / 100;

  std::cout << " Preparing train and test sets.\n"
            << " Test set contains " << TestEvtCnt << " events and train set "
            << (events.size() - TestEvtCnt) << '\n';

  // Draw random event indices (duplicates are absorbed by the set) until the
  // requested test-set size is reached.
  while(m_testSet_indices.size() < TestEvtCnt)
  {
    int trindx = static_cast<int>(rndIndx.Uniform(0.0, events.size() - 1));
    m_testSet_indices.insert(trindx);
  }
}

void PndMvaTrainer::WriteErroVect(std::string const& FileName)
{
  std::ofstream Outfile;
  Outfile.open(FileName.c_str(), ios::out | ios::trunc);

  Outfile << "# Error vector output\n";
  Outfile << "# <step> \t <trainError> \t <testError>\n";

  for(size_t i = 0; i < m_StepErro.size(); i++)
  {
    Outfile << " " << (m_StepErro[i]).m_step  << "\t"
            << " " << (m_StepErro[i]).m_trErr << "\t"
            << " " << (m_StepErro[i]).m_tsErr << '\n';
  }
  Outfile << std::flush;
  Outfile.close();
}

/**
 * Write the training and normalization data to outFile.
 */
void PndMvaTrainer::WriteToWeightFile(std::vector< std::pair<std::string, std::vector<float>*> > const& weights)
{
  std::cout << " Writing classifier output to " << m_outFile << '\n';

  // Get class labels.
  std::vector<PndMvaClass> const& classes = m_dataSets.GetClasses();
  // Get variable names.
  std::vector<PndMvaVariable> const& vars = m_dataSets.GetVars();

  /*
   * Open output file and write coordinates.
   */
  if(m_outFile == "")
  {
    std::cerr << " The output file name cannot be an empty string.\n"
              << " Set the output file name if you want to store"
              << " the generated weights."
              << std::endl;
    return;
  }

  TFile out(m_outFile.c_str(), "RECREATE", "WeightFileOutput", 9);

  // One tree per class, holding the weight vectors of that class.
  for(size_t cls = 0; cls < classes.size(); cls++)
  {
    std::vector<float> buffer(vars.size(), 0.0);
    std::string name = classes[cls].Name;
    std::string desc = "Description Of " + name;
    char const* treeName = name.c_str();
    char const* treeDesc = desc.c_str();

    // Create a tree.
    TTree sig(treeName, treeDesc);

    // Create branches and bind the variables.
    for(size_t j = 0; j < vars.size(); j++)
    {
      std::string vname = vars[j].Name;
      std::string leaf  = vname + "/F";
      char const* bname = vname.c_str();
      char const* lname = leaf.c_str();
      // Bind the parameters to the tree elements.
      sig.Branch(bname, &buffer[j], lname);
    }

    // Fill the tree with all weight vectors belonging to this class.
    for(size_t i = 0; i < weights.size(); i++)
    {
      if(weights[i].first == name)
      {
        for(size_t k = 0; k < buffer.size(); k++)
        {
          buffer[k] = (weights[i].second)->at(k);
        }
        sig.Fill();
      }
    }
    // Write the created tree.
    std::cout << " Writing weights for " << name << '\n';
    sig.Write();
  }// End for cls

  // _______________ Normalization and transformation data _______________
  // ______________ Write normalization factors.
  std::vector<float> buffer(vars.size(), 0.0);
  std::string name = "NormFact";
  std::string desc = "desc of " + name;
  TTree fact(name.c_str(), desc.c_str());

  // Create branches and bind the variables.
  for(size_t j = 0; j < vars.size(); j++)
  {
    std::string vname = vars[j].Name;
    std::string leaf  = vname + "/F";
    char const* bname = vname.c_str();
    char const* lname = leaf.c_str();
    // Bind the parameters to the tree elements.
    fact.Branch(bname, &buffer[j], lname);
  }
  // Fill the tree.
  for(size_t i = 0; i < vars.size(); i++)
  {
    buffer[i] = vars[i].NormFactor;
  }
  fact.Fill();
  fact.Write();

  // ______________ Write means.
  name = "Means";
  desc = "desc of " + name;
  TTree meanTree(name.c_str(), desc.c_str());

  // Create branches and bind the variables.
  for(size_t j = 0; j < vars.size(); j++)
  {
    std::string vname = vars[j].Name;
    std::string leaf  = vname + "/F";
    char const* bname = vname.c_str();
    char const* lname = leaf.c_str();
    // Bind the parameters to the tree elements.
    meanTree.Branch(bname, &buffer[j], lname);
  }
  // Fill the tree.
  for(size_t i = 0; i < vars.size(); i++)
  {
    buffer[i] = vars[i].Mean;
  }
  meanTree.Fill();
  meanTree.Write();

  // ______________ Write PCA data.
  if(m_dataSets.Used_PCA())
  {
    PndMvaVarPCATransform const& pca_tmp = m_dataSets.Get_PCA();
    TVectorD const& MeanVals   = pca_tmp.GetMeanValues();
    TMatrixD const& EigenVects = pca_tmp.GetEigenVectors();
    // Write to output.
    MeanVals.Write("PCAMeans");
    EigenVects.Write("PCAEigenVectors");
  }

  // ______________ List of classes, variables, object names, ....
  // Classes.
  TObjArray Labels(0, 0);
  Labels.SetName("Labels");
  Labels.SetOwner(kTRUE);
  // Add labels.
  for(size_t cls = 0; cls < classes.size(); cls++)
  {
    std::string la = classes[cls].Name;
    Labels.Add(new TObjString(la.c_str()));
  }
  Labels.Write("Labels", TObject::kSingleKey);

  // Variables.
  TObjArray variables(0, 0);
  variables.SetName("Variables");
  variables.SetOwner(kTRUE);
  // Add variables.
  for(size_t j = 0; j < vars.size(); j++)
  {
    std::string vn = vars[j].Name;
    variables.Add(new TObjString(vn.c_str()));
  }
  variables.Write("Variable", TObject::kSingleKey);

  // List of other operations performed on the input.
  TObjArray Modifiers(0, 0);
  Modifiers.SetName("Modifiers");
  Modifiers.SetOwner(kTRUE);
  Modifiers.Add(new TObjString("Means"));
  Modifiers.Add(new TObjString("NormFact"));

  if(m_dataSets.Used_PCA())
  {
    Modifiers.Add(new TObjString("PCAMeans"));
    Modifiers.Add(new TObjString("PCAEigenVectors"));
  }
  Modifiers.Write("Modifiers", TObject::kSingleKey);

  // __________________________________
  // Close the output file.
  out.Close();
}

// NOTE: The element type of foamList was lost in this copy of the source;
// TMVA::PDEFoam* is assumed here because GetFoamName() is used below. The
// type must match the declaration in PndMvaTrainer.h.
void PndMvaTrainer::WriteToWeightFile(std::vector<TMVA::PDEFoam*> const& foamList)
{
  std::cout << " Writing Foams to file " << m_outFile << '\n';
  /*
   * Open output file and write coordinates.
   */
  if(m_outFile == "")
  {
    std::cerr << " The output file name cannot be an empty string.\n"
              << " Set the output file name if you want to store"
              << " the generated weights."
              << std::endl;
    return;
  }

  TFile rootFile(m_outFile.c_str(), "RECREATE", "foamfile", 9);

  // Write every foam under its own name.
  for(size_t j = 0; j < foamList.size(); j++)
  {
    foamList[j]->Write(foamList[j]->GetFoamName().Data());
    // DEBUG FIXME
    foamList[j]->Print();
    std::cout << " Foam element type: " << typeid(foamList[j]).name() << std::endl;
  }

  std::vector<PndMvaVariable> const& vars = m_dataSets.GetVars();

  // Write normalization factors.
  std::vector<float> buffer(vars.size(), 0.0);
  std::string name = "NormFact";
  std::string desc = "desc of " + name;
  TTree fact(name.c_str(), desc.c_str());

  // Create branches and bind the variables.
  for(size_t j = 0; j < vars.size(); j++)
  {
    std::string vname = vars[j].Name;
    std::string leaf  = vname + "/F";
    char const* bname = vname.c_str();
    char const* lname = leaf.c_str();
    // Bind the parameters to the tree elements.
    fact.Branch(bname, &buffer[j], lname);
  }
  // Fill the tree.
  for(size_t i = 0; i < vars.size(); i++)
  {
    buffer[i] = vars[i].NormFactor;
  }
  fact.Fill();
  fact.Write();

  // Write means.
  name = "Means";
  desc = "desc of " + name;
  TTree meanTree(name.c_str(), desc.c_str());

  // Create branches and bind the variables.
  for(size_t j = 0; j < vars.size(); j++)
  {
    std::string vname = vars[j].Name;
    std::string leaf  = vname + "/F";
    char const* bname = vname.c_str();
    char const* lname = leaf.c_str();
    // Bind the parameters to the tree elements.
    meanTree.Branch(bname, &buffer[j], lname);
  }
  // Fill the tree.
  for(size_t i = 0; i < vars.size(); i++)
  {
    buffer[i] = vars[i].Mean;
  }
  meanTree.Fill();
  meanTree.Write();

  // Close the output file.
  rootFile.Close();
}

/*
void PndMvaTrainer::WriteDataSetToOutFile()
{
  if(m_outFile.size() == 0)
  {
    std::cerr << " Empty output file name." << std::endl;
    return;
  }
  m_dataSets.WriteDataSet(m_outFile);
}
*/

//! Select input data normalization scheme.
void PndMvaTrainer::NormalizeData(NormType t)
{
  m_normType = t;
  //m_dataSets.SetNormType(t);
  m_dataSets.NormalizeDataSet(t);
}

/**
 * Parameter decorrelation.
 *
 * Performs PCA (Principal Component Analysis) on the input data set.
 */
void PndMvaTrainer::PCATransForm()
{
  m_dataSets.Use_PCA(true);
  m_dataSets.PCATransForm();
}
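
/* ______________________________________________________________________
 * Usage sketch (comment only, not part of the build): a minimal outline of
 * how a concrete trainer built on this interface is typically driven. The
 * derived trainer name MyTrainer, the input file name, the label and
 * variable lists, and the NormType value `scheme` are illustrative
 * assumptions; only the calls defined in this file (constructor,
 * Initialize, NormalizeData, PCATransForm, splitTetsSet, WriteErroVect)
 * come from the interface above.
 *
 *   std::vector<std::string> labels; // e.g. "electron", "pion", ...
 *   std::vector<std::string> vars;   // e.g. "p", "emc", "thetaC", ...
 *
 *   MyTrainer trainer("TrainData.root", labels, vars, true);
 *   trainer.Initialize();          // Read, trim and prepare the data set.
 *   trainer.NormalizeData(scheme); // scheme: one of the NormType values.
 *   trainer.PCATransForm();        // Optional parameter decorrelation.
 *   trainer.splitTetsSet(20);      // Reserve 20% of the events for testing.
 *   // ... run the concrete training algorithm of the derived class ...
 *   trainer.WriteErroVect("StepErrors.dat");
 * ______________________________________________________________________
 */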