//#pragma once #ifndef PNDMVADATASET_H #define PNDMVADATASET_H // C++ includes #include #include #include #include #include #include #include #include #include #include #include // ROOT #include "TFile.h" #include "TTree.h" #include "TRandom3.h" #include "TMVA/PDEFoam.h" // Local includes #include "PndMvaClass.h" #include "PndMvaVariable.h" // ======================================================================== // Normalization schemes typedef enum {NONE = 1, VARX = 2, MINMAX = 3, MEDIAN = 4} NormType; // ======================================================================== class PndMvaDataSet { public: PndMvaDataSet(const std::string& inputFilename, const std::vector& classNames, const std::vector& varNames); virtual ~PndMvaDataSet(); /** * Normalize event dataset using one of available methods. * @param t Normalization type (VARX, MINMAX, MEDIAN). */ void NormalizeDataSet(const NormType type = NONE); /** * Write the normalized DataSet to the out-put file. * @param outFile File name to write to */ void WriteDataSet(const std::string& outFile); /** * Initialize the class conditional means vectors. */ void InitClsCondMeans(); /** * Creates a data set with equal number of events for each class. */ void Trim(); const std::vector< std::pair*> >& GetData() const; const std::vector& GetClasses() const; const std::vector& GetVars() const; const std::map< std::string, std::vector* >& GetClassCondMeans() const; const std::string& GetInFileName() const; protected: /** * Read input event data. *@param InPut Input file name. */ void ReadInput(); private: PndMvaDataSet(const PndMvaDataSet& other); PndMvaDataSet& operator=(const PndMvaDataSet& other); /** * Class conditional mean for a given class. Stored in class * conditional means container. */ void CompClsCondMean(const std::string& clsName); /** * Computes Variance (unbiased estimator) for each parameter in the * feature list. * @param clsName The name of the class of events for with we want * to compute Var(X). */ void ComputeVariance(); /** * Determines the median for parameters of the loaded DataSet. */ void DetermineMedian(); /** * Determine Min Max difference. */ void MinMaxDiff(); //! Input File name std::string m_input; //! Classes. std::vector m_classes; //! Variables. std::vector m_vars; //! Container to keep the Event data feature vectors std::vector< std::pair*> > m_events; //! Container to keep the Class Conditional means std::map< std::string, std::vector* > m_ClassCondMeans; }; inline const std::vector< std::pair*> >& PndMvaDataSet::GetData() const { return m_events; } inline const std::vector& PndMvaDataSet::GetClasses() const { return m_classes; } inline const std::vector& PndMvaDataSet::GetVars() const { return m_vars; } inline const std::map< std::string, std::vector* >& PndMvaDataSet::GetClassCondMeans() const { return m_ClassCondMeans; } inline const std::string& PndMvaDataSet::GetInFileName() const { return m_input; } #endif