/*************************************** * Class interface of DataSet class. * * Author: M.Babai (M.Babai@rug.nl) * * License: * * Version: * ***************************************/ #ifndef PND_MVA_DATASET_H #define PND_MVA_DATASET_H // C++ includes #include #include #include #include #include #include #include #include #include #include #include // ROOT #include "TFile.h" #include "TTree.h" #include "TRandom3.h" #include "TMVA/PDEFoam.h" // Local includes #include "PndMvaClass.h" #include "PndMvaVariable.h" // ======================================================================== // Normalization schemes typedef enum {NONE = 0, VARX = 1, MINMAX = 2, MEDIAN = 3} NormType; // ======================================================================== class PndMvaDataSet { public: /** * Constructor. *@param inputFilename Input File name. *@param classNames Names of available classes. *@param varNames Available variabl names. */ PndMvaDataSet(const std::string& inputFilename, const std::vector& classNames, const std::vector& varNames); //! Destructor virtual ~PndMvaDataSet(); /** * Normalize event dataset using one of available methods. * @param t Normalization type (VARX, MINMAX, MEDIAN). */ void NormalizeDataSet(const NormType type = NONE); /** * Write the normalized DataSet to the out-put file. * @param outFile File name to write to */ void WriteDataSet(const std::string& outFile); /** * Initialize the class conditional means vectors. */ void InitClsCondMeans(); /** * Creates a data set with equal number of events for each class. */ void Trim(); //! Get available data. const std::vector< std::pair*> >& GetData() const; //! Get the list of available classes. const std::vector& GetClasses() const; //! Get the list of available variables. const std::vector& GetVars() const; //! Get classconditional means for all classes. const std::map< std::string, std::vector* >& GetClassCondMeans() const; //! Get name of input file name (weight/event file). const std::string& GetInFileName() const; protected: /** * Read input event data. */ void ReadInput(); private: PndMvaDataSet(const PndMvaDataSet& other); PndMvaDataSet& operator=(const PndMvaDataSet& other); /** * Class conditional mean for a given class. Stored in class * conditional means container. */ void CompClsCondMean(const std::string& clsName); /** * Computes Variance (unbiased estimator) for each parameter in the * feature list. * @param clsName The name of the class of events for with we want * to compute Var(X). */ void ComputeVariance(); /** * Determines the median for parameters of the loaded DataSet. */ void DetermineMedian(); /** * Determine Min Max difference. */ void MinMaxDiff(); //! Input File name std::string m_input; //! Classes. std::vector m_classes; //! Variables. std::vector m_vars; //! Container to keep the Event data feature vectors std::vector< std::pair*> > m_events; //! Container to keep the Class Conditional means std::map< std::string, std::vector* > m_ClassCondMeans; }; inline const std::vector< std::pair*> >& PndMvaDataSet::GetData() const { return m_events; } inline const std::vector& PndMvaDataSet::GetClasses() const { return m_classes; } inline const std::vector& PndMvaDataSet::GetVars() const { return m_vars; } inline const std::map< std::string, std::vector* >& PndMvaDataSet::GetClassCondMeans() const { return m_ClassCondMeans; } inline const std::string& PndMvaDataSet::GetInFileName() const { return m_input; } #endif