/* ***************************************
 * Clustering algorithms                 *
 * Author: M.Babai@rug.nl                *
 * Version:                              *
 * LICENSE:                              *
 * *************************************** */
// NOTE(review): in this copy of the file the five system #include directives
// below have no header name, and several template argument lists are missing
// (std::pair, std::vector and std::set in the declarations further down).
// They appear to have been stripped together with their angle brackets;
// restore them from version control before building. The declarations are
// reproduced here exactly as found.
#pragma once
#ifndef PND_MVA_CLUSTER_H
#define PND_MVA_CLUSTER_H

#include
#include
#include
#include
#include

// Local includes
#include "PndMvaUtil.h"

// Debug switch: printStructs() is only declared when this is > 0.
#define PNDMVA_CLUSTER_DEBUG 0

//! Data structure of the space points and the cluster centers.
// NOTE(review): the element type of the pair is not visible here (template
// arguments lost); the trailing '*' shows that a pointer is stored per entry.
typedef std::vector< std::pair*> > DataPoints;

//! Clustering types accepted by Cluster() and ClusterAndLabel().
typedef enum ClusteringType{
  KMEANS_HARD = 0,
  KMEANS_SOFT = 1 // Not implemented yet.
} ClusteringType;

//---------------- Class definition --------------
/**
 * Clusters a set of input data points into a configurable number of
 * centroids (see ClusteringType for the available algorithms).
 * The class is non-copyable: copy construction and copy assignment are
 * declared private.
 */
class PndMvaCluster
{
  //--------------------------------------------
  // -------------- public members -------------
 public:
  /**
   * Constructor.
   *@param InputData Input data points.
   *@param nCluster  Number of clusters to be created.
   */
  explicit PndMvaCluster( DataPoints const& InputData, size_t nCluster);

  /**
   * Destructor.
   */
  virtual ~PndMvaCluster();

  /**
   * Compute cluster centers for the current input data.
   *@param ClType Clustering algorithm (defaults to KMEANS_HARD).
   *@return Vector containing the cluster centroids.
   * NOTE(review): ownership of the returned pointer is not visible from
   * this header -- confirm against the implementation.
   */
  virtual DataPoints* Cluster( ClusteringType const ClType = KMEANS_HARD);

  /**
   * Compute cluster centers for the current input data. The label of
   * the majority of their members determines the label of the center.
   *@param ClType Clustering algorithm. Unlike Cluster(), this function
   * declares no default value, so the caller must always pass it.
   *@param labels Presumably the class labels used when labelling the
   * centers -- TODO confirm against the implementation (the element type
   * is not visible in this copy of the header).
   *@return Vector containing the cluster centroids.
   * NOTE(review): ownership of the returned pointer is not visible from
   * this header -- confirm against the implementation.
   */
  virtual DataPoints* ClusterAndLabel( ClusteringType const ClType, std::vector const& labels);

  //------- Getters
  /**
   * Get the number of cluster centroids.
   *@return Number of cluster centroids (the stored m_num_Cluster).
   */
  inline size_t GetNumberOfClusters() const;

  /**
   * Get the dimension of the input data points.
   *@return Dimension of the data points (the stored m_dimension).
   */
  inline size_t GetClusterDimension() const;

  //------- Setters
  /**
   * Set the number of centroids. Only the stored value is updated by this
   * call; see the inline definition at the end of this header.
   *@param val Number of centroids.
   */
  inline void SetNumberOfClusters(size_t val);

  //__________________ DEBUG FUNCTIONS ______________
#if (PNDMVA_CLUSTER_DEBUG > 0)
  /**
   * Print the vectors and centroids and their relation.
   * Only declared when PNDMVA_CLUSTER_DEBUG > 0.
   */
  void printStructs();
#endif

  //--------------------------------------------
  //protected:
  // -------------- private members ------------
 private:
  //! Copying is disabled: both operations are declared private to avoid
  //! accidental copies.
  PndMvaCluster(const PndMvaCluster& other);
  PndMvaCluster& operator=(const PndMvaCluster& other);

  // Functions & Procedures
  /// Performs the actual hard K-Means clustering.
  DataPoints* K_Means();

  /// Initialize the centroids before clustering.
  void InitCentroids();

  /// Partitions the data points among the current cluster centroids.
  void InitialPartition();

  /// Compute (modify) the coordinates of centroids.
  void ComputeCentroids();

  /// Clear the currently used data structures.
  void ClearStructures();

  /// Re-initialize the empty centroid with index centerIdx to the furthest point.
  void ReInitEmptyCenter(size_t centerIdx);

  // Variables
  // The trailing comments use "///<" so that Doxygen attaches them to the
  // member on the same line instead of the following declaration.
  size_t m_num_Cluster; ///< Number of cluster centers.
  DataPoints m_PointSet; ///< Input data points.
  size_t m_dimension; ///< Data points dimension.

  /// Container to hold the centroids.
  DataPoints m_Centroids;

  /// Connection of each point to a centroid.
  std::vector m_PointsToClusters;

  /// Responsibility list of each centroid.
  std::vector< std::set* > m_ClustersToPoints;
};

//__________________ Inlines ____________
/// Returns the currently stored number of cluster centers.
inline size_t PndMvaCluster::GetNumberOfClusters() const
{
  return m_num_Cluster;
};

/// Returns the stored dimension of the data points.
inline size_t PndMvaCluster::GetClusterDimension() const
{
  return m_dimension;
};

/// Stores val as the number of cluster centers; nothing else is modified here.
inline void PndMvaCluster::SetNumberOfClusters(size_t val)
{
  m_num_Cluster = val;
};

#endif // End interface