All Classes Namespaces Files Functions Variables Typedefs Enumerations Friends Macros Pages
k_means_clustering_engine.hpp
Go to the documentation of this file.
1 
8 #ifndef PAAL_K_MEANS_CLUSTERING_ENGINE_HPP
9 #define PAAL_K_MEANS_CLUSTERING_ENGINE_HPP
10 
12 #include "paal/utils/functors.hpp"
13 #include "paal/utils/irange.hpp"
14 
15 #include <boost/range/combine.hpp>
16 #include <boost/range/adaptor/indexed.hpp>
17 #include <boost/range/algorithm_ext/iota.hpp>
18 #include <boost/range/algorithm/for_each.hpp>
19 
20 #include <vector>
21 #include <random>
22 #include <iostream>
23 
24 namespace paal {
25 
32 template <class RangeLeft, class RangeRight>
33 auto distance_square(RangeLeft && lrange, RangeRight && rrange) {
34  assert(!boost::empty(lrange));
35  assert(boost::distance(lrange) == boost::distance(rrange));
36 
37  //TODO change to sum_functors when generic lambdas appears
38  decltype(*std::begin(lrange) * *std::begin(rrange)) dist{};
39  for (auto point_pair : boost::combine(lrange, rrange)) {
40  auto diff = boost::get<0>(point_pair) - boost::get<1>(point_pair);
41  dist += diff * diff;
42  }
43  return dist;
44 }
45 
52 template <class Point, class Centers>
53 auto closest_to(Point && point, Centers && centers){
54  using coor_t = range_to_elem_t<Point>;
55  auto dist = std::numeric_limits<coor_t>::max();
56  int new_center = 0;
57  for (auto center : centers | boost::adaptors::indexed()){
58  auto new_dist = distance_square(center.value(), point);
59 
60  if (new_dist < dist) {
61  dist = new_dist;
62  new_center = center.index();
63  }
64  }
65  return new_center;
66 }
67 
76  template <class Center, class New_center>
77  void move_center(Center &last_center, New_center &new_center) {};
79  void new_iteration() {};
80 };
81 
99 template <class Points,
100  class Centers,
101  class Centroid,
102  class ClosestTo, class OutputIterator,
103  class CentroidEqual = utils::equal_to,
104  class Visitor=k_means_visitor >
105 auto k_means(Points &&points, Centers & centers,
106  Centroid centroid, ClosestTo closest_to,
107  OutputIterator result,
108  CentroidEqual c_equal = CentroidEqual{},
109  Visitor visitor=Visitor{}) {
110  using point_t = range_to_elem_t<Points>;
111  using points_bag = std::vector<point_t>;
112 
113  std::vector<points_bag> cluster_points;
114  cluster_points.resize(centers.size());
115  bool zm;
116  do {
117  visitor.new_iteration();
118  zm = false;
119  boost::for_each(cluster_points, std::mem_fn(&points_bag::clear));
120 
121  for (auto && point : points) {
122  cluster_points[closest_to(point)].push_back(point);
123  }
124 
125  for (auto point : cluster_points | boost::adaptors::indexed()) {
126  if(point.value().empty()) continue;
127  auto && old_center = centers[point.index()];
128  auto && new_center = centroid(point.value());
129  if (!c_equal(new_center, old_center)) {
130  visitor.move_center(old_center, new_center);
131  old_center = new_center;
132  zm = true;
133  }
134  }
135  } while (zm == true);
136  for (int cur_cluster : irange(cluster_points.size())) {
137  for (auto const & point : cluster_points[cur_cluster]) {
138  *result = std::make_pair(point, cur_cluster);
139  ++result;
140  }
141  }
142  return centers;
143 }
144 
150 template <typename Points, typename OutputIterator, typename RNG = std::default_random_engine>
151 auto get_random_centers(Points &&points, int number_of_centers, OutputIterator out,
152  RNG && rng = std::default_random_engine{}) {
153 
154  std::vector<int> centers(points.size());
155  boost::iota(centers, 0);
156  std::shuffle(centers.begin(),centers.end(), rng);
157  centers.resize(number_of_centers);
158  for (auto && center : centers) {
159  *out=points[center];
160  ++out;
161  }
162 }
163 
169 template <typename Points, typename RNG = std::default_random_engine>
170 auto get_random_clusters(Points &&points, int number_of_clusters,
171  RNG && rng = std::default_random_engine{}) {
172  std::vector<typename std::decay<Points>::type> clusters(number_of_clusters);
173  std::uniform_int_distribution<> dis(0, number_of_clusters - 1);
174 
175  for (auto o : points) {
176  clusters[distribution(rng)].push_back(o);
177  }
178  return clusters;
179 }
180 
181 }
182 
183 #endif /* PAAL_K_MEANS_CLUSTERING_ENGINE_HPP */
typename boost::range_value< Range >::type range_to_elem_t
for given range returns type of its element
equal_to functor
Definition: functors.hpp:556
auto get_random_clusters(Points &&points, int number_of_clusters, RNG &&rng=std::default_random_engine{})
void new_iteration()
new iteration
auto closest_to(Point &&point, Centers &&centers)
auto k_means(Points &&points, Centers &&centers, OutputIterator result, Visitor visitor=Visitor{})
this is solve k_means_clustering problem and return vector of cluster example:
auto irange(T begin, T end)
irange
Definition: irange.hpp:22
This file contains set of simple useful functors or functor adapters.
auto get_random_centers(Points &&points, int number_of_centers, OutputIterator out, RNG &&rng=std::default_random_engine{})
auto distance_square(RangeLeft &&lrange, RangeRight &&rrange)
void move_center(Center &last_center, New_center &new_center)