All Classes Namespaces Files Functions Variables Typedefs Enumerations Friends Macros Pages
lsh_nearest_neighbors_regression.hpp
Go to the documentation of this file.
1 //=======================================================================
2 // Copyright (c) 2014 Andrzej Pacuk, Piotr Wygocki
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See
5 // accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
7 //=======================================================================
15 #ifndef PAAL_LSH_NEAREST_NEIGHBOURS_REGRESSION_HPP
16 #define PAAL_LSH_NEAREST_NEIGHBOURS_REGRESSION_HPP
17 
21 #include "paal/utils/hash.hpp"
24 
25 #include <boost/range/algorithm/transform.hpp>
26 #include <boost/range/combine.hpp>
27 #include <boost/range/empty.hpp>
28 #include <boost/range/size.hpp>
29 #include <boost/unordered_map.hpp>
30 #include <boost/serialization/vector.hpp>
31 
32 #include <functional>
33 #include <iterator>
34 #include <thread>
35 #include <type_traits>
36 #include <vector>
37 
38 namespace paal {
39 
40 namespace detail {struct lightweight_tag{};}
41 
43 
47 template <typename Funs>
49  Funs m_hash_funs;
50  using fun_t = range_to_elem_t<Funs>;
51 
52  template <typename Point>
53  class apply {
54  Point const & m_point;
55  public:
56  apply(Point const & point) :
57  m_point(point) {}
58 
59  auto operator()(fun_t const &fun) const -> decltype(fun(m_point)) {
60  return fun(m_point);
61  }
62  };
63 
64 public:
66  template<class Archive>
67  void serialize(Archive & ar, const unsigned int version) {
68  ar & m_hash_funs;
69  }
70 
71  //default constructor, only for serialization purpose
72  hash_function_tuple() = default;
73 
76  : m_hash_funs(std::move(funs)) {}
77 
79  bool operator==(hash_function_tuple const & other) const {
80  return m_hash_funs == other.m_hash_funs;
81  }
82 
84  template <typename Point>
85  auto operator()(Point && point) const {
86  using hash_result_single = pure_result_of_t<fun_t(Point)>;
87  std::vector<hash_result_single> values;
88 
89  values.reserve(m_hash_funs.size());
90  boost::transform(m_hash_funs, std::back_inserter(values),
91  apply<Point>{point});
92 
93  return values;
94  }
95 
98  template <typename Point>
99  auto operator()(Point && point, detail::lightweight_tag) const {
100  return m_hash_funs | boost::adaptors::transformed(apply<Point>{point});
101  }
102 };
103 
109 template <typename FunctionGenerator = default_hash_function_generator>
112  using funs_t = std::vector<fun_t>;
113  FunctionGenerator m_function_generator;
114  unsigned m_hash_functions_per_point;
115 public:
122  hash_function_tuple_generator(FunctionGenerator function_generator,
123  unsigned hash_functions_per_point) :
124  m_function_generator(std::forward<FunctionGenerator>(function_generator)),
125  m_hash_functions_per_point(hash_functions_per_point) {
126  }
127 
128 
134  //TODO change to auto, when it starts working
136  funs_t hash_funs;
137  hash_funs.reserve(m_hash_functions_per_point);
138  std::generate_n(std::back_inserter(hash_funs),
139  m_hash_functions_per_point,
140  std::ref(m_function_generator));
141 
142  return hash_function_tuple<funs_t>(std::move(hash_funs));
143  }
144 };
145 
155 template <typename FunctionGenerator>
156 auto make_hash_function_tuple_generator(FunctionGenerator &&function_generator,
157  unsigned hash_functions_per_point) {
159  std::forward<FunctionGenerator>(function_generator),
160  hash_functions_per_point);
161 }
162 
163 namespace detail {
164 
165  template <typename Fun, typename Point>
166  auto call(Fun const & f, Point &&p, detail::lightweight_tag) {
167  return f(std::forward<Point>(p));
168  }
169 
170  template <typename Function, typename Point>
171  auto call(hash_function_tuple<Function> const & f,
172  Point &&p, detail::lightweight_tag tag) {
173  return f(std::forward<Point>(p), tag);
174  }
175 }
176 
190 template <typename HashValue,
191  typename LshFun,
192  //TODO default value here supposed to be std::hash
193  typename HashForHashValue = range_hash>
195 
196  //TODO template param TestResultType
198  using map_t = boost::unordered_map<HashValue, res_accu_t, HashForHashValue>;
199 
201  std::vector<map_t> m_hash_maps;
203  std::vector<LshFun> m_hashes;
204 
206  average_accumulator<> m_avg;
207 
208 public:
209 
211  template<class Archive>
212  void serialize(Archive & ar, const unsigned int version){
213  ar & m_hash_maps;
214  ar & m_hashes;
215  ar & m_avg;
216  }
217 
220 
232  template <typename TrainingPoints, typename TrainingResults, typename LshFunctionGenerator>
234  TrainingPoints &&training_points, TrainingResults &&training_results,
235  unsigned passes,
236  LshFunctionGenerator &&lsh_function_generator,
237  unsigned threads_count = std::thread::hardware_concurrency()) :
238  m_hash_maps(passes) {
239 
240  m_hashes.reserve(passes);
241  std::generate_n(std::back_inserter(m_hashes), passes,
242  std::ref(lsh_function_generator));
243 
244  update(std::forward<TrainingPoints>(training_points),
245  std::forward<TrainingResults>(training_results),
246  threads_count);
247  }
248 
250  bool operator==(lsh_nearest_neighbors_regression const & other) const {
251  return m_avg == other.m_avg &&
252  m_hashes == other.m_hashes &&
253  m_hash_maps == other.m_hash_maps;
254  }
255 
256 
266  template <typename TrainingPoints, typename TrainingResults>
267  void update(TrainingPoints &&training_points, TrainingResults &&training_results,
268  unsigned threads_count = std::thread::hardware_concurrency()) {
269 
270  thread_pool threads(threads_count);
271 
272  threads.post([&](){ compute_avg(training_results);});
273 
274  for (auto &&map_and_fun : boost::combine(m_hash_maps, m_hashes)) {
275  auto &map = boost::get<0>(map_and_fun);
276  //fun is passed by value because of efficiency reasons
277  threads.post([&, fun = boost::get<1>(map_and_fun)]() {add_values(fun, map, training_points, training_results);});
278  }
279  threads.run();
280  }
281 
291  template <typename TestPoints, typename OutputIterator>
292  void test(TestPoints &&test_points, OutputIterator result) const {
293  assert(!m_avg.empty());
294 
295  for (auto &&test_point : test_points) {
297  for(auto && map_and_fun : boost::combine(m_hash_maps, m_hashes)) {
298  auto const &map = boost::get<0>(map_and_fun);
299  auto const &fun = boost::get<1>(map_and_fun);
300  auto got = map.find(detail::call(fun, test_point, detail::lightweight_tag{}),
301  HashForHashValue{}, utils::equal_to_unspecified{});
302  if (got != map.end()) {
303  avg.add_value(got->second.get_average_unsafe());
304  }
305  }
306  *result = avg.get_average(m_avg.get_average());
307  ++result;
308  }
309  }
310 
311 private:
312 
314  template <typename Points, typename Results>
315  void add_values(LshFun fun, map_t & map, Points && training_points, Results && training_results) {
316  for (auto &&training_point_result : boost::combine(training_points, training_results)) {
317  auto && point = boost::get<0>(training_point_result);
318  auto && res = boost::get<1>(training_point_result);
319 
320  //the return value of this call might be impossible to store in the map
321  auto got = map.find(call(fun, point, detail::lightweight_tag{}),
322  HashForHashValue{}, utils::equal_to_unspecified{});
323  if (got != map.end()) {
324  got->second.add_value(res);
325  } else {
326  map[fun(point)].add_value(res);
327  }
328 
329  }
330  }
331 
333  template <typename Results>
334  void compute_avg(Results const & training_results) {
335  for (auto && res :training_results) {
336  m_avg.add_value(res);
337  }
338  }
339 };
340 
357 template <typename TrainingPoints, typename TrainingResults,
358  typename LshFunctionGenerator>
360  TrainingPoints &&training_points, TrainingResults &&training_results,
361  unsigned passes,
362  LshFunctionGenerator &&lsh_function_generator,
363  unsigned threads_count = std::thread::hardware_concurrency()) {
365  using hash_result = typename std::remove_reference<
366  typename std::result_of<lsh_fun(
368  )>::type
369  >::type;
370 
372  std::forward<TrainingPoints>(training_points),
373  std::forward<TrainingResults>(training_results),
374  passes,
375  std::forward<LshFunctionGenerator>(lsh_function_generator),
376  threads_count);
377 }
378 
379 
397 template <typename TrainingPoints, typename TrainingResults,
398  typename FunctionGenerator>
400  TrainingPoints &&training_points, TrainingResults &&training_results,
401  unsigned passes,
402  FunctionGenerator &&function_generator,
403  unsigned hash_functions_per_point,
404  unsigned threads_count = std::thread::hardware_concurrency()) {
405 
407  std::forward<FunctionGenerator>(function_generator),
408  hash_functions_per_point);
410  std::forward<TrainingPoints>(training_points),
411  std::forward<TrainingResults>(training_results),
412  passes,
413  std::move(tuple_lsh),
414  threads_count);
415 }
416 
417 }
418 
419 #endif // PAAL_LSH_NEAREST_NEIGHBOURS_REGRESSION_HPP
typename boost::range_value< Range >::type range_to_elem_t
for given range returns type of its element
void post(Functor f)
post new task
Definition: thread_pool.hpp:39
Factory class for projection_hash_function.
void update(TrainingPoints &&training_points, TrainingResults &&training_results, unsigned threads_count=std::thread::hardware_concurrency())
trainings model
lsh_nearest_neighbors_regression()=default
default constructor, only for serialization purpose
auto operator()(Point &&point, detail::lightweight_tag) const
from https://code.google.com/p/ntest/source/browse/unordered_map_serialization.h
ReturnType get_average(ReturnType default_value=ReturnType{}) const
auto make_lsh_nearest_neighbors_regression(TrainingPoints &&training_points, TrainingResults &&training_results, unsigned passes, LshFunctionGenerator &&lsh_function_generator, unsigned threads_count=std::thread::hardware_concurrency())
this is the most general version of the make_lsh_nearest_neighbors_regression, It takes any hash func...
auto make_hash_function_tuple_generator(FunctionGenerator &&function_generator, unsigned hash_functions_per_point)
lsh_nearest_neighbors_regression(TrainingPoints &&training_points, TrainingResults &&training_results, unsigned passes, LshFunctionGenerator &&lsh_function_generator, unsigned threads_count=std::thread::hardware_concurrency())
initializes model and trainings model using training points and results
auto operator()(Point &&point) const
operator()(), returns vector of hash values
auto make_lsh_nearest_neighbors_regression_tuple_hash(TrainingPoints &&training_points, TrainingResults &&training_results, unsigned passes, FunctionGenerator &&function_generator, unsigned hash_functions_per_point, unsigned threads_count=std::thread::hardware_concurrency())
This is the special version of make_lsh_nearest_neighbors_regression. This version assumes that hash ...
void add_value(ValueType value, CounterType cnt=1)
void serialize(Archive &ar, const unsigned int version)
serialization
simple threadpool, class uses also current thread!
Definition: thread_pool.hpp:25
void run()
run all posted tasks (blocking)
Definition: thread_pool.hpp:45
void serialize(Archive &ar, const unsigned int version)
serialize
hash_function_tuple< funs_t > operator()() const
hash_function_tuple_generator(FunctionGenerator function_generator, unsigned hash_functions_per_point)
functor representing tuple of hash functions
bool operator==(lsh_nearest_neighbors_regression const &other) const
operator==
TODO equivalent to c++14 equal_to&lt;&gt;, remove when appears.
Definition: functors.hpp:573
void test(TestPoints &&test_points, OutputIterator result) const
queries model, does not heave threads_count parameter, because this is much more natural to do from o...
typename std::decay< typename std::result_of< F >::type >::type pure_result_of_t
return pure type of function (decays const and reference)
helper class facilitating counting average
typename boost::range_reference< Range >::type range_to_ref_t
for given range returns type of its reference
bool operator==(hash_function_tuple const &other) const
operator==