15 #ifndef PAAL_LSH_NEAREST_NEIGHBOURS_REGRESSION_HPP
16 #define PAAL_LSH_NEAREST_NEIGHBOURS_REGRESSION_HPP
25 #include <boost/range/algorithm/transform.hpp>
26 #include <boost/range/combine.hpp>
27 #include <boost/range/empty.hpp>
28 #include <boost/range/size.hpp>
29 #include <boost/unordered_map.hpp>
30 #include <boost/serialization/vector.hpp>
35 #include <type_traits>
47 template <
typename Funs>
52 template <
typename Po
int>
54 Point
const & m_point;
56 apply(Point
const & point) :
59 auto operator()(fun_t
const &fun)
const -> decltype(fun(m_point)) {
66 template<
class Archive>
67 void serialize(Archive & ar,
const unsigned int version) {
76 : m_hash_funs(std::move(funs)) {}
80 return m_hash_funs == other.m_hash_funs;
84 template <
typename Po
int>
87 std::vector<hash_result_single> values;
89 values.reserve(m_hash_funs.size());
90 boost::transform(m_hash_funs, std::back_inserter(values),
98 template <
typename Po
int>
100 return m_hash_funs | boost::adaptors::transformed(apply<Point>{point});
109 template <
typename FunctionGenerator = default_hash_function_generator>
112 using funs_t = std::vector<fun_t>;
113 FunctionGenerator m_function_generator;
114 unsigned m_hash_functions_per_point;
123 unsigned hash_functions_per_point) :
124 m_function_generator(std::forward<FunctionGenerator>(function_generator)),
125 m_hash_functions_per_point(hash_functions_per_point) {
137 hash_funs.reserve(m_hash_functions_per_point);
138 std::generate_n(std::back_inserter(hash_funs),
139 m_hash_functions_per_point,
140 std::ref(m_function_generator));
155 template <
typename FunctionGenerator>
157 unsigned hash_functions_per_point) {
159 std::forward<FunctionGenerator>(function_generator),
160 hash_functions_per_point);
165 template <
typename Fun,
typename Po
int>
166 auto call(Fun
const & f, Point &&p, detail::lightweight_tag) {
167 return f(std::forward<Point>(p));
170 template <
typename Function,
typename Po
int>
171 auto call(hash_function_tuple<Function>
const & f,
172 Point &&p, detail::lightweight_tag tag) {
173 return f(std::forward<Point>(p), tag);
190 template <
typename HashValue,
193 typename HashForHashValue = range_hash>
198 using map_t = boost::unordered_map<HashValue, res_accu_t, HashForHashValue>;
201 std::vector<map_t> m_hash_maps;
203 std::vector<LshFun> m_hashes;
211 template<
class Archive>
212 void serialize(Archive & ar,
const unsigned int version){
232 template <
typename TrainingPo
ints,
typename TrainingResults,
typename LshFunctionGenerator>
234 TrainingPoints &&training_points, TrainingResults &&training_results,
236 LshFunctionGenerator &&lsh_function_generator,
237 unsigned threads_count = std::thread::hardware_concurrency()) :
238 m_hash_maps(passes) {
240 m_hashes.reserve(passes);
241 std::generate_n(std::back_inserter(m_hashes), passes,
242 std::ref(lsh_function_generator));
244 update(std::forward<TrainingPoints>(training_points),
245 std::forward<TrainingResults>(training_results),
251 return m_avg == other.m_avg &&
252 m_hashes == other.m_hashes &&
253 m_hash_maps == other.m_hash_maps;
266 template <
typename TrainingPo
ints,
typename TrainingResults>
267 void update(TrainingPoints &&training_points, TrainingResults &&training_results,
268 unsigned threads_count = std::thread::hardware_concurrency()) {
272 threads.
post([&](){ compute_avg(training_results);});
274 for (
auto &&map_and_fun : boost::combine(m_hash_maps, m_hashes)) {
275 auto &map = boost::get<0>(map_and_fun);
277 threads.
post([&, fun = boost::get<1>(map_and_fun)]() {add_values(fun, map, training_points, training_results);});
291 template <
typename TestPo
ints,
typename OutputIterator>
292 void test(TestPoints &&test_points, OutputIterator result)
const {
293 assert(!m_avg.
empty());
295 for (
auto &&test_point : test_points) {
297 for(
auto && map_and_fun : boost::combine(m_hash_maps, m_hashes)) {
298 auto const &map = boost::get<0>(map_and_fun);
299 auto const &fun = boost::get<1>(map_and_fun);
302 if (got != map.end()) {
303 avg.
add_value(got->second.get_average_unsafe());
314 template <
typename Po
ints,
typename Results>
315 void add_values(LshFun fun, map_t & map, Points && training_points, Results && training_results) {
316 for (
auto &&training_point_result : boost::combine(training_points, training_results)) {
317 auto && point = boost::get<0>(training_point_result);
318 auto && res = boost::get<1>(training_point_result);
323 if (got != map.end()) {
324 got->second.add_value(res);
326 map[fun(point)].add_value(res);
333 template <
typename Results>
334 void compute_avg(Results
const & training_results) {
335 for (
auto && res :training_results) {
357 template <
typename TrainingPoints,
typename TrainingResults,
358 typename LshFunctionGenerator>
360 TrainingPoints &&training_points, TrainingResults &&training_results,
362 LshFunctionGenerator &&lsh_function_generator,
363 unsigned threads_count = std::thread::hardware_concurrency()) {
365 using hash_result =
typename std::remove_reference<
366 typename std::result_of<lsh_fun(
372 std::forward<TrainingPoints>(training_points),
373 std::forward<TrainingResults>(training_results),
375 std::forward<LshFunctionGenerator>(lsh_function_generator),
397 template <
typename TrainingPoints,
typename TrainingResults,
398 typename FunctionGenerator>
400 TrainingPoints &&training_points, TrainingResults &&training_results,
402 FunctionGenerator &&function_generator,
403 unsigned hash_functions_per_point,
404 unsigned threads_count = std::thread::hardware_concurrency()) {
407 std::forward<FunctionGenerator>(function_generator),
408 hash_functions_per_point);
410 std::forward<TrainingPoints>(training_points),
411 std::forward<TrainingResults>(training_results),
413 std::move(tuple_lsh),
419 #endif // PAAL_LSH_NEAREST_NEIGHBOURS_REGRESSION_HPP
typename boost::range_value< Range >::type range_to_elem_t
for a given range, returns the type of its elements
void post(Functor f)
post new task
Factory class for projection_hash_function.
void update(TrainingPoints &&training_points, TrainingResults &&training_results, unsigned threads_count=std::thread::hardware_concurrency())
trains the model
lsh_nearest_neighbors_regression()=default
default constructor, only for serialization purpose
hash_function_tuple(Funs funs)
constructor
auto operator()(Point &&point, detail::lightweight_tag) const
from https://code.google.com/p/ntest/source/browse/unordered_map_serialization.h
ReturnType get_average(ReturnType default_value=ReturnType{}) const
auto make_lsh_nearest_neighbors_regression(TrainingPoints &&training_points, TrainingResults &&training_results, unsigned passes, LshFunctionGenerator &&lsh_function_generator, unsigned threads_count=std::thread::hardware_concurrency())
This is the most general version of make_lsh_nearest_neighbors_regression. It takes any hash func...
auto make_hash_function_tuple_generator(FunctionGenerator &&function_generator, unsigned hash_functions_per_point)
lsh_nearest_neighbors_regression(TrainingPoints &&training_points, TrainingResults &&training_results, unsigned passes, LshFunctionGenerator &&lsh_function_generator, unsigned threads_count=std::thread::hardware_concurrency())
initializes the model and trains it using the training points and results
auto operator()(Point &&point) const
operator()(), returns vector of hash values
auto make_lsh_nearest_neighbors_regression_tuple_hash(TrainingPoints &&training_points, TrainingResults &&training_results, unsigned passes, FunctionGenerator &&function_generator, unsigned hash_functions_per_point, unsigned threads_count=std::thread::hardware_concurrency())
This is the special version of make_lsh_nearest_neighbors_regression. This version assumes that hash ...
void add_value(ValueType value, CounterType cnt=1)
void serialize(Archive &ar, const unsigned int version)
serialization
simple thread pool; the class also uses the current thread!
void run()
run all posted tasks (blocking)
void serialize(Archive &ar, const unsigned int version)
serialize
hash_function_tuple< funs_t > operator()() const
hash_function_tuple_generator(FunctionGenerator function_generator, unsigned hash_functions_per_point)
functor representing tuple of hash functions
bool operator==(lsh_nearest_neighbors_regression const &other) const
operator==
TODO: equivalent to C++14 equal_to<>; remove once that is available.
void test(TestPoints &&test_points, OutputIterator result) const
queries the model; does not have a threads_count parameter, because this is much more natural to do from o...
typename std::decay< typename std::result_of< F >::type >::type pure_result_of_t
returns the pure result type of a function (decays const and reference)
helper class facilitating counting average
typename boost::range_reference< Range >::type range_to_ref_t
for a given range, returns the type of its reference
bool operator==(hash_function_tuple const &other) const
operator==