docs/lsh__nearest__neighbors__regression_8hpp_source.html

 //=======================================================================

 // Copyright (c) 2014 Andrzej Pacuk, Piotr Wygocki

 //

 // Distributed under the Boost Software License, Version 1.0. (See

 // accompanying file LICENSE_1_0.txt or copy at

 // http://www.boost.org/LICENSE_1_0.txt)

 //=======================================================================

 #ifndef PAAL_LSH_NEAREST_NEIGHBOURS_REGRESSION_HPP

 #define PAAL_LSH_NEAREST_NEIGHBOURS_REGRESSION_HPP


 #include "paal/data_structures/thread_pool.hpp"

 #include "paal/regression/lsh_functions.hpp"

 #include "paal/utils/accumulate_functors.hpp"

 #include "paal/utils/hash.hpp"

 #include "paal/utils/type_functions.hpp"

 #include "paal/utils/unordered_map_serialization.hpp"


 #include <boost/range/algorithm/transform.hpp>

 #include <boost/range/combine.hpp>

 #include <boost/range/empty.hpp>

 #include <boost/range/size.hpp>

 #include <boost/unordered_map.hpp>

 #include <boost/serialization/vector.hpp>


 #include <functional>

 #include <iterator>

 #include <thread>

 #include <type_traits>

 #include <vector>


 namespace paal {


 namespace detail {
 /// Tag type used purely for overload selection: passing it picks the
 /// overloads that return a lazily transformed range of hash values
 /// instead of a materialised container.
 struct lightweight_tag {};
 } // namespace detail


 /// Hash function generator used when hash_function_tuple_generator is
 /// instantiated without an explicit FunctionGenerator argument.
 using default_hash_function_generator = lsh::hamming_hash_function_generator;


 template <typename Funs>

 class hash_function_tuple {

     Funs m_hash_funs;

     using fun_t = range_to_elem_t<Funs>;


     template <typename Point>

         class apply {

             Point const & m_point;

         public:

             apply(Point const & point) :

                 m_point(point) {}


             auto operator()(fun_t const &fun) const -> decltype(fun(m_point))  {

                 return fun(m_point);

             }

         };


 public:

     template<class Archive>

     void serialize(Archive & ar, const unsigned int version) {

         ar & m_hash_funs;

     }


     //default constructor, only for serialization purpose

     hash_function_tuple() = default;


     hash_function_tuple(Funs funs)

         : m_hash_funs(std::move(funs)) {}


     bool operator==(hash_function_tuple const & other) const {

         return m_hash_funs == other.m_hash_funs;

     }


     template <typename Point>

     auto operator()(Point && point) const {

         using hash_result_single = pure_result_of_t<fun_t(Point)>;

         std::vector<hash_result_single> values;


         values.reserve(m_hash_funs.size());

         boost::transform(m_hash_funs, std::back_inserter(values),

                 apply<Point>{point});


         return values;

     }


     template <typename Point>

     auto operator()(Point && point, detail::lightweight_tag) const {

         return m_hash_funs | boost::adaptors::transformed(apply<Point>{point});

     }

 };


 /**
  * @brief factory of hash_function_tuple objects
  *
  * Each call of operator()() asks the wrapped generator for
  * hash_functions_per_point hash functions and packs them into one
  * hash_function_tuple.
  *
  * @tparam FunctionGenerator nullary functor returning a single hash function
  */
 template <typename FunctionGenerator = default_hash_function_generator>
 class hash_function_tuple_generator {
     using fun_t = pure_result_of_t<FunctionGenerator()>;
     using funs_t = std::vector<fun_t>;

     FunctionGenerator m_function_generator;
     unsigned m_hash_functions_per_point;

 public:
     /**
      * @param function_generator generator of single hash functions
      * @param hash_functions_per_point number of hash functions in every
      *        generated tuple
      */
     hash_function_tuple_generator(FunctionGenerator function_generator,
                                   unsigned hash_functions_per_point)
         : m_function_generator(std::forward<FunctionGenerator>(function_generator))
         , m_hash_functions_per_point(hash_functions_per_point) {}

     //TODO change to auto, when it starts working
     /// generates a fresh tuple of m_hash_functions_per_point hash functions
     hash_function_tuple<funs_t> operator()() const {
         funs_t generated;
         generated.reserve(m_hash_functions_per_point);

         for (unsigned produced = 0; produced < m_hash_functions_per_point; ++produced) {
             generated.push_back(m_function_generator());
         }

         return hash_function_tuple<funs_t>(std::move(generated));
     }
 };


 template <typename FunctionGenerator>

 auto make_hash_function_tuple_generator(FunctionGenerator &&function_generator,

                                         unsigned hash_functions_per_point) {

     return hash_function_tuple_generator<FunctionGenerator>(

                 std::forward<FunctionGenerator>(function_generator),

                 hash_functions_per_point);

 }


 namespace detail {


     template <typename Fun, typename Point>

     auto call(Fun const & f, Point &&p, detail::lightweight_tag) {

         return f(std::forward<Point>(p));

     }


     template <typename Function, typename Point>

     auto call(hash_function_tuple<Function> const & f,

               Point &&p, detail::lightweight_tag tag) {

         return f(std::forward<Point>(p), tag);

     }

 }


 /**
  * @brief regression model built on locality sensitive hash functions
  *
  * The model keeps `passes` independent hash functions. For each of them a
  * map from hash value to an average_accumulator of training results is
  * maintained. A query point is hashed with every function and the averages
  * found in the matching buckets are averaged; the average over all training
  * results is passed as the default when querying the final average.
  *
  * @tparam HashValue key type stored in the hash maps
  * @tparam LshFun type of a single hash function (or tuple of functions)
  * @tparam HashForHashValue hash functor for HashValue
  */
 template <typename HashValue,
           typename LshFun,
           //TODO default value here supposed to be std::hash
           typename HashForHashValue = range_hash>
 class lsh_nearest_neighbors_regression {

     //TODO template param TestResultType
     using res_accu_t = average_accumulator<>;
     using map_t = boost::unordered_map<HashValue, res_accu_t, HashForHashValue>;

     /// m_hash_maps[i] is filled using hash function m_hashes[i]
     std::vector<map_t> m_hash_maps;
     std::vector<LshFun> m_hashes;

     /// accumulator over all training results seen so far
     average_accumulator<> m_avg;

 public:

     /// serialization
     template<class Archive>
     void serialize(Archive & ar, const unsigned int /*version*/){
         ar & m_hash_maps;
         ar & m_hashes;
         ar & m_avg;
     }

     /// default constructor, only for serialization purpose
     lsh_nearest_neighbors_regression() = default;

     /**
      * @brief initializes model and trains it using training points and results
      *
      * @param training_points range of training points
      * @param training_results range of results, paired element-wise with
      *        training_points
      * @param passes number of hash functions (and hash maps) in the model
      * @param lsh_function_generator nullary functor called `passes` times
      *        to obtain the hash functions
      * @param threads_count number of threads used for training
      */
     template <typename TrainingPoints, typename TrainingResults, typename LshFunctionGenerator>
     lsh_nearest_neighbors_regression(
             TrainingPoints &&training_points, TrainingResults &&training_results,
             unsigned passes,
             LshFunctionGenerator &&lsh_function_generator,
             unsigned threads_count = std::thread::hardware_concurrency()) :
         m_hash_maps(passes) {

         m_hashes.reserve(passes);
         std::generate_n(std::back_inserter(m_hashes), passes,
                     std::ref(lsh_function_generator));

         update(std::forward<TrainingPoints>(training_points),
                std::forward<TrainingResults>(training_results),
                threads_count);
     }

     /// operator==
     bool operator==(lsh_nearest_neighbors_regression const & other) const {
         return m_avg == other.m_avg &&
                m_hashes == other.m_hashes &&
                m_hash_maps == other.m_hash_maps;
     }

     /**
      * @brief trains model
      *
      * Adds the given samples to the global average and to every hash map.
      * One task is posted for the global average and one per hash map.
      * The call blocks until all tasks are finished.
      *
      * @param training_points range of training points
      * @param training_results range of results, paired element-wise with
      *        training_points
      * @param threads_count number of threads used; 0 is treated as 1
      */
     template <typename TrainingPoints, typename TrainingResults>
     void update(TrainingPoints &&training_points, TrainingResults &&training_results,
             unsigned threads_count = std::thread::hardware_concurrency()) {

         // std::thread::hardware_concurrency() may return 0 when the value is
         // not computable; a pool of size 0 makes no sense (the pool also
         // uses the current thread), so fall back to a single thread.
         const unsigned workers = threads_count == 0 ? 1u : threads_count;
         thread_pool threads(workers);

         threads.post([&](){ compute_avg(training_results);});

         for (auto &&map_and_fun : boost::combine(m_hash_maps, m_hashes)) {
             auto &map = boost::get<0>(map_and_fun);
             //fun is passed by value because of efficiency reasons
             threads.post([&, fun = boost::get<1>(map_and_fun)]() {add_values(fun, map, training_points, training_results);});
         }
         threads.run();
     }

     /**
      * @brief queries model
      *
      * For every test point writes one value to result: the average of the
      * bucket averages found for that point over all passes;
      * the average of all training results is supplied as the default value.
      * The model must have been trained before (checked by assert).
      *
      * @param test_points range of points to be evaluated
      * @param result output iterator receiving one value per test point
      */
     template <typename TestPoints, typename OutputIterator>
     void test(TestPoints &&test_points, OutputIterator result) const {
         assert(!m_avg.empty());

         for (auto &&test_point : test_points) {
             average_accumulator<> avg;
             for(auto && map_and_fun : boost::combine(m_hash_maps, m_hashes)) {
                 auto const &map = boost::get<0>(map_and_fun);
                 auto const &fun = boost::get<1>(map_and_fun);
                 // lookup by the lightweight hash value, no key object is built
                 auto got = map.find(detail::call(fun, test_point, detail::lightweight_tag{}),
                                     HashForHashValue{}, utils::equal_to_unspecified{});
                 if (got != map.end()) {
                     avg.add_value(got->second.get_average_unsafe());
                 }
             }
             *result = avg.get_average(m_avg.get_average());
             ++result;
         }
     }

 private:

     /// adds all (point, result) pairs to map, using fun to compute the bucket
     template <typename Points, typename Results>
     void add_values(LshFun fun, map_t & map, Points && training_points, Results && training_results) {
         for (auto &&training_point_result : boost::combine(training_points, training_results)) {
             auto && point = boost::get<0>(training_point_result);
             auto && res = boost::get<1>(training_point_result);

             //the return value of this call might be impossible to store in the map
             auto got = map.find(detail::call(fun, point, detail::lightweight_tag{}),
                                  HashForHashValue{}, utils::equal_to_unspecified{});
             if (got != map.end()) {
                 got->second.add_value(res);
             } else {
                 // bucket does not exist yet, the storable key has to be computed
                 map[fun(point)].add_value(res);
             }
         }
     }

     /// adds all training results to the global average
     template <typename Results>
     void compute_avg(Results const & training_results) {
         for (auto && res :training_results) {
             m_avg.add_value(res);
         }
     }
 };


 template <typename TrainingPoints, typename TrainingResults,

           typename LshFunctionGenerator>

 auto make_lsh_nearest_neighbors_regression(

              TrainingPoints &&training_points, TrainingResults &&training_results,

              unsigned passes,

              LshFunctionGenerator &&lsh_function_generator,

              unsigned threads_count = std::thread::hardware_concurrency()) {

     using lsh_fun = pure_result_of_t<LshFunctionGenerator()>;

     using hash_result = typename std::remove_reference<

         typename std::result_of<lsh_fun(

                 range_to_ref_t<TrainingPoints>

                 )>::type

         >::type;


     return lsh_nearest_neighbors_regression<hash_result, lsh_fun>(

             std::forward<TrainingPoints>(training_points),

             std::forward<TrainingResults>(training_results),

             passes,

             std::forward<LshFunctionGenerator>(lsh_function_generator),

             threads_count);

 }


 /**
  * @brief special version of make_lsh_nearest_neighbors_regression in which
  *        every pass uses a tuple of hash functions
  *
  * function_generator is wrapped into a hash_function_tuple_generator that
  * produces hash_functions_per_point functions per tuple, and the result is
  * handed to the general make function.
  *
  * @param training_points range of training points
  * @param training_results range of results, paired with training_points
  * @param passes number of hash function tuples used by the model
  * @param function_generator nullary functor producing single hash functions
  * @param hash_functions_per_point number of hash functions in one tuple
  * @param threads_count number of threads used for training
  */
 template <typename TrainingPoints, typename TrainingResults,
           typename FunctionGenerator>
 auto make_lsh_nearest_neighbors_regression_tuple_hash(
              TrainingPoints &&training_points, TrainingResults &&training_results,
              unsigned passes,
              FunctionGenerator &&function_generator,
              unsigned hash_functions_per_point,
              unsigned threads_count = std::thread::hardware_concurrency()) {

     return make_lsh_nearest_neighbors_regression(
             std::forward<TrainingPoints>(training_points),
             std::forward<TrainingResults>(training_results),
             passes,
             // temporary tuple generator, lives until the model is constructed
             paal::make_hash_function_tuple_generator(
                     std::forward<FunctionGenerator>(function_generator),
                     hash_functions_per_point),
             threads_count);
 }


 }


 #endif // PAAL_LSH_NEAREST_NEIGHBOURS_REGRESSION_HPP

paal::range_to_elem_t
typename boost::range_value< Range >::type range_to_elem_t
for given range returns type of its element
Definition: type_functions.hpp:40

paal::thread_pool::post
void post(Functor f)
post new task
Definition: thread_pool.hpp:39

paal::lsh::random_projection_hash_function_generator
Factory class for projection_hash_function.
Definition: lsh_functions.hpp:102

paal::lsh_nearest_neighbors_regression::update
void update(TrainingPoints &&training_points, TrainingResults &&training_results, unsigned threads_count=std::thread::hardware_concurrency())
trains the model
Definition: lsh_nearest_neighbors_regression.hpp:267

paal::lsh_nearest_neighbors_regression::lsh_nearest_neighbors_regression
lsh_nearest_neighbors_regression()=default
default constructor, only for serialization purpose

type_functions.hpp

paal::hash_function_tuple::hash_function_tuple
hash_function_tuple(Funs funs)
constructor
Definition: lsh_nearest_neighbors_regression.hpp:75

paal::hash_function_tuple::operator()
auto operator()(Point &&point, detail::lightweight_tag) const
Definition: lsh_nearest_neighbors_regression.hpp:99

unordered_map_serialization.hpp
from https://code.google.com/p/ntest/source/browse/unordered_map_serialization.h

lsh_functions.hpp

paal::average_accumulator::empty
bool empty() const
Definition: accumulate_functors.hpp:145

paal::average_accumulator::get_average
ReturnType get_average(ReturnType default_value=ReturnType{}) const
Definition: accumulate_functors.hpp:167

paal::make_lsh_nearest_neighbors_regression
auto make_lsh_nearest_neighbors_regression(TrainingPoints &&training_points, TrainingResults &&training_results, unsigned passes, LshFunctionGenerator &&lsh_function_generator, unsigned threads_count=std::thread::hardware_concurrency())
this is the most general version of the make_lsh_nearest_neighbors_regression, It takes any hash func...
Definition: lsh_nearest_neighbors_regression.hpp:359

paal::make_hash_function_tuple_generator
auto make_hash_function_tuple_generator(FunctionGenerator &&function_generator, unsigned hash_functions_per_point)
Definition: lsh_nearest_neighbors_regression.hpp:156

paal::lsh_nearest_neighbors_regression::lsh_nearest_neighbors_regression
lsh_nearest_neighbors_regression(TrainingPoints &&training_points, TrainingResults &&training_results, unsigned passes, LshFunctionGenerator &&lsh_function_generator, unsigned threads_count=std::thread::hardware_concurrency())
initializes the model and trains it using training points and results
Definition: lsh_nearest_neighbors_regression.hpp:233

paal::hash_function_tuple::operator()
auto operator()(Point &&point) const
operator()(), returns vector of hash values
Definition: lsh_nearest_neighbors_regression.hpp:85

paal::make_lsh_nearest_neighbors_regression_tuple_hash
auto make_lsh_nearest_neighbors_regression_tuple_hash(TrainingPoints &&training_points, TrainingResults &&training_results, unsigned passes, FunctionGenerator &&function_generator, unsigned hash_functions_per_point, unsigned threads_count=std::thread::hardware_concurrency())
This is the special version of make_lsh_nearest_neighbors_regression. This version assumes that hash ...
Definition: lsh_nearest_neighbors_regression.hpp:399

thread_pool.hpp

paal::average_accumulator::add_value
void add_value(ValueType value, CounterType cnt=1)
Definition: accumulate_functors.hpp:107

paal::lsh_nearest_neighbors_regression::serialize
void serialize(Archive &ar, const unsigned int version)
serialization
Definition: lsh_nearest_neighbors_regression.hpp:212

paal::thread_pool
simple threadpool, class uses also current thread!
Definition: thread_pool.hpp:25

paal::thread_pool::run
void run()
run all posted tasks (blocking)
Definition: thread_pool.hpp:45

paal::hash_function_tuple::serialize
void serialize(Archive &ar, const unsigned int version)
serialize
Definition: lsh_nearest_neighbors_regression.hpp:67

paal::hash_function_tuple_generator::operator()
hash_function_tuple< funs_t > operator()() const
Definition: lsh_nearest_neighbors_regression.hpp:135

paal::hash_function_tuple_generator::hash_function_tuple_generator
hash_function_tuple_generator(FunctionGenerator function_generator, unsigned hash_functions_per_point)
Definition: lsh_nearest_neighbors_regression.hpp:122

paal::hash_function_tuple
functor representing tuple of hash functions
Definition: lsh_nearest_neighbors_regression.hpp:48

paal::lsh_nearest_neighbors_regression::operator==
bool operator==(lsh_nearest_neighbors_regression const &other) const
operator==
Definition: lsh_nearest_neighbors_regression.hpp:250

paal::utils::equal_to_unspecified
TODO equivalent to C++14 std::equal_to<>, remove when it becomes available.
Definition: functors.hpp:573

paal::lsh_nearest_neighbors_regression::test
void test(TestPoints &&test_points, OutputIterator result) const
queries model, does not have threads_count parameter, because this is much more natural to do from o...
Definition: lsh_nearest_neighbors_regression.hpp:292

paal::lsh_nearest_neighbors_regression
detail
Definition: lsh_nearest_neighbors_regression.hpp:194

hash.hpp

paal::pure_result_of_t
typename std::decay< typename std::result_of< F >::type >::type pure_result_of_t
return pure type of function (decays const and reference)
Definition: type_functions.hpp:65

accumulate_functors.hpp

paal::hash_function_tuple_generator
Definition: lsh_nearest_neighbors_regression.hpp:110

paal::average_accumulator
helper class facilitating counting average
Definition: accumulate_functors.hpp:72

paal::range_to_ref_t
typename boost::range_reference< Range >::type range_to_ref_t
for given range returns type of its reference
Definition: type_functions.hpp:36

paal::detail::lightweight_tag
Definition: lsh_nearest_neighbors_regression.hpp:40

paal::hash_function_tuple::operator==
bool operator==(hash_function_tuple const &other) const
operator==
Definition: lsh_nearest_neighbors_regression.hpp:79