15 #ifndef PAAL_LSH_FUNCTIONS_HPP
16 #define PAAL_LSH_FUNCTIONS_HPP
22 #include <boost/range/algorithm/equal.hpp>
23 #include <boost/range/algorithm/generate.hpp>
24 #include <boost/range/algorithm/min_element.hpp>
25 #include <boost/range/algorithm_ext/iota.hpp>
26 #include <boost/range/counting_range.hpp>
27 #include <boost/range/empty.hpp>
28 #include <boost/range/iterator.hpp>
29 #include <boost/range/numeric.hpp>
30 #include <boost/range/size.hpp>
31 #include <boost/range/adaptor/transformed.hpp>
32 #include <boost/numeric/ublas/vector.hpp>
33 #include <boost/numeric/ublas/vector_sparse.hpp>
34 #include <boost/numeric/ublas/vector_expression.hpp>
40 #include <type_traits>
50 template <
typename IntType = std::
size_t>
52 IntType m_chosen_position;
56 template<
class Archive>
57 void serialize(Archive & ar,
const unsigned int version) {
58 ar & m_chosen_position;
71 m_chosen_position(chosen_position) {
76 return m_chosen_position == other.m_chosen_position;
87 template <
typename Range>
93 return range[m_chosen_position];
100 template <
typename RandomEngine = std::default_random_engine,
101 typename IntType = std::size_t>
103 mutable RandomEngine m_generator;
104 using distribution_t = std::uniform_int_distribution<IntType>;
105 mutable distribution_t m_distribution;
107 static IntType convert_to_inclusive(IntType upper_bound) {
108 assert(upper_bound > 0);
109 return upper_bound - 1;
120 RandomEngine random_engine = RandomEngine{}) :
121 m_generator(std::move(random_engine)),
122 m_distribution(0, convert_to_inclusive(range_size)) {
136 using hamming_hash_function_generator =
137 random_projection_hash_function_generator<>;
141 template <
typename FloatType =
double>
145 using r_param_t = boost::numeric::ublas::vector<FloatType>;
155 template<
class Archive>
156 void serialize(Archive & ar,
const unsigned int version) {
171 FloatType b, FloatType w) :
172 m_r(std::move(r)), m_b(b), m_w(w) {
180 return boost::equal(m_r, other.m_r) &&
193 template <
typename Range>
201 using boost::numeric::ublas::inner_prod;
202 auto inner_product = inner_prod(m_r, range);
203 return std::floor((m_b + inner_product) / m_w);
211 template <
typename FloatType = double,
212 typename RandomEngine = std::default_random_engine,
213 typename Distribution = std::normal_distribution<FloatType>>
215 std::size_t m_range_size;
217 mutable RandomEngine m_generator;
218 mutable Distribution m_r_distribution;
219 mutable std::uniform_real_distribution<FloatType> m_b_distribution;
231 RandomEngine random_engine = RandomEngine{}) :
232 m_range_size(range_size),
234 m_generator(std::move(random_engine)),
235 m_b_distribution(FloatType{}, w) {
244 typename l_p_hash_function<FloatType>::r_param_t r(m_range_size);
245 boost::generate(r, [&]() {
246 return m_r_distribution(m_generator);
249 FloatType b = m_b_distribution(m_generator);
255 template <
typename FloatType = double,
256 typename RandomEngine = std::default_random_engine>
257 using l_1_hash_function_generator =
258 l_p_hash_function_generator<FloatType, RandomEngine,
259 std::cauchy_distribution<FloatType>>;
262 template <
typename FloatType = double,
263 typename RandomEngine = std::default_random_engine>
264 using l_2_hash_function_generator =
265 l_p_hash_function_generator<FloatType, RandomEngine>;
270 std::vector<std::size_t> m_perm;
275 template<
class Archive>
276 void serialize(Archive & ar,
const unsigned int version) {
281 template <
typename RandomEngine>
283 : m_perm(set_element_upper_bound) {
284 boost::iota(m_perm, 0);
285 std::shuffle(m_perm.begin(), m_perm.end(), rng);
293 return boost::equal(m_perm, other.m_perm);
305 template <
typename Range>
308 using iter_t =
typename boost::range_iterator<Range>::type;
310 auto perm_function = [&](iter_t it) {
311 auto set_elem = it.index();
312 assert(set_elem < m_perm.size());
313 return m_perm[set_elem];
316 boost::counting_range(range)
319 assert(!boost::empty(perm_range));
320 return *boost::min_element(perm_range);
325 template <
typename RandomEngine = std::default_random_engine>
327 std::size_t m_range_size;
328 mutable RandomEngine m_generator;
333 RandomEngine random_engine = RandomEngine{}) :
334 m_range_size(range_size),
335 m_generator(std::move(random_engine)) { }
342 using jaccard_hash_function_generator =
343 min_hash_function_generator<>;
349 #endif // PAAL_LSH_FUNCTIONS_HPP
typename boost::range_value< Range >::type range_to_elem_t
for given range returns type of its element
bool operator==(projection_hash_function const &other) const
operator==
Factory class for projection_hash_function.
projection_hash_function< IntType > operator()() const
operator()
min_hash_function operator()() const
operator()
l_p_hash_function()=default
default constructor
Factory class for l_p_hash_function.
random_projection_hash_function_generator(IntType range_size, RandomEngine random_engine=RandomEngine{})
constructor
min_hash_function_generator(std::size_t range_size, RandomEngine random_engine=RandomEngine{})
constructor
auto make_assignable_functor(Functor &f)
make function for assignable_functor
This file contains set of simple useful functors or functor adapters.
void serialize(Archive &ar, const unsigned int version)
serialize
min-wise independent permutations locality sensitive hashing (Jaccard)
min_hash_function(std::size_t set_element_upper_bound, RandomEngine &&rng)
constructor
l_p_hash_function_generator(std::size_t range_size, FloatType w, RandomEngine random_engine=RandomEngine{})
constructor
void serialize(Archive &ar, const unsigned int version)
serialize
void serialize(Archive &ar, const unsigned int version)
serialize
l_p_hash_function(r_param_t r, FloatType b, FloatType w)
constructor
auto operator()(Range &&range) const
operator()
projection_hash_function()=default
default constructor
auto operator()(Range &&range) const
operator()
l_p_hash_function< FloatType > operator()() const
operator()
bool operator==(min_hash_function const &other) const
operator==
bool operator==(l_p_hash_function const &other) const
operator==
hash_function for l_p distance for p in range (0,2]
Factory class for min_hash_function.
projection_hash_function(IntType chosen_position)
constructor
auto operator()(Range &&range) const -> range_to_elem_t< decltype(range)>
operator()
min_hash_function()=default
default constructor