All Classes Namespaces Files Functions Variables Typedefs Enumerations Friends Macros Pages
frequent_directions.hpp
Go to the documentation of this file.
1 //=======================================================================
2 // Copyright (c)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See
5 // accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
7 //=======================================================================
15 #ifndef PAAL_FREQUENT_DIRECTIONS_HPP
16 #define PAAL_FREQUENT_DIRECTIONS_HPP
17 
19 #include "paal/utils/irange.hpp"
20 
21 #include <boost/numeric/bindings/lapack/gesvd.hpp>
22 #include <boost/numeric/ublas/detail/matrix_assign.hpp>
23 #include <boost/numeric/ublas/matrix.hpp>
24 #include <boost/numeric/ublas/matrix_expression.hpp>
25 
26 #include <boost/range/algorithm/copy.hpp>
27 #include <boost/range/distance.hpp>
28 #include <boost/range/size.hpp>
29 
30 #include <utility>
31 
32 namespace paal {
33 
34 namespace detail {
35 
36 template <typename InputRow,
37  typename DestRow,
38  typename Dummy>
39 void copy_row_to_matrix(InputRow &&input_row, DestRow &dest_matrix_row,
40  Dummy /* is_row_of_sparse_matrix */,
41  std::true_type /* rows_are_assignable */) {
42  assert(boost::size(dest_matrix_row) == boost::size(input_row));
43  dest_matrix_row = input_row;
44 };
45 
46 template <typename InputRow,
47  typename DestRow>
48 void copy_row_to_matrix(InputRow &&input_row, DestRow &dest_matrix_row,
49  std::false_type /* is_row_of_sparse_matrix*/,
50  std::false_type /* rows_are_assignable */) {
51  //boost::size does not compile on ublas structures
52  assert(boost::distance(dest_matrix_row) ==
53  std::distance(std::begin(input_row),
54  std::end(input_row)));
55  std::copy(std::begin(input_row),
56  std::end(input_row),
57  std::begin(dest_matrix_row));
58 };
59 
60 template <typename InputRow,
61  typename DestRow>
62 void copy_row_to_matrix(InputRow &&input_row, DestRow &dest_matrix_row,
63  std::true_type /* is_row_of_sparse_matrix */,
64  std::false_type /* rows_are_assignable */) {
65  for (auto it = std::begin(input_row); it != std::end(input_row); ++it) {
66  assert(it.index2() <= dest_matrix_row.size());
67  dest_matrix_row(it.index2()) = *it;
68  }
69 };
70 
71 
72 template <typename InputRow,
73  typename DestRow>
74 void copy_row_to_matrix(InputRow &&input_row, DestRow &dest_matrix_row) {
75  copy_row_to_matrix(std::forward<InputRow>(input_row), dest_matrix_row,
76  typename data_structures::is_sparse_row<InputRow>::type(),
77  typename std::is_assignable<DestRow,InputRow>::type());
78 };
79 
80 } // detail
81 
91 template <typename Matrix>
93  Matrix m_sketch;
94  std::size_t m_actual_size;
95  std::size_t m_compress_size;
96 
98  using coordinate_t = typename matrix_types::coordinate_t;
99 
100 public:
102  template<class Archive>
103  void serialize(Archive & ar, const unsigned int version) {
104  ar & m_actual_size;
105  ar & m_compress_size;
106  ar & m_sketch;
107  }
108 
115  frequent_directions(Matrix matrix, std::size_t const compress_size)
116  : m_sketch(std::move(matrix)), m_actual_size(0),
117  m_compress_size(std::min(compress_size, matrix_types::num_columns(m_sketch))) {
118  assert(m_compress_size >= 0);
119  assert(m_compress_size < matrix_types::num_rows(m_sketch));
120  }
121 
123  frequent_directions() : m_actual_size(0), m_compress_size(0) {}
124 
126  bool operator==(frequent_directions const & other) const {
127  return m_actual_size == other.m_actual_size &&
128  m_compress_size == other.m_compress_size &&
129  boost::numeric::ublas::detail::expression_type_check(m_sketch, other.m_sketch);
130  }
131 
133  template <typename MatrixData>
134  void update(MatrixData&& matrix) {
135  for (auto row = matrix.begin1(); row != matrix.end1(); ++row) {
136  update_row(std::move(row));
137  }
138  }
139 
141  template <typename InputRow>
142  void update_row(InputRow&& input_row) {
143  if(m_actual_size == matrix_types::num_rows(m_sketch))
144  {
145  compress();
146  }
147  typename matrix_types::matrix_row_t dest_row{m_sketch, m_actual_size++};
148  detail::copy_row_to_matrix(std::forward<InputRow>(input_row), dest_row);
149  }
150 
151  //TODO add number of threads
153  template <typename RowRange>
154  void update_range(RowRange&& row_range) {
155  for(auto const & row : row_range)
156  update_row(row);
157  }
158 
164  void compress() {
165  auto rows_count = matrix_types::num_rows(m_sketch);
166  auto columns_count = matrix_types::num_columns(m_sketch);
167  auto min_dimension = std::min(rows_count, columns_count);
168 
169  typename matrix_types::matrix_column_major_t u{rows_count,min_dimension}, vt{min_dimension,columns_count};
170  typename matrix_types::vector_t sigma{min_dimension};
171 
172  typename matrix_types::matrix_column_major_t sketch{std::move(m_sketch)};
173  boost::numeric::bindings::lapack::gesvd(sketch, sigma, u, vt);
174 
175  coordinate_t const delta = m_compress_size < min_dimension ? sigma[m_compress_size] * sigma[m_compress_size] : coordinate_t{};
176 
177  for (auto &sigma_i : sigma) {
178  sigma_i = std::sqrt(std::max(sigma_i * sigma_i - delta, coordinate_t{}));
179  }
180 
181  typename matrix_types::matrix_diagonal_t s_diagonal{rows_count, min_dimension, 0, 0, std::move(sigma.data())};
182  m_sketch = boost::numeric::ublas::prod(s_diagonal, vt);
183 
184  m_actual_size = m_compress_size;
185  }
186 
192  std::pair<Matrix const &, std::size_t> get_sketch() {
193  return std::make_pair(std::cref(m_sketch), m_actual_size);
194  }
195 };
196 
197 
199 template <typename Matrix>
200 auto make_frequent_directions(Matrix matrix) {
201  std::size_t const compress_size = data_structures::matrix_type_traits<Matrix>::num_rows(matrix) / 2;
202  return frequent_directions<Matrix>{std::move(matrix), compress_size};
203 }
204 
206 template <typename Matrix>
207 auto make_frequent_directions(Matrix matrix, std::size_t const compress_size) {
208  return frequent_directions<Matrix>{std::move(matrix), compress_size};
209 }
210 
212 template <typename CoordinateType>
213 auto make_frequent_directions(std::size_t rows_count, std::size_t columns_count) {
214  boost::numeric::ublas::matrix<CoordinateType> matrix{rows_count, columns_count, CoordinateType{}};
215  return frequent_directions<boost::numeric::ublas::matrix<CoordinateType>>{std::move(matrix), rows_count / 2};
216 }
217 
219 template <typename CoordinateType>
220 auto make_frequent_directions(std::size_t rows_count, std::size_t columns_count, std::size_t const compress_size) {
221  boost::numeric::ublas::matrix<CoordinateType> matrix{rows_count, columns_count, CoordinateType{}};
222  return frequent_directions<boost::numeric::ublas::matrix<CoordinateType>>{std::move(matrix), compress_size};
223 }
224 
225 } // paal
226 
227 #endif // PAAL_FREQUENT_DIRECTIONS_HPP
Traits class for matrix related types.
Represents sketch of matrix.
void update(MatrixData &&matrix)
Adds new data in matrix form.
frequent_directions()
default constructor, only for serialization purpose
void update_row(InputRow &&input_row)
Adds one new row.
void update_range(RowRange &&row_range)
Adds new rows.
void compress()
Compress sketch.
frequent_directions(Matrix matrix, std::size_t const compress_size)
Creates sketch.
bool operator==(frequent_directions const &other) const
operator==
auto make_frequent_directions(Matrix matrix)
make for frequent_directions
void serialize(Archive &ar, const unsigned int version)
serialize
std::pair< Matrix const &, std::size_t > get_sketch()