23 #include <boost/archive/binary_iarchive.hpp>
24 #include <boost/archive/binary_oarchive.hpp>
25 #include <boost/numeric/ublas/io.hpp>
26 #include <boost/numeric/ublas/matrix.hpp>
27 #include <boost/program_options.hpp>
// Short namespace aliases used throughout this file.
35 namespace utils = paal::utils;
36 namespace po = boost::program_options;
// Scalar type used for parsed input values and for the entries of the sketch matrix.
37 using coordinate_t = double;
// Dense uBLAS matrix of coordinate_t.
38 using matrix_t = boost::numeric::ublas::matrix<coordinate_t>;
// Bound to the --sketch_compress_size option in main(); passed to
// paal::make_frequent_directions only when that option is present.
43 size_t m_sketch_compress_size;
// Bound to the --row_buffer_size option in main(); used as the reserve size of
// the row buffer and passed to the row-reading helpers in m_main().
46 size_t m_row_buffer_size;
// Bound to the --final_compress option in main(); when true, m_main() calls
// compress() on the sketch after all input has been consumed.
47 bool m_compress_at_end;
// Builds or loads a sketch, feeds it with the rows read from input_stream and
// writes the result to output_stream.
//
// vm            - parsed command line; queried for "model_in",
//                 "sketch_compress_size" and "model_out".
// p             - numeric parameters (sketch rows, compress size, buffer size,
//                 final-compress flag).
// input_stream  - source of the data rows.
// output_stream - destination of the printed sketch.
//
// NOTE(review): the embedded source line numbers skip in this listing, so some
// statements (closing braces, else branches, the declarations of `sketch` and
// `fd_sketch`, the print/serialize calls) are not visible here.
50 void m_main(po::variables_map
const &vm,
params const &p,
51 std::istream &input_stream, std::ostream &output_stream) {
// Rows are read in batches; capacity is reserved up front for one full batch.
54 std::vector<std::vector<coordinate_t>> row_buffer;
55 row_buffer.reserve(p.m_row_buffer_size);
// Callback handed to the row readers: emits a warning containing the offending line.
57 auto ignore_bad_row = [&](std::string
const &bad_line) {
58 utils::warning(
"following line will be ignored cause of bad format: ", bad_line);
62 std::size_t rows_count;
63 std::size_t columns_count;
// When a model file is given, it is opened as a Boost binary archive and the
// sketch dimensions are taken from the matrix `sketch` (declared/loaded in
// lines not visible here).
// NOTE(review): no check that the file was opened successfully is visible here.
64 if (vm.count(
"model_in")) {
65 std::ifstream ifs(vm[
"model_in"].as<std::string>());
66 boost::archive::binary_iarchive ia(ifs);
69 rows_count = sketch.size1();
70 columns_count = sketch.size2();
// Presumably the no-model branch (the `else` is not visible): read a first
// batch of rows into row_buffer.
73 paal::read_rows_first_row_size<coordinate_t>
74 (input_stream, row_buffer, p.m_row_buffer_size, ignore_bad_row);
// Handling of the "no rows were read" case is elided from this listing.
76 if(row_buffer.empty()) {
// Sketch height comes from the parameters, width from the first buffered row.
80 rows_count = p.m_sketch_rows;
81 columns_count = boost::size(row_buffer.front());
// Pass the compress size only if the user supplied it; otherwise use the
// two-argument factory overload.
82 if(vm.count(
"sketch_compress_size")) {
83 fd_sketch = paal::make_frequent_directions<coordinate_t>(rows_count, columns_count, p.m_sketch_compress_size);
86 fd_sketch = paal::make_frequent_directions<coordinate_t>(rows_count, columns_count);
// Feed the rows buffered so far into the sketch.
89 fd_sketch.update_range(row_buffer);
// Keep reading batches and updating the sketch while the stream reports good().
92 while (input_stream.good()) {
94 paal::read_rows<coordinate_t>
95 (input_stream, row_buffer, columns_count, p.m_row_buffer_size, ignore_bad_row);
96 fd_sketch.update_range(row_buffer);
// Optional final compression, controlled by p.m_compress_at_end.
99 if (p.m_compress_at_end) {
100 fd_sketch.compress();
// get_sketch() yields a (matrix, size_t) pair; the view below covers rows
// [0, second) and all columns_count columns of the matrix.
// NOTE(review): `auto sketch` looks like it copies the matrix returned by
// reference; presumably `second` is the number of nonzero rows - confirm.
103 auto sketch = fd_sketch.get_sketch().first;
104 boost::numeric::ublas::matrix_range<matrix_t> sketch_range (sketch,
105 boost::numeric::ublas::range(0, fd_sketch.get_sketch().second),
106 boost::numeric::ublas::range(0, columns_count));
108 output_stream << std::endl;
// When requested, open the model output file as a Boost binary archive; the
// actual serialization statement is not visible in this listing.
110 if (vm.count(
"model_out")) {
111 std::ofstream ofs(vm[
"model_out"].as<std::string>());
112 boost::archive::binary_oarchive oa(ofs);
// Program entry point: declares the command line options, parses and validates
// them, selects the input/output streams and passes them on.
//
// NOTE(review): the embedded source line numbers skip in this listing, so some
// statements (declarations of `p`, `ifs`, `ofs`, the add_options() call, lambda
// bodies, closing braces and return statements) are not visible here.
118 int main(
int argc,
char** argv) {
// Usage banner shown together with the option list.
// NOTE(review): "numer_of_sketch_rows" in the text below looks like a typo for
// "number_of_sketch_rows".
121 po::options_description desc(
"Frequent-directions - \n"\
122 "suite for a matrix sketching using Singular Value Decomposition\n\nUsage:\n"\
123 "This command will read data from standard input and write computed sketch to standard output:\n"\
124 "\tfrequent-directions --sketch_rows numer_of_sketch_rows\n\n"\
125 "If you want to read data from an input_file and write computed sketch to an output_file you can use following command:\n"\
126 "\tfrequent-directions --input input_file --output output_file -r rows\n\n"\
127 "If you want to change compress_size and save model you can use following command:\n"\
128 "\tfrequent-directions -i input_file -r rows -s compress_size --model_out model\n\n"\
129 "Then if you want to use this model and add additional data:\n"\
130 "\tfrequent-directions -i input_file --model_in model\n\n"\
131 "Options description");
// Option table. sketch_rows, sketch_compress_size, final_compress and
// row_buffer_size store their values directly into fields of `p`.
134 (
"help,h",
"help message")
// NOTE(review): "standart" in the next two descriptions is a typo for "standard".
135 (
"input,i", po::value<std::string>(),
"path to the file with input data in csv format with space as delimiter, "\
136 "(default read from standart input)")
137 (
"output,o", po::value<std::string>(),
"path to the file with result sketch matrix, only nonzero rows are printed, "\
138 "(default write to standart output)")
139 (
"sketch_rows,r", po::value<std::size_t>(&p.m_sketch_rows),
"number of sketch rows")
140 (
"sketch_compress_size,s", po::value<size_t>(&p.m_sketch_compress_size),
"sketch compress size, "\
141 "(default is half of number of sketch rows)")
142 (
"model_in", po::value<std::string>(),
"read the sketch model from this file")
143 (
"model_out", po::value<std::string>(),
"write the sketch model to this file")
144 (
"final_compress", po::value<bool>(&p.m_compress_at_end)->default_value(
true),
145 "determine if sketch will be compressed after update all data, "\
146 "compression in the final phase is necessary to fulfill sketch approximation ratios")
150 (
"row_buffer_size", po::value<std::size_t>(&p.m_row_buffer_size)->default_value(100000),
151 "size of row buffer (default value = 100000)")
// Parse argv against the option table into vm.
// NOTE(review): a po::notify(vm) call is presumably on an elided line; it is
// needed for the &p.* bindings above to be filled in - confirm.
154 po::variables_map vm;
155 po::store(po::parse_command_line(argc, argv, desc), vm);
// True only when the option is present and its value did not come from default_value().
// NOTE(review): "¶m_name" below appears to be a mis-encoded "&param_name"
// (the body uses param_name); verify against the original file.
158 auto param_is_set_explicitly = [&vm] (
const std::string ¶m_name) {
159 return vm.count(param_name) > 0 && !vm[param_name].defaulted();
// --help handling; the body is not visible in this listing.
162 if (vm.count(
"help")) {
// Helper used below to report an invalid command line; the body is not visible here.
167 auto error_with_usage = [&] (
const std::string &message) {
// Either an input model or the number of sketch rows is required.
171 if (vm.count(
"model_in") == 0 && vm.count(
"sketch_rows") == 0) {
172 error_with_usage(
"Input model sketch or number of sketch rows was not set");
// With a model given, explicitly set sketch_rows / sketch_compress_size are
// reported as discarded via a warning.
175 if (vm.count(
"model_in")) {
176 auto ignored = [&](std::string
const & param) {
177 if (param_is_set_explicitly(param)) {
178 utils::warning(
"parameter ", param,
" was set, but model_in is used, param ", param,
" is discarded");
181 ignored(
"sketch_rows");
182 ignored(
"sketch_compress_size");
// m_row_buffer_size is declared as size_t, so this condition can only hold for 0.
185 if (p.m_row_buffer_size <= 0) {
186 error_with_usage(
"Size of row buffer must be positive");
// Open the input/output files only when the corresponding option was given.
// NOTE(review): no check that the files were opened successfully is visible here.
190 if (vm.count(
"input")) {
191 ifs.open(vm[
"input"].as<std::string>());
195 if (vm.count(
"output")) {
196 ofs.open(vm[
"output"].as<std::string>());
// Stream arguments of a call whose beginning is elided (presumably m_main):
// file streams when given, otherwise std::cin / std::cout.
200 vm.count(
"input") ? ifs : std::cin,
201 vm.count(
"output") ? ofs : std::cout);
Represents a sketch of a matrix.
void print_matrix(Stream &o, Matrix &&m, const std::string &del)
prints matrix with delimiters
auto info(Arg &&arg, Args...args)
prints info message
This file contains a set of simple, useful functors or functor adapters.
auto warning(Arg &&arg, Args...args)
prints warning message
auto failure(Arg &&arg, Args...args)
prints failure message
std::pair< Matrix const &, std::size_t > get_sketch()