All Classes Namespaces Files Functions Variables Typedefs Enumerations Friends Macros Pages
read_svm.hpp
Go to the documentation of this file.
1 //=======================================================================
2 // Copyright (c) 2014 Andrzej Pacuk
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See
5 // accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
7 //=======================================================================
15 #ifndef PALL_READ_SVM_HPP
16 #define PALL_READ_SVM_HPP
17 
19 #include "paal/utils/functors.hpp"
22 
23 #include <algorithm>
24 #include <cassert>
25 #include <istream>
26 #include <ios>
27 #include <string>
28 #include <sstream>
29 #include <utility>
30 #include <vector>
31 
32 namespace paal {
33 
34 namespace detail {
/**
 * @brief Resizes every row of a range to the same length.
 *
 * @tparam RowsRange range of elements that each hold (or are) a row
 * @tparam RowRefExtractor functor mapping an element of the range to a
 *         reference to its resizable row
 * @param rows range whose rows get resized
 * @param row_ref_extractor called on each element; `resize(new_size)` is
 *        invoked on whatever it returns
 * @param new_size size passed to every row's `resize`
 */
template <typename RowsRange, typename RowRefExtractor>
void resize_rows(RowsRange &&rows, RowRefExtractor row_ref_extractor,
                 std::size_t new_size) {
    using std::begin;
    using std::end;
    std::for_each(begin(rows), end(rows), [&](auto &element) {
        row_ref_extractor(element).resize(new_size);
    });
}
51 
52 
53 
61 template <typename CoordinatesType,
62  typename ResultType = int,
63  typename FeatureIdType = std::size_t>
64 class svm_row {
65  CoordinatesType m_coordinates;
66  ResultType m_result;
67  bool m_has_out_of_bounds_feature_id = false;
68  using coordinate_t = range_to_elem_t<CoordinatesType>;
69 
70 public:
72  svm_row(FeatureIdType dimensions = 1) : m_coordinates(dimensions) {}
73 
94  friend std::istream& operator>>(std::istream &row_stream, svm_row &row) {
95  row.nullify_coordinates();
96 
97  if (!row.read_result(row_stream)) {
98  return row_stream;
99  }
100 
101  while (row_stream.good()) {
102  row.read_single_feature(row_stream);
103  }
104 
105  row.fail_stream_if_out_of_bound_features(row_stream);
106 
107  return row_stream;
108  }
109 
111  CoordinatesType const &get_coordinates() const { return m_coordinates; }
113  ResultType const &get_result() const { return m_result; }
114 
115 private:
116  void nullify_coordinates() {
117  auto size = m_coordinates.size();
118  m_coordinates.clear();
119  m_coordinates.resize(size);
120  }
121 
122  std::istream& read_result(std::istream &row_stream) {
123  ResultType result;
124  if (!(row_stream >> result)) {
125  return row_stream;
126  }
127  m_result = (result == 1) ? 1 : 0;
128 
129  return row_stream;
130  }
131 
132  std::istream& read_single_feature(std::istream &row_stream) {
133  FeatureIdType feature_id;
134  read_feature_id(row_stream, feature_id);
135  if (!row_stream.good()) {
136  return row_stream;
137  }
138 
139  if (!skip_exact_character(row_stream, ':')) {
140  return row_stream;
141  }
142 
143  coordinate_t coordinate;
144  if (!(row_stream >> coordinate)) {
145  return row_stream;
146  }
147 
148  save(feature_id, coordinate);
149  return row_stream;
150  }
151 
152  std::istream& read_feature_id(std::istream &stream, FeatureIdType &feature_id) const {
153  stream >> feature_id;
154  if (stream.fail() && stream.eof()) {
155  flip_state(stream, std::ios::failbit);
156  }
157  return stream;
158  }
159 
160  void flip_state(std::istream &stream, const std::ios::iostate &state) const {
161  stream.clear(stream.rdstate() ^ state);
162  }
163 
164  std::istream& skip_exact_character(std::istream &stream, char character) const {
165  char c;
166  stream.get(c);
167  if (!stream.good() || c != character) {
168  stream.setstate(std::ios::failbit);
169  }
170  return stream;
171  }
172 
173  void save(FeatureIdType feature_id, coordinate_t coordinate) {
174  if (m_coordinates.size() <= feature_id) {
175  m_has_out_of_bounds_feature_id = true;
176  m_coordinates.resize(feature_id + 1);
177  }
178  m_coordinates[feature_id] = coordinate;
179  }
180 
181  void fail_stream_if_out_of_bound_features(std::istream &stream) const {
182  if (m_has_out_of_bounds_feature_id) {
183  stream.setstate(std::ios::failbit);
184  }
185  }
186 };
187 
188 }
189 
203 template <typename RowType,
204  typename ResultType = int,
205  typename ShouldIgnoreBadRow = utils::always_false>
206 void read_svm(std::istream &input_stream,
207  std::size_t &max_dimensions,
208  std::vector<std::tuple<RowType, ResultType>> &points,
209  std::size_t max_points_to_read,
210  ShouldIgnoreBadRow &&should_ignore_bad_row = ShouldIgnoreBadRow{}) {
211  assert(input_stream.good());
212 
213  detail::svm_row<RowType, ResultType> row{max_dimensions};
214  std::string line;
215  while ((max_points_to_read--) && std::getline(input_stream, line)) {
216  std::stringstream row_stream(line);
217  row_stream >> row;
218  if (row_stream || !should_ignore_bad_row(line)) {
219  assign_max(max_dimensions, row.get_coordinates().size());
220  points.emplace_back(row.get_coordinates(),
221  row.get_result());
222  }
223  }
224 }
225 
236 template <typename RowType,
237  typename ResultType = int>
238 auto read_svm(std::istream &input_stream) {
239  assert(input_stream.good());
240 
241  using point_with_result_t = std::tuple<RowType, ResultType>;
242 
243  std::size_t max_dimensions = 0;
244  std::size_t max_points_to_read = 1;
245  std::vector<point_with_result_t> points;
246  while (input_stream.good()) {
247  read_svm(input_stream, max_dimensions, points, max_points_to_read);
248  }
249  detail::resize_rows(points, utils::tuple_get<0>(), max_dimensions);
250 
251  return std::make_tuple(points, max_dimensions);
252 }
253 
254 }
255 
256 #endif /* PALL_READ_SVM_HPP */
257 
typename boost::range_value< Range >::type range_to_elem_t
for given range returns type of its element
friend std::istream & operator>>(std::istream &row_stream, svm_row &row)
reads svm row of format:
Definition: read_svm.hpp:94
CoordinatesType const & get_coordinates() const
coordinates getter
Definition: read_svm.hpp:111
functor that always returns false
Definition: functors.hpp:222
This file contains a set of simple, useful functors and functor adapters.
class that can read a single svm row
Definition: read_svm.hpp:64
void assign_max(T &t, const T &u)
ResultType const & get_result() const
result getter
Definition: read_svm.hpp:113
svm_row(FeatureIdType dimensions=1)
constructor
Definition: read_svm.hpp:72
void resize_rows(RowsRange &&rows, RowRefExtractor row_ref_extractor, std::size_t new_size)
resize rows to have equal sizes
Definition: read_svm.hpp:45
functor for std::tuple::get<I>
Definition: functors.hpp:1002
void read_svm(std::istream &input_stream, std::size_t &max_dimensions, std::vector< std::tuple< RowType, ResultType >> &points, std::size_t max_points_to_read, ShouldIgnoreBadRow &&should_ignore_bad_row=ShouldIgnoreBadRow{})
detail
Definition: read_svm.hpp:206