All Classes Namespaces Files Functions Variables Typedefs Enumerations Friends Macros Pages
mapped_file.hpp
Go to the documentation of this file.
1 //=======================================================================
2 // Copyright (c) 2014 Karol Wegrzycki
3 //
4 //
5 // Distributed under the Boost Software License, Version 1.0. (See
6 // accompanying file LICENSE_1_0.txt or copy at
7 // http://www.boost.org/LICENSE_1_0.txt)
8 //=======================================================================
9 
18 #ifndef PAAL_MAPPED_FILE_HPP
19 #define PAAL_MAPPED_FILE_HPP
20 
21 #define BOOST_ERROR_CODE_HEADER_ONLY
22 #define BOOST_SYSTEM_NO_DEPRECATED
23 
25 #include "paal/utils/irange.hpp"
26 
28 
29 #include <boost/iostreams/device/mapped_file.hpp>
30 
31 #include <string>
32 #include <vector>
33 #include <thread>
34 
35 namespace paal {
36 namespace data_structures {
37 
/**
 * @brief Line-oriented reader over a character buffer (typically a
 *        memory-mapped text file), optionally restricted to one chunk of it.
 *
 * The class only stores pointers into the buffer it is given; it neither
 * copies nor releases the buffer, so the buffer must outlive the object.
 *
 * When the buffer is split into chunk_cnt chunks, every line is assigned to
 * the chunk in which it starts. A chunk therefore skips the tail of a line
 * started in the previous chunk and may read past its own suggested end to
 * finish its last line.
 */
class mapped_file {
private:

    char const * m_current;             ///< next character to be read
    char const * m_file_begin;          ///< first character of the buffer
    char const * m_file_end;            ///< one past the last character of the buffer
    char const * m_chunk_suggested_end; ///< lines starting at or after this point belong to the next chunk

public:

    /**
     * @brief Initializes a reader limited to a single chunk of the buffer.
     *
     * @param file        pointer to the first character of the buffer
     * @param file_size   number of characters in the buffer
     * @param chunk_index zero-based index of the chunk to read, must be < chunk_cnt
     * @param chunk_cnt   total number of chunks, must be > 0
     */
    mapped_file(char const * file, size_t file_size, unsigned chunk_index, unsigned chunk_cnt):
        mapped_file(file, file_size) {
        assert(chunk_cnt > 0);
        assert(chunk_index < chunk_cnt);
        m_current = m_file_begin + file_size * chunk_index / chunk_cnt;
        m_chunk_suggested_end = m_file_begin + file_size * (chunk_index + 1) / chunk_cnt;
        // The chunk boundary fell in the middle of a line. That line is
        // handled by the previous chunk, so skip its remainder here.
        if (m_current > m_file_begin && *(m_current - 1) != '\n') {
            get_line();
        }
    }

    /**
     * @brief Initializes a reader covering the whole buffer.
     *
     * @param file      pointer to the first character of the buffer
     * @param file_size number of characters in the buffer
     */
    mapped_file(char const * file, size_t file_size) :
        m_current(file),
        m_file_begin(file),
        m_file_end(file + file_size),
        m_chunk_suggested_end(m_file_end) {}

    /**
     * @brief Reads the line starting at the current position and advances
     *        past its terminating '\n'.
     *
     * The end of chunk is not checked here. If the final line has no
     * trailing '\n', the position stops exactly at the end of the buffer;
     * calling this at end of file returns an empty string.
     *
     * @return the line without its terminating '\n'
     */
    std::string get_line() {
        auto const line_begin = m_current;
        auto const line_end = std::find(line_begin, m_file_end, '\n');

        // Step over the newline only if one was found. Advancing
        // unconditionally would move the cursor beyond one-past-the-end of
        // the buffer when the last line is unterminated.
        m_current = (line_end == m_file_end) ? m_file_end : line_end + 1;
        return std::string(line_begin, line_end);
    }

    /**
     * @return true if the current position is at the end of the buffer
     */
    bool eof() const {
        return m_current >= m_file_end;
    }

    /**
     * @return true if the current position is at or past the suggested end
     *         of this chunk
     */
    bool end_of_chunk() const {
        return m_current >= m_chunk_suggested_end;
    }

    /**
     * @brief Calls f on every line that starts inside this chunk.
     *
     * @tparam Functor callable accepting a std::string
     * @param f functor invoked once per line, in buffer order
     */
    template <typename Functor>
    void for_each_line(Functor f) {
        while (!eof() && !end_of_chunk()) {
            f(get_line());
        }
    }

};
128 
129 
147 template <typename Functor>
148 auto for_each_line(Functor f, std::string const & file_path,
149  unsigned threads_count = std::thread::hardware_concurrency()) {
150 
151  using results_t = std::vector<pure_result_of_t<Functor(std::string)>>;
152 
153  std::vector<results_t> results(threads_count);
154  thread_pool threads(threads_count);
155 
156  boost::iostreams::mapped_file_source mapped(file_path);
157  auto data = mapped.data();
158 
159  for (auto i : irange(threads_count)) {
160  threads.post([&, i]() {
161  mapped_file file_chunk(data, mapped.size(), i, threads_count);
162  file_chunk.for_each_line(
163  [&](std::string const & line) {
164  results[i].push_back(f(line));
165  }
166  );
167  });
168  }
169 
170  threads.run();
171  mapped.close();
172 
173  results_t joined_results;
174  for (auto const & v: results) {
175  joined_results.insert(end(joined_results), std::begin(v), std::end(v));
176  }
177  return joined_results;
178 }
179 
180 }
181 }
182 #endif // PAAL_MAPPED_FILE_HPP
auto for_each_line(Functor f, std::string const &file_path, unsigned threads_count=std::thread::hardware_concurrency())
for_each_line function provides basic functionality for processing text files quickly and clearly...
bool eof() const
Is the current position at the end of the file?
void post(Functor f)
post new task
Definition: thread_pool.hpp:39
mapped_file(char const *file, size_t file_size)
Initializes a memory-mapped file.
Definition: mapped_file.hpp:79
void for_each_line(Functor f)
Computes functor on every line of the file. It takes care of the chunks and end of file...
auto irange(T begin, T end)
irange
Definition: irange.hpp:22
mapped_file(char const *file, size_t file_size, unsigned chunk_index, unsigned chunk_cnt)
Initializes a memory-mapped file restricted to a specific chunk, so that every thread can use a different part of th...
Definition: mapped_file.hpp:62
std::string get_line()
Gets a line from the current position in the file. EOF and end of chunk aren't checked here.
Definition: mapped_file.hpp:91
Simple thread pool; the class also uses the current thread!
Definition: thread_pool.hpp:25
void run()
run all posted tasks (blocking)
Definition: thread_pool.hpp:45
bool end_of_chunk() const
Is the current position at the end of the requested part of the file?
data structure that gets new lines for many threads
Definition: mapped_file.hpp:43