18 #ifndef PAAL_MAPPED_FILE_HPP
19 #define PAAL_MAPPED_FILE_HPP
21 #define BOOST_ERROR_CODE_HEADER_ONLY
22 #define BOOST_SYSTEM_NO_DEPRECATED
29 #include <boost/iostreams/device/mapped_file.hpp>
36 namespace data_structures {
46 char const * m_current;
47 char const * m_file_begin;
48 char const * m_file_end;
49 char const * m_chunk_suggested_end;
62 mapped_file(
char const * file,
size_t file_size,
unsigned chunk_index,
unsigned chunk_cnt):
64 assert(chunk_cnt > 0);
65 assert(chunk_index < chunk_cnt);
66 m_current = m_file_begin + file_size * chunk_index / chunk_cnt;
67 m_chunk_suggested_end = m_file_begin + file_size * (chunk_index + 1) / chunk_cnt;
68 if (m_current > m_file_begin && *(m_current-1) !=
'\n') {
82 m_file_end(file+file_size),
83 m_chunk_suggested_end(m_file_end) {}
92 auto result_begin = m_current;
93 auto result_end = std::find(m_current, m_file_end,
'\n');
95 m_current = result_end + 1;
96 return std::string(result_begin, result_end-result_begin);
103 return m_current >= m_file_end;
110 return m_current >= m_chunk_suggested_end;
120 template <
typename Functor>
147 template <
typename Functor>
149 unsigned threads_count = std::thread::hardware_concurrency()) {
151 using results_t = std::vector<pure_result_of_t<Functor(std::string)>>;
153 std::vector<results_t> results(threads_count);
156 boost::iostreams::mapped_file_source mapped(file_path);
157 auto data = mapped.data();
159 for (
auto i :
irange(threads_count)) {
160 threads.
post([&, i]() {
161 mapped_file file_chunk(data, mapped.size(), i, threads_count);
163 [&](std::string
const & line) {
164 results[i].push_back(f(line));
173 results_t joined_results;
174 for (
auto const & v: results) {
175 joined_results.insert(end(joined_results), std::begin(v), std::end(v));
177 return joined_results;
182 #endif // PAAL_MAPPED_FILE_HPP
auto for_each_line(Functor f, std::string const &file_path, unsigned threads_count=std::thread::hardware_concurrency())
for_each_line function provides basic functionality for processing text files quickly and clearly...
bool eof() const
is currently at the end of the file
void post(Functor f)
post new task
mapped_file(char const *file, size_t file_size)
Initializes mmapped file.
void for_each_line(Functor f)
Computes functor on every line of the file. It takes care of the chunks and end of file...
auto irange(T begin, T end)
irange
mapped_file(char const *file, size_t file_size, unsigned chunk_index, unsigned chunk_cnt)
Initializes mmapped file with the specific chunk - so that every thread could use a different part of th...
std::string get_line()
Gets a line from the current file. Eof and End Of Chunk aren't checked here.
simple threadpool; the class also uses the current thread!
void run()
run all posted tasks (blocking)
bool end_of_chunk() const
is currently at the end of the requested part of the file
data structure that gets new lines for many threads