//Pablo authored 4ecdc540
/**
* \class file_hdfs
* \author pbrox
*
*/
#ifndef TC_FILE_HDFS
#define TC_FILE_HDFS
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <experimental/filesystem>
#include <fstream>
#include <iterator>
#include <optional>
#include <string>
#include "hdfs.h"
#include "block_generic.hpp"
#include "../stats/stats.hpp"
#include "container.hpp"
//Local logger for the HDFS text file container, registered under the name "text_file.hdfs".
//It is the logger handed to the LOG4CXX_* macros used by file_hdfs and its iterator.
//NOTE(review): being `static` at namespace scope in a header, every translation unit including
//this file gets its own tfh_logger variable.
static LoggerPtr tfh_logger(Logger::getLogger( "text_file.hdfs"));
//Shorthand alias for the experimental filesystem namespace (provides fs::path)
namespace fs = std::experimental::filesystem;
namespace aspide{
class file_hdfs{
private:
//Path to file to open
fs::path file_container;
//Separator inside files
char separator;
//HDFS filesystem reference
hdfsFS * fs_ref;
//HDFS file
hdfsFile data_stream;
//HDFS bloxk size of the file
int64_t hdfs_chunk;
//File size
int64_t fsize;
//Stats pointer
stats * _stat;
public:
//Constructor
file_hdfs(fs::path filepath, char separator, hdfsFS * hdfs_fs, stats * _stat);
//Move constructor
file_hdfs(file_hdfs && rhs);
//Iterators
class iterator : public std::iterator<std::input_iterator_tag, std::string>
{
private:
//string to store the data read by this iterator
std::string data;
//Outer references to access separator and stream
//Separator inside files
char outer_sp;
//HDFS filesystem reference
hdfsFS * outer_fs;
//HDFS file
hdfsFile outer_ds;
//HDFS bloxk size of the file
int64_t outer_ch;
//Position on the current file
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
unsigned long long line;
//Out scontainer stats structure
stats * _stat;
/*
Flag identifying the end of the current file, rhis flag is set to true when EOF is read.
Remark, the end operator will be defined ONLY by this flag
*/
bool _eof;
/* HDFS getline parameters */
char * buffer; //internal buffer
size_t st_point = 0, buff_size = 0; //pointer inside the buffer that size , both set to 0 to force read on the first run
/* Getline function */
std::string getline();
int64_t parse();
public:
//Constructor, returns an iterator to begin of the file
iterator(file_hdfs & file, bool end_f = false) : outer_sp(file.separator), outer_fs(file.fs_ref), outer_ds(file.data_stream), outer_ch(file.hdfs_chunk),
_eof(end_f), _stat{file._stat}, line(0) {buffer = new char[outer_ch]; LOG4CXX_INFO(tfh_logger, "HDFS text_file iterator created"); ++*this;} //getting memory for buffer, ++called to read first line
//Move constructor
iterator(iterator && rhs): data{std::move(rhs.data)}, outer_sp(rhs.outer_sp), outer_fs(rhs.outer_fs), outer_ds(rhs.outer_ds), outer_ch(rhs.outer_ch),
line{rhs.line}, _eof{rhs._eof}, st_point{rhs.st_point}, buff_size{rhs.buff_size}, buffer{rhs.buffer}, _stat{rhs._stat} {rhs.buffer = nullptr;}
//Destructor
~iterator(){delete[] buffer; LOG4CXX_DEBUG(tfh_logger, "HDFS text_file iterator destroyed, pointer=" << this);} //Cleans the buffer
//iterator operands
std::string operator*() const{return data;} //Returns the data string
//Operator to acquire the next line
iterator & operator++();
//Boolean comparison operators
bool operator==(const iterator & rhs) const;
bool operator!=(const iterator & rhs) { return !(*this == rhs);}
//Move Assignment operator
iterator& operator=(iterator &&rhs);
};
bool operator==(const file_hdfs & rhs) const;
//Get an iterator to the first element
iterator begin(){ return iterator(*this);}
//Get an iterator to the last element
iterator end(){return iterator(*this,true);}
//Destructor
~file_hdfs(){if(data_stream) hdfsCloseFile(*fs_ref, data_stream); LOG4CXX_DEBUG(tfh_logger, "HDFS text file destroyed, pointer=" << this);}
//Get a block server, blocksize set to HDFS file block size
block_sv_generic get_blocks(size_t size=0){return block_sv_generic(hdfs_chunk, data_stream, fs_ref, _stat);}
//Overloading move assignment operator
file_hdfs& operator=(file_hdfs &&rhs);
//Get the size (in bytes) of the container.
int64_t size(){return this->fsize;}
std::string path();
//Get 3 data locations for the file
std::array<std::string, 3> get_data_location();
};
//End of namespace aspide
}
#endif