// file_hdfs.hpp
/**
* \class file_hdfs
* \author pbrox
* 
*/
#ifndef TC_FILE_HDFS
#define TC_FILE_HDFS
#include <experimental/filesystem>
#include <cassert>
#include <optional>
#include <fstream>
#include "hdfs.h"
#include "block_generic.hpp"
#include "../stats/stats.hpp"
#include "container.hpp"
//Local logger for the HDFS text file container, obtained under the name "text_file.hdfs".
//It is declared static in this header, so it has internal linkage: each translation unit
//that includes the header gets its own LoggerPtr object.
static LoggerPtr tfh_logger(Logger::getLogger( "text_file.hdfs"));
//Shorthand alias used below for the experimental filesystem namespace (fs::path)
namespace fs = std::experimental::filesystem;
namespace aspide{
	class file_hdfs{
		private:
			//Path to file to open
			fs::path file_container;
			//Separator inside files
			char separator;
			//HDFS filesystem reference
			hdfsFS * fs_ref;
			//HDFS file
			hdfsFile data_stream;
			//HDFS bloxk size of the file
			int64_t hdfs_chunk;
			//File size
			int64_t fsize;
			//Stats pointer
			stats * _stat;
		public:
			//Constructor 
			file_hdfs(fs::path filepath, char separator, hdfsFS * hdfs_fs, stats * _stat);
			//Move constructor
			file_hdfs(file_hdfs && rhs);
			//Iterators
			class iterator : public std::iterator<std::input_iterator_tag, std::string>
				private:
					//string to store the data read by this iterator
					std::string data;
					//Outer references to access separator and stream
                    //Separator inside files
                    char outer_sp;
                    //HDFS filesystem reference
                    hdfsFS * outer_fs;
                    //HDFS file
                    hdfsFile outer_ds;
                    //HDFS bloxk size of the file
                    int64_t outer_ch;
					//Position on the current file
7172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
unsigned long long line; //Out scontainer stats structure stats * _stat; /* Flag identifying the end of the current file, rhis flag is set to true when EOF is read. Remark, the end operator will be defined ONLY by this flag */ bool _eof; /* HDFS getline parameters */ char * buffer; //internal buffer size_t st_point = 0, buff_size = 0; //pointer inside the buffer that size , both set to 0 to force read on the first run /* Getline function */ std::string getline(); int64_t parse(); public: //Constructor, returns an iterator to begin of the file iterator(file_hdfs & file, bool end_f = false) : outer_sp(file.separator), outer_fs(file.fs_ref), outer_ds(file.data_stream), outer_ch(file.hdfs_chunk), _eof(end_f), _stat{file._stat}, line(0) {buffer = new char[outer_ch]; LOG4CXX_INFO(tfh_logger, "HDFS text_file iterator created"); ++*this;} //getting memory for buffer, ++called to read first line //Move constructor iterator(iterator && rhs): data{std::move(rhs.data)}, outer_sp(rhs.outer_sp), outer_fs(rhs.outer_fs), outer_ds(rhs.outer_ds), outer_ch(rhs.outer_ch), line{rhs.line}, _eof{rhs._eof}, st_point{rhs.st_point}, buff_size{rhs.buff_size}, buffer{rhs.buffer}, _stat{rhs._stat} {rhs.buffer = nullptr;} //Destructor ~iterator(){delete[] buffer; LOG4CXX_DEBUG(tfh_logger, "HDFS text_file iterator destroyed, pointer=" << this);} //Cleans the buffer //iterator operands std::string operator*() const{return data;} //Returns the data string //Operator to acquire the next line iterator & operator++(); //Boolean comparison operators bool operator==(const iterator & rhs) const; bool operator!=(const iterator & rhs) { return !(*this == rhs);} //Move Assignment operator iterator& operator=(iterator &&rhs); }; bool operator==(const file_hdfs & rhs) const; //Get an iterator to the first element iterator begin(){ return iterator(*this);} //Get an iterator to the last element iterator end(){return iterator(*this,true);} //Destructor ~file_hdfs(){if(data_stream) hdfsCloseFile(*fs_ref, 
data_stream); LOG4CXX_DEBUG(tfh_logger, "HDFS text file destroyed, pointer=" << this);} //Get a block server, blocksize set to HDFS file block size block_sv_generic get_blocks(size_t size=0){return block_sv_generic(hdfs_chunk, data_stream, fs_ref, _stat);} //Overloading move assignment operator file_hdfs& operator=(file_hdfs &&rhs); //Get the size (in bytes) of the container. int64_t size(){return this->fsize;} std::string path(); //Get 3 data locations for the file std::array<std::string, 3> get_data_location(); };
}
#endif