Go to the documentation of this file. 1 #ifndef _READ_FILE_HPP_
2 #define _READ_FILE_HPP_ "$Id: read_file.hpp 455 2020-07-23 20:23:59Z geoff $"
193 size_t bytesLeft = used - bytes;
221 unsigned char *bfrAddr =
const_cast<unsigned char *
>(newData) + startFromOffset;
228 size_t byteCount = bytesToAdd;
270 const unsigned char *fileHdr,
const size_t hdrLen);
272 #pragma GCC diagnostic push
273 #pragma GCC diagnostic ignored "-Wsuggest-final-methods"
275 #pragma GCC diagnostic pop
325 const char *searchRootPaths,
const char *possibleFilenames=
"",
326 const char *possibleSuffixes=
"");
359 #pragma GCC diagnostic push
360 #pragma GCC diagnostic ignored "-Wsuggest-final-methods"
361 virtual void noteDataRead(
const unsigned char *bfr,
size_t bfrLen)
const {}
363 #pragma GCC diagnostic pop
413 #pragma GCC diagnostic push
414 #pragma GCC diagnostic ignored "-Wsuggest-final-methods"
423 virtual int processLine(
const unsigned char *line,
size_t lineLen) {
return (0); }
436 virtual int_fast32_t
processBlock(
unsigned char *block,
size_t blockLen) {
447 #pragma GCC diagnostic pop
475 const char *endTag,
size_t maxDocSize=1024*128)
507 blockLen -= bytesAdded;
543 unsigned char *newData = controller->
bufferAddress(rec, &newLen);
551 " bytes as only space for " << left <<
" remains" <<
LOG_ENDLINE;
568 virtual int processDocument(
unsigned char *docStart,
size_t docLen) = 0;
574 virtual int_fast32_t
processBlock(
unsigned char *blockStart,
size_t blockLen) {
580 #pragma GCC diagnostic push
581 #pragma GCC diagnostic ignored "-Wsuggest-final-types"
621 std::map<std::string,std::string>::const_iterator i =
headerAttributes.find(headerElement);
623 return (i->second.c_str());
634 if (line[lineLen - 1] ==
'\n') {
636 if (line[lineLen - 1] ==
'\r') {
648 while ((i < (
sizeof(
httpVersion) - 1)) && (offset < lineLen)) {
649 if (isspace(line[offset]))
break;
653 while (isspace(line[offset]) && (offset < lineLen)) {
657 while ((i < (
sizeof(
statusCode) - 1)) && (offset < lineLen)) {
658 if (isspace(line[offset]))
break;
662 while (isspace(line[offset]) && (offset < lineLen)) {
666 while ((i < (
sizeof(
responseReason) - 1)) && (offset < lineLen)) {
667 if (isspace(line[offset]))
break;
676 #pragma GCC diagnostic push
677 #pragma GCC diagnostic ignored "-Wsuggest-final-methods"
684 if (line[lineLen - 1] ==
'\n') {
686 if (line[lineLen - 1] ==
'\r') {
698 while ((i < (
sizeof(
httpRequest) - 1)) && (offset < lineLen)) {
699 if (isspace(line[offset]))
break;
703 while (isspace(line[offset]) && (offset < lineLen)) {
707 while ((i < (
sizeof(
requestURL) - 1)) && (offset < lineLen)) {
708 if (isspace(line[offset]))
break;
712 while (isspace(line[offset]) && (offset < lineLen)) {
716 while ((i < (
sizeof(
httpVersion) - 1)) && (offset < lineLen)) {
717 if (isspace(line[offset]))
break;
733 if (line[lineLen - 1] ==
'\n') {
735 if (line[lineLen - 1] ==
'\r') {
744 unsigned char *colon = (
unsigned char *) memchr(line,
':', lineLen);
745 if (colon !=
nullptr) {
746 size_t keyLen = (colon - line);
747 size_t valLen = lineLen - (keyLen + 1);
750 if (isspace(*colon) ==
false)
break;
755 if (memcmp(line,
"Content-Length:", 15) == 0) {
758 headerAttributes[std::string((
const char *) line, keyLen)] = std::string((
const char *) colon, valLen);
762 #pragma GCC diagnostic pop
819 #pragma GCC diagnostic pop
822 #pragma GCC diagnostic push
823 #pragma GCC diagnostic ignored "-Wsuggest-final-types"
864 const char *quoteChars =
"\"",
bool hasHeader=
true);
903 unsigned char *line,
size_t lineLen);
941 }
while (bytesProcessed != 0);
969 #pragma GCC diagnostic pop
int32_t contentLengthWanted
Definition: read_file.hpp:602
size_t startBufferOffset
offset of first byte used in buffer
Definition: read_file.hpp:47
#define _STDERR_FD
Platform-independent reference to standard error.
Definition: logging_api.hpp:2617
#define safe_strcpy(d, s, l)
Safe strcpy() routine that will not copy more than l bytes and always ensures that a null is present ...
Definition: compiler_hints.h:696
unsigned char bytesToMatch[16]
Definition: read_file.hpp:263
ssize_t neededBlockLen
Definition: read_file.hpp:467
unsigned char documentStartPrefix[64]
Definition: read_file.hpp:464
unsigned char * endBufferLocation() const OME_ALWAYS_INLINE
Returns just past the end of the active content in the buffer. Normally, this would be where new cont...
Definition: read_file.hpp:138
unsigned char separatorCharList[8]
Definition: read_file.hpp:832
Intermediary I/O processing object for performing multi-threaded receive-and-process operations on a ...
Definition: io_processor.hpp:154
static int defaultReadRoutine(Read_And_Process_File *input, unsigned char *bfr, uint32_t bfrLen)
Default read routine.
Definition: read_file.cpp:462
int readAndProcessCSV_File()
Top-level routine to parse CSV file.
Definition: read_file.hpp:912
enum Extract_And_Process_Document_Stream::@13 restartScanState
FARGOS file processing infrastructure.
#define memchr_in_long_block(s, c, l)
Definition: read_file.cpp:15
unsigned char * headerLine
Definition: read_file.hpp:831
unsigned char separatorListLen
Definition: read_file.hpp:837
@ FIND_END_TAG
Definition: read_file.hpp:461
char responseReason[48]
Definition: read_file.hpp:599
#define OS_SOCKET_TYPE
Definition: io_processor.hpp:41
FARGOS I/O Processing classes.
void * auxData
Definition: read_file.hpp:281
@ READ_REQUEST
Definition: read_file.hpp:588
File_Buffer(size_t bfrSize=65536)
Creates an empty buffer of the indicated size in the heap.
Definition: read_file.hpp:52
@ READ_FROM_SOCKET
Definition: read_file.hpp:249
int readIntoFileBuffer(File_Buffer *bfr)
Definition: read_file.hpp:383
virtual int processDocument(unsigned char *docStart, size_t docLen)=0
User-exit to process extracted document.
Allocation record for chains in a 32-bit shared memory buffer.
Definition: circular_bfr.hpp:103
~File_Buffer()
Definition: read_file.hpp:84
std::map< std::string, std::string > headerAttributes
Definition: read_file.hpp:603
Extract_And_Process_Document_Stream(const char *docStart, const char *endTag, size_t maxDocSize=1024 *128)
Definition: read_file.hpp:474
File_Buffer * pendingContent
Definition: read_file.hpp:466
static OS_HANDLE_TYPE openFile(const char *fileName, ReadModes mode=READ_NORMAL)
Open the indicated file.
Definition: read_file.cpp:192
static int closeFile(OS_HANDLE_TYPE fd, ReadModes mode=READ_NORMAL)
Close a native operating system file handle.
Definition: read_file.cpp:268
int scanAndProcessBuffer(const unsigned char *block, size_t blockLen)
Definition: read_file.hpp:498
virtual int processHeaderLine(unsigned char *line, size_t lineLen)
Interface to handle special case processing of header lines.
Definition: read_file.hpp:431
Parse_And_Process_HTTP_Stream(size_t maxDocSize=1024 *128, ParseState initialState=READ_RESPONSE)
Definition: read_file.hpp:605
unsigned char * bufferAddress(SharedBufferAllocRecord *rec, size_t *bufferLen=nullptr) const OME_ALWAYS_INLINE
Return physical address of a buffer within the context of the local process' address space.
Definition: io_processor.hpp:275
virtual int readIntoBuffer(unsigned char *bfr, size_t bfrLen)
Definition: read_file.cpp:487
virtual ~Extract_And_Process_Document_Stream()
Definition: read_file.hpp:493
Implements an adjustable sliding buffer that minimizes data movement while enabling streams to proces...
Definition: read_file.hpp:38
@ MAX_FIELDS
Definition: read_file.hpp:829
#define R_OK
Definition: tmp.o.cpp:486
virtual int_fast32_t processBlock(unsigned char *block, size_t blockLen)
Definition: read_file.hpp:436
virtual int processLine(const unsigned char *line, size_t lineLen)
Definition: read_file.hpp:423
#define VIRTUAL_OVERRIDE
Generates override if the compiler supports it.
Definition: compiler_hints.h:435
#define OME_ALWAYS_OPTIMIZE(level)
Mark a function to be compiled with a specific level of optimization.
Definition: compiler_hints.h:406
@ READ_HEADER_LINE
Definition: read_file.hpp:589
int readAndProcessFile()
Process file contents with no imposed structure.
Definition: read_file.hpp:397
size_t appendDataToBuffer(const unsigned char *newData, size_t bytesToAdd, size_t startFromOffset=0) OME_ALWAYS_INLINE OME_ALWAYS_OPTIMIZE("-O3")
Convenience routine to append data to the buffer.
Definition: read_file.hpp:215
LogMaskType_t COMPONENT_LOG_MASK() io("io_logMask", &DEFAULT_sharedMemoryVariableManager, COMPONENT_LEVEL(io, warn)|COMPONENT_LEVEL(io, error)|COMPONENT_LEVEL(io, fatal))
#define _INVALID_DESCRIPTOR
Platform-independent reference to invalid descriptor.
Definition: logging_api.hpp:2618
#define INVALID_HANDLE_VALUE
Definition: poll_monitor.hpp:19
@ FIND_NOTHING
Definition: read_file.hpp:459
virtual void noteDataRead(const unsigned char *bfr, size_t bfrLen) const
user-exit to see original copy of any data read
Definition: read_file.hpp:362
@ EXTERNAL_SEGMENT
flag indicates buffer is in external segment
Definition: read_file.hpp:41
int findAndProcessNextLine(File_Buffer *fileBfr, bool hasHeaderLine=false)
Process next text line from buffer.
Definition: read_file.cpp:525
void setReadRoutine(ReadDataFP altRoutine) OME_ALWAYS_INLINE
Set a new file read routine.
Definition: read_file.hpp:355
virtual int completedFile(int recordsSeen) VIRTUAL_OVERRIDE
User-exit to notify that file has been completely read.
Definition: read_file.cpp:649
#define AS_HEXADECIMAL_BUFFER(d,...)
Convenience label to enable passing buffer with known length to output operator<<() but output the by...
Definition: logging_api.hpp:2135
ReadDataFP readInterfaceRoutine
Definition: read_file.hpp:259
Scan HTTP and parse stream for HTTP requests/responses.
Definition: read_file.hpp:584
size_t spaceLeft() const OME_ALWAYS_INLINE
Returns the amount of unused space in the buffer.
Definition: read_file.hpp:119
Describes magic numbers needed to identify a file's type and the routine capable of decoding a file's...
Definition: read_file.hpp:258
void adjustBufferStart(size_t amount) OME_ALWAYS_INLINE
Adjusts the start of the active content in the buffer.
Definition: read_file.hpp:163
size_t documentStartLen
Definition: read_file.hpp:468
int parseIntoFields(int maxFields, char *fieldStart[], unsigned char *line, size_t lineLen)
Parse text line into fields.
Definition: read_file.cpp:671
uint16_t initialBytesToMatchMask
Definition: read_file.hpp:262
virtual int_fast32_t processBlock(unsigned char *blockStart, size_t blockLen)
User-exit to process block of raw data.
Definition: read_file.hpp:574
virtual int processHTTPresponse(unsigned char *line, size_t lineLen)
User-exit for HTTP response line.
Definition: read_file.hpp:633
@ FIND_DOC_START
Definition: read_file.hpp:460
Read_And_Process_File(OS_HANDLE_TYPE srcDescriptor, ReadModes mode=READ_NORMAL, const FileTypeReaderSelector *selectorTable=nullptr)
Construct from an existing file descriptor.
Definition: read_file.cpp:408
char httpRequest[16]
Definition: read_file.hpp:600
const char srcID[]
Definition: catSym.c:17
virtual ~Parse_And_Process_HTTP_Stream()
Definition: read_file.hpp:618
int readAndProcessBlocksFromFile(size_t recordLength)
Process fixed length records.
Definition: read_file.cpp:502
@ WRITE_APPEND
Definition: read_file.hpp:248
uint32_t _explicitAlignmentPadding
Definition: read_file.hpp:285
unsigned char * buffer
base of the buffer segment
Definition: read_file.hpp:44
char requestURL[1024]
Definition: read_file.hpp:601
@ READ_RESPONSE
Definition: read_file.hpp:587
int processDocument(unsigned char *docStart, size_t docLen) VIRTUAL_OVERRIDE
User-exit to process extracted document.
Definition: read_file.hpp:775
#define OME_EXPECT_TRUE(expr)
Annotation macro for conditional expression expected to be true.
Definition: compiler_hints.h:541
static int findFileInPathsWithSuffixes(char *path, uint_fast32_t pathLen, const char *searchRootPaths, const char *possibleFilenames="", const char *possibleSuffixes="")
Search for a file using a combination of directory roots and file suffixes.
Definition: read_file.cpp:289
#define MAX_CHARS_IN_A_BYTE
Definition: OMEstring.cpp:14
virtual void returnBlock(SharedBufferAllocRecord *record)=0
virtual int completedFile(int recordsSeen)
Definition: read_file.hpp:420
@ READ_LZ4
Definition: read_file.hpp:247
uint_fast32_t text2uint32(const char *textString, uint_fast8_t text_len) NONNULL_PARAMETERS(1) OME_ALWAYS_INLINE OME_ALWAYS_OPTIMIZE("-O3")
Convert a sequence of text characters into an unsigned integer as quickly as possible....
Definition: text2int.h:79
bool doHeaderLine
Definition: read_file.hpp:839
size_t documentEndLen
Definition: read_file.hpp:469
CSV_File(OS_HANDLE_TYPE descriptor, bool hasHeader=true, char sepChar=',')
Parse conventional CSV file.
Definition: read_file.cpp:591
enum ReadModes readMode
Definition: read_file.hpp:284
ReadDataFP readRoutine
Definition: read_file.hpp:280
#define NULL
Definition: tmp.o.cpp:327
@ READ_CONTENT_BODY
Definition: read_file.hpp:590
Fundamental class that can read data from a memory-mapped region, file or socket and process data in ...
Definition: read_file.hpp:244
LogMaskType_t COMPONENT_LOG_MASK() app("app_logMask", &DEFAULT_sharedMemoryVariableManager, COMPONENT_LEVEL(app, defaultMask))
int readAndProcessTextLines(bool hasHeaderLine=false)
Process text lines.
Definition: read_file.cpp:559
int getFieldIndex(const char *fieldHeading) const
Return the relative subscript for a named column.
Definition: read_file.cpp:655
uint32_t readsPerformed
Definition: read_file.hpp:836
void discardInitialBytes(size_t bytes=0) OME_ALWAYS_INLINE OME_ALWAYS_OPTIMIZE("-O3")
Discards bytes from the beginning of the active portion of the buffer; typically as a result of havin...
Definition: read_file.hpp:171
virtual int_fast32_t processBlock(unsigned char *blockStart, size_t blockLen) VIRTUAL_OVERRIDE
User-exit to process block of raw data.
Definition: read_file.hpp:765
const char srcID[] OME_USED
Definition: tick_time.cpp:24
char fileTypeDescription[24]
Definition: read_file.hpp:260
File_Buffer(unsigned char *existingBuffer, size_t bfrSize, size_t currentLen=0, uint8_t ownershipFlags=EXTERNAL_SEGMENT)
Accepts the use (and potentially transfers ownership) of an existing buffer that may already have som...
Definition: read_file.hpp:75
unsigned char quoteListLen
Definition: read_file.hpp:838
virtual int processHTTPheader(unsigned char *line, size_t lineLen)
User exit for HTTP header line.
Definition: read_file.hpp:732
@ IS_READ_ONLY
flag indicates buffer is read-only
Definition: read_file.hpp:42
virtual int processBuffer(File_Buffer *bfrState)
Definition: read_file.hpp:442
#define AS_TEXT_BUFFER(s,...)
Convenience label to enable passing text with known length to output operator<<().
Definition: logging_api.hpp:2087
unsigned char documentEndTag[64]
Definition: read_file.hpp:465
size_t spaceUsed() const OME_ALWAYS_INLINE
Returns the amount of space currently used in the buffer.
Definition: read_file.hpp:125
enum ReadModes modeToSelect
Definition: read_file.hpp:261
ParseState parseState
Definition: read_file.hpp:593
OS_HANDLE_TYPE descriptor
Definition: read_file.hpp:282
@ FIND_BLOCK_LENGTH
Definition: read_file.hpp:462
static const FileTypeReaderSelector * findTypeOfFile(const char *fileName, const FileTypeReaderSelector *selectorTable)
Definition: read_file.cpp:375
ParseState restartParseState
Definition: read_file.hpp:594
#define _STDIN_FD
Platform-independent reference to standard in.
Definition: logging_api.hpp:2615
int(* ReadDataFP)(class Read_And_Process_File *input, unsigned char *bfr, uint32_t bfrLen)
Definition: read_file.hpp:251
unsigned char * startBufferLocation() const OME_ALWAYS_INLINE
Returns the start of the active content in the buffer.
Definition: read_file.hpp:131
#define OME_EXPECT_FALSE(expr)
Annotation macro for conditional expression expected to be false.
Definition: compiler_hints.h:540
#define LOG_COMPONENT_CERR(component, lvl)
Convenience macro that uses LOG_COMPONENT_INTO to conditionally log a message to standard error.
Definition: logging_api.hpp:3030
@ READ_ZLIB
Definition: read_file.hpp:247
#define OME_ALWAYS_INLINE
Tell the compiler to always inline a function, regardless of optimization level.
Definition: compiler_hints.h:364
#define _STDOUT_FD
Platform-independent reference to standard out.
Definition: logging_api.hpp:2616
size_t endBufferOffset
offset of last byte used in buffer
Definition: read_file.hpp:46
uint32_t recordsProcessed
Definition: read_file.hpp:283
void adjustBufferEnd(size_t amount) OME_ALWAYS_INLINE
Adjusts the end of the buffer. This is typically called after new content has been added.
Definition: read_file.hpp:147
virtual ~Read_And_Process_File()
Definition: read_file.cpp:453
virtual int parsedHeadingLine()
User-exit invoked when initial header line is parsed.
Definition: read_file.hpp:960
int addIOblockThenProcess(SharedBufferAllocRecord *rec, IO_Processor *controller)
Specialized interface to add incoming block from BufferRegion maintained by an IO_Processor....
Definition: read_file.hpp:541
char httpVersion[16]
Definition: read_file.hpp:597
unsigned char quoteCharList[8]
Definition: read_file.hpp:833
BufferRegion * bfrManager
buffer region
Definition: io_processor.hpp:194
uint8_t externalBufferFlags
flags indicating read-only, external
Definition: read_file.hpp:48
#define LOG_ENDLINE
Closing clause for text line output using << operators.
Definition: logging_api.hpp:2956
Read and process Comma-Separated-Value (or equivalent) files a record at a time.
Definition: read_file.hpp:827
#define OS_HANDLE_TYPE
Definition: io_processor.hpp:48
int processPacket(unsigned char *data, size_t len)
Streaming interface equivalent to readAndProcessCSV_File(). API is compatible with processPacketUsing...
Definition: read_file.hpp:924
char statusCode[8]
Definition: read_file.hpp:598
@ WRITE
Definition: read_file.hpp:248
int scanForDocument()
Definition: read_file.cpp:122
static uint32_t matchFileHeader(const FileTypeReaderSelector *criteria, const unsigned char *fileHdr, const size_t hdrLen)
Definition: read_file.cpp:352
Scan streams of data for start and end tags, invokes processDocument() on found content.
Definition: read_file.hpp:456
void useRegionAsBuffer(unsigned char *existingBuffer, size_t bfrSize, size_t currentLen, uint8_t ownershipFlags) OME_ALWAYS_INLINE OME_ALWAYS_OPTIMIZE("-O3")
Replace the existing buffer with a new region.
Definition: read_file.hpp:105
ParseState
Definition: read_file.hpp:586
@ WRITE_TRUNCATE
Definition: read_file.hpp:248
const char * findHeaderAttribute(const char *headerElement) const
Definition: read_file.hpp:620
virtual int processHeaderLine(unsigned char *line, size_t lineLen) VIRTUAL_OVERRIDE
Process initial header line in file.
Definition: read_file.cpp:636
int scanAndProcessBuffer(File_Buffer *bfrState)
Definition: read_file.hpp:523
enum Extract_And_Process_Document_Stream::@13 scanState
@ READ_GUESS
Definition: read_file.hpp:247
size_t bufferLen
total length of buffer segment
Definition: read_file.hpp:45
@ READ_PCAP
Definition: read_file.hpp:247
virtual int processHTTPrequest(unsigned char *line, size_t lineLen)
User-exit for HTTP request line.
Definition: read_file.hpp:683
int fd
Definition: ethers.c:41
char * fields[MAX_FIELDS]
Definition: read_file.hpp:834
File_Buffer * intermediateBuffer
Definition: read_file.hpp:278
virtual int beginFile()
Definition: read_file.hpp:417
virtual ~CSV_File()
Definition: read_file.hpp:867
ReadModes
Definition: read_file.hpp:246
int fieldTotal
Definition: read_file.hpp:835
#define LOG_CERR(lvl)
Convenience macro that uses LOG_INTO() to conditionally log a message to standard error.
Definition: logging_api.hpp:3014
@ READ_NORMAL
Definition: read_file.hpp:247