#ifndef LIB7_H
#define LIB7_H

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <inttypes.h>
#include <stddef.h>

// ini.h ///////////////////
/** \file
 * @brief ini file parser
 * 
 * 2020, Simon Wunderlin
 */


/**
 * @brief number of child items to pre-allocate
 * 
 * Child items are pre-allocated in batches so that realloc() does not have
 * to be called for every item added. If you have large structures
 * with many items, setting this to a higher value might benefit
 * performance (fewer realloc calls).
 * 
 * default: 10
 */
#define ALLOC_NUM_ITEMS 10

/**
 * @brief parser positions
 *
 * This structure holds the last known positions of element boundaries.
 */
typedef struct {
	/** start position of the last section */
	size_t section_start;
	/** end position of the last section */
	size_t section_end;
	/** start position of the last item */
	size_t item_start;
	/** end position of the last item */
	size_t item_end;
	/** position of the last equal sign (key/value delimiter) */
	size_t item_equal;
	/** start position of the last comment */
	size_t item_comment;
} last_pos_t;

/**
 * @brief key/value pair of an ini file, delimited by `=`
 *
 * This structure describes one key/value pair of an ini file.
 */
typedef struct {
	/** start position in file */
	int start; 
	/** end position in file */
	int end;   
	/** key (the part before `=`) */
	char* name;
	/** value (the part after `=`) */
	char* value; 
} ini_item_t;

/**
 * @brief a section that contains N ini_item_t
 *
 * This structure holds one section together with its items.
 */
typedef struct {
	/** name of the section, this is the part between the brackets `[` and `]` */
	char* name;
	/** number of ini_item_t in this section */
	int length;  
	/** maximum number of allocated items */
	int size;
	/** array holding the items (key/value pairs) */
	ini_item_t **items;
} ini_section_t;

/**
 * @brief container holding sections
 *
 * Top level structure, holds N sections.
 */
typedef struct {	
	/** number of sections */
	int length;
	/** maximum allocated space in sections */
	int size;
	/** the array holding the sections */
	ini_section_t **sections;
} ini_section_list_t;

/**
 * @brief constructor
 * 
 * Parses an ini file. Make sure `fp` points to the beginning of the file
 * before invoking.
 * 
 * @param fp file pointer
 * @returns section list
 */
ini_section_list_t *ini_parse(FILE *fp);

/**
 * @brief free the memory of a parsed ini structure
 * 
 * @param ini parsed structure
 */
void ini_free(ini_section_list_t *ini);

/**
 * @brief find a section by name in an ini structure
 * 
 * @param ini structure to search in
 * @param section_name name of the section to search for
 * @returns pointer to the section or NULL if it was not found
 */
ini_section_t *ini_find_section(ini_section_list_t *ini, 
                                const char* section_name);
/**
 * @brief search for a key in a section
 * 
 * @param s section to search in
 * @param key name of the key to search for
 * @returns reference to the key/value pair or NULL if not found
 */
ini_item_t *ini_find_key(ini_section_t *s, const char* key);

/**
 * @brief get the value of a key in a section
 * 
 * Looks up the value of a specific item in a section. Returns NULL if it
 * is not found.
 * 
 * @param ini section list to search in
 * @param section name of the section to search in
 * @param key key to search for in the section
 * @returns the value if the key is found in the section, otherwise `NULL`
 */
char *ini_get_value(ini_section_list_t *ini,
                const char* section, const char* key);

// buffer.h ///////////////////
/** \file
 * @brief generic growing buffer library
 */

/**
 * @brief dynamically growing generic byte array
 */
typedef struct buf {
    /** length of the content */
    int length;
    /** length of the allocated memory (will be raised dynamically if too small) */
    int allocated;
    /** byte array containing the data */
    unsigned char *buffer;
} buf_t;

/**
 * @brief allocate initial memory
 *
 * Declared with an explicit `(void)` parameter list so that the compiler
 * rejects calls that pass arguments (an empty `()` list is an old-style
 * declaration without argument checking).
 *
 * @returns the newly allocated buffer.
 *          NOTE(review): the behaviour on allocation failure is not visible
 *          from this header; confirm in the implementation.
 */
buf_t *new_buf_t(void);

/**
 * @brief release all memory of a buffer
 * 
 * @param buffer the buffer to release
 */
void free_buf(buf_t *buffer);

/**
 * @brief add data to a buffer
 *
 * The buffer is expected to always be at least 1 byte long and
 * terminated by `\0`.
 * 
 * The output buffer will always be `\0` terminated.
 * 
 * To prevent constant realloc() calls on small chunks, the allocated size
 * `buf_t.allocated` is doubled when more memory is required.
 * 
 * If a chunk is added that is larger than `buf_t.allocated * 2`, then
 * the current size plus the size of the new chunk is calculated and reallocated.
 * 
 * @param buffer the buffer to work on
 * @param size size of the new chunk
 * @param data byte array to append
 * @returns 0 on success, error code otherwise
 */
int append_buf_str(buf_t *buffer, int size, unsigned char* data);

/**
 * @brief append a single character
 * 
 * Takes one character as input; slightly faster than `append_buf_str(...)`.
 * 
 * @param buffer the buffer to work on
 * @param c the character to add
 * @returns 0 on success, error code otherwise
 */
int append_bufc(buf_t *buffer, unsigned char c);

// address.h ///////////////////
/** \file
 * @brief hl7 address structures and utilities
 * 
 * Parse address strings into hl7_addr_t and vice versa. All string addresses 
 * start with `1` for the first element. This is an end-user interface. All
 * internal addresses in hl7_addr_t start with `0`, whereas `-1` 
 * means `undefined`.
 * 
 * @note
 * All elements in hl7_addr_t are initialized to `-1`. If an address string 
 * is parsed with set_addr_from_string(), all elements which remain at `-1`
 * have not been specified in the string (thus are unset).
 * 
 * ### HL7 addressing
 * 
 * String addresses are written in the following format; they are translated
 * into an efficient struct hl7_addr_t:
 * 
 * ```
 * SEG(3)-1(2).3.4
 *  ^  ^  ^ ^  ^ ^
 *  |  |  | |  | +-- optional: hl7_addr_t.subcmp     (delimited by &)
 *  |  |  | |  +---- optional: hl7_addr_t.comp       (delimited by ^)
 *  |  |  | +------- optional: hl7_addr_t.field      (delimited by ~)
 *  |  |  +--------- optional: hl7_addr_t.fieldlist  (delimited by |)
 *  |  +------------ optional: hl7_addr_t.seg_count  (segment repetition)
 *  +--------------- required: hl7_addr_t.segment    (delimited by \r)
 * ```
 * 
 */


/**
 * @brief hl7 element address
 * 
 * All addresses start with `0`. `-1` means undefined (not specified when a 
 * string was converted to hl7_addr_t).
 * 
 * Some obvious defaults must be taken into account by the user. For 
 * example, if `field` is `-1`, then it should always be treated as `0`, 
 * because there is no element with an undefined number of fields (repetitions);
 * if not specified, the first element has to be used.
 * 
 * The same applies to `seg_count`: if it is not specified (and therefore
 * equals `-1`), the first index `0` is meant.
 */
typedef struct hl7_addr_t {
    /** segment name, typically 3 upper case letters */
    char* segment;
    /** the data between the hl7_meta_t.sep_field (typically `|`) */
    int fieldlist;
    /** the data between the hl7_meta_t.sep_rep (typically `~`); fields can have repetitions. Typically these are omitted in addresses. */
    int field;
    /** the data between the hl7_meta_t.sep_comp (typically `^`) */
    int comp;
    /** the data between the hl7_meta_t.sep_subcmp (typically `&`) */
    int subcmp;
    /** the address of the n-th segment in a file. If you have 3 `OBX` segments and you want the 2nd, then use `OBX(2)` to specifically address the second */
    int seg_count;
} hl7_addr_t;

/**
 * @brief keep track of the number of the same segments in a message_t
 * 
 * This struct keeps track of the number of segments with the same name. This is
 * important when searching for certain elements, to make sure we do not overrun
 * buffers. 
 * 
 * All memory is allocated dynamically; `_allocated` keeps track of the length
 * of the allocated space in `count` and `segments`. `length` keeps track of the
 * used elements.
 * 
 * The count of a `segment` can be found at the same index in `count`.
 */
typedef struct seg_count {
    /** number of elements stored in count and segments */
    unsigned int length;
    /** number of allocated elements in count and segments */
    unsigned int _allocated;
    /** array of segment names */
    char** segments;
    /** array of segment counts, parallel to `segments` */
    int* count;
} seg_count_t;




/**
 * @brief reset to default values
 *  
 * Sets all values to `-1` and segment to `NULL`.
 * 
 * @param addr address struct to reset
 * @returns NOTE(review): presumably the same `addr` pointer that was passed
 *          in; not documented in this header, confirm in the implementation
 */
hl7_addr_t *reset_addr(hl7_addr_t *addr);

/**
 * @brief create address structure
 * 
 * malloc() an hl7_addr_t struct and set all values to their defaults
 * (see reset_addr()).
 *
 * Declared with an explicit `(void)` parameter list so that the compiler
 * rejects calls that pass arguments.
 * 
 * @returns empty address struct
 */
hl7_addr_t* create_addr(void);

/**
 * @brief parse a string address
 * 
 * Shorthand for set_addr_from_string() which also allocates the hl7_addr_t.
 * 
 * @see set_addr_from_string()
 * @param str string address, see the address.h file comment for the format
 * @returns parsed address as structure, NULL on error
 */
hl7_addr_t* addr_from_string(char* str);

/**
 * @brief set address by string
 * 
 * Parses a string address and fills the ret_addr struct with the parsed
 * string definition. ret_addr will be reset first.
 * 
 * Returns `0` on success and the following error codes otherwise:
 * - ` -1`: string length 0
 * - ` -2`: segment count not ended by `)`
 * - ` -3`: field address must start with `-`
 * - ` -4`: field address is not a digit
 * - ` -5`: expected `(` or `.` after field
 * - ` -6`: invalid field address, must at least set SEG-N
 * - ` -7`: field repetition `(` must be followed by digit
 * - ` -8`: field repetition may only contain digits between `(` and `)`
 * - ` -9`: premature end in field repetition (fieldlist)
 * - `-10`: premature end, expecting component address after `.`
 * - `-11`: value error, only digits allowed in component address after `.`
 * - `-12`: premature end, comp address is empty
 * - `-13`: invalid character followed after comp
 * - `-14`: '.' for subcomp must be followed by an address, end of string reached
 * - `-15`: premature end, subcomp address is empty
 * 
 * @param str string address
 * @param ret_addr address struct which will contain the parsed string address
 * @returns 0 on success, error code otherwise
 */
int set_addr_from_string(char* str, hl7_addr_t **ret_addr);

/**
 * @brief create a string representation of the address
 * 
 * Allocates memory for a string and returns the string representation of addr.
 * 
 * @param addr our address
 * @returns string representation of the address
 */
char* addr_to_string(hl7_addr_t* addr);

/**
 * @brief free an hl7_addr_t struct
 *
 * @param addr address to free
 */
void free_addr(hl7_addr_t* addr);

/**
 * @brief display address
 * 
 * Debug method, will printf() the content of an hl7_addr_t.
 * 
 * @param addr address to dump
 */
void addr_dump(hl7_addr_t* addr);

/**
 * @brief clone an address
 * 
 * You must make sure to completely deallocate the newly created address,
 * @see free_addr().
 * 
 * @param addr address to clone
 * @returns copy of the address
 */
hl7_addr_t* clone_addr(hl7_addr_t* addr);

/**
 * @brief create segment count struct
 *
 * Declared with an explicit `(void)` parameter list so that the compiler
 * rejects calls that pass arguments.
 * 
 * @returns seg_count_t struct with allocated memory
 */
seg_count_t* create_seg_count(void);

/**
 * @brief free segment count struct
 * 
 * @param segc struct to free
 */
void free_seg_count(seg_count_t* segc);

/**
 * @brief increment the count for a segment
 * 
 * This method dynamically allocates more memory for segment and count if
 * required.
 * 
 * @note
 * Segments must be 3 character long strings. No bounds checking is done.
 * 
 * @param segment string segment
 * @param segc segment count struct
 * @returns new count for the provided segment
 */
int add_seg_count(char* segment, seg_count_t* segc);

/**
 * @brief get the count for a segment name
 * 
 * @param segment string name of the segment
 * @param segc segment count struct
 * @returns count for the segment, 0 if not found
 */
int get_seg_count(char* segment, seg_count_t* segc);


// decode.h ///////////////////
/** @file
 * @brief primary hl7 parse methods
 * 
 * high level interface to be exposed to bindings
 */




/**
 * @brief parser version information
 *
 * Declared with an explicit `(void)` parameter list so that the compiler
 * rejects calls that pass arguments.
 *
 * @returns version string as defined in CMakeLists.txt
 */
const char *version_parser(void);

/**
 * @brief pretty print system errors
 * 
 * @deprecated replace with log_error()
 * @param e error code
 * @param additional extra string to print
 */
void print_error(int e, char* additional);

/**
 * @brief extract a sub string from a string
 * 
 * This method expects the string's length without the `\0` character.
 * It will allocate a new buffer (including the delimiting `\0`) and
 * copy all data into the new buffer. The buffer will then be
 * delimited with `\0`.
 * 
 * The user must take care to properly free() the newly 
 * allocated string.
 * 
 * @note
 * We do not check if you try to read past the end of 
 * buffer, you must take care of that!
 * 
 * @param start position of the substring's first character
 * @param length string length without terminating \0
 * @param buffer the source string
 * @return new char buffer
 */
unsigned char* extract_substr(int start, int length, unsigned char *buffer);

/**
 * @brief parse one HL7 line
 * 
 * Please use read_meta() before using this method on the first line of the 
 * HL7 file. It might work without (if the HL7 file uses default delimiters) 
 * but is not safe.
 * 
 * This method reads one line of the HL7 file and parses it into a segment 
 * structure of node_t's. Meta will be updated once we reach the end of the line.
 * `meta->crlf` and `meta->sep_message` will be updated when the first `\n` 
 * or `\r` character is found if `meta->crlf` is `-1` (default value, no `EOL`
 * detection happened yet).
 * 
 * `meta->crlf` and `meta->sep_message` will only be updated on the first 
 * pass, then cached data will be used to speed up the parsing process.
 * 
 * @note
 * Do not use meta on different files, always use a new meta data structure 
 * on a new file.
 *
 * @note
 * NOTE(review): this prototype uses `hl7_meta_t` and `node_t`, but
 * `hl7_meta_t` is only defined further down in this header (meta.h section)
 * and no declaration of `node_t` precedes this line. The declaration order
 * of this header should be verified.
 * 
 * Error codes:
 *  - `1`: failed to allocate memory for line buffer
 *  - `2`: failed to allocate more memory for line buffer
 *  - `3`: EOF was reached unexpectedly
 *  - `4`: maximum delimiters per segment is reached, raise MAX_FIELDS
 *  - `5`: failed to allocate raw_field @see create_raw_field_t()
 *  - `6`: failed to allocate node_t @see create_node_t()
 *  - `7`: failed to process sub field elements @see process_node()
 *  - `8`: failed to append child @see node_append()
 *  - `9`: failed to allocate memory for segment name
 *  - `10`: File did not start with 'MSH'
 *  - `11`: MSH-2 was not delimited by MSH-1, FIXME: these error codes may overlap where we use ret+10 now!
 *  - `12`: Segment name is longer than 5 bytes
 * 
 * @param fd file pointer, must be forwarded on the last byte of the BOM (if any)
 * @param meta must be initialized at least with correct delimiters. This is typically done by `parse_msh()`
 * @param fieldlist_p this is the node tree of the segment
 * @param segment_name will hold the segment name, e.g. "PID", "OBX", ...
 * @return 0 on success, else error code
 */
int parse_segment(FILE *fd, hl7_meta_t* meta, node_t **fieldlist_p, unsigned char **segment_name);

/**
 * @brief Entry point for the parser
 * 
 * The parser has 2 modes:
 * 1. parse the first line up until `\r` or `\n` is found. Determine
 *    delimiters, update meta @see hl7_delimiters()
 * 2. parse the rest of the file after the first line break, splitting 
 *    fields into internal data structures
 * 
 * If you want to analyze parts of the document while the parser is still 
 * running or you are using it in a multi-threaded environment, then there is 
 * the possibility to add callback functions. The following callback hooks 
 * are available:
 * - `void (*cb_progress)(message_t *message, size_t total, size_t current);`
 * - `void (*cb_start)(message_t *message)`
 * - `void (*cb_end)(message_t *message, size_t max, size_t current, int exit_code)`
 * - `void (*cb_segment)(message_t *message, size_t num, char name[3])`
 * 
 * Also, you can control how often the `cb_progress()` callback is fired;
 * the default is every 1% of progress (does not fire on files smaller 
 * than 100 bytes).
 * 
 * @see message_state.h for more information on callback functions.
 *
 * @note
 * NOTE(review): `message_t` is not declared anywhere above this prototype in
 * this header; the declaration order should be verified.
 * 
 * @param fd file pointer, must be forwarded on the last byte of the BOM (if any)
 * @param message_p this must be an initialized message_t object.
 * @return 0 on success, 1 if message_p is NULL, 2 if create_node fails on first segment (meta detection), 3 if first segment is not "MSH", > 10 parse_segment() errors
 */
int hl7_decode(FILE* fd, message_t **message_p);

/**
 * @brief parse hl7 file 
 * 
 * This method gives more control over the metadata than hl7_decode(). You may
 * do your own delimiter detection and store the information in an hl7_meta_t
 * struct.
 * 
 * Make sure to set `hl7_meta_t.crlf` to something other than `-1` (default)
 * otherwise the parser will try to detect the delimiters again from the first 
 * line it is fed.
 *
 * @note
 * NOTE(review): `message_t` is not declared above this prototype and
 * `hl7_meta_t` is only defined further down in this header (meta.h section);
 * the declaration order should be verified.
 * 
 * @param fd file pointer
 * @param meta prepared metadata with `hl7_meta_t.crlf != -1` to prevent automagic meta detection by hl7_decode()
 * @returns NOTE(review): presumably the parsed message; the value returned on
 *          error is not documented in this header, confirm in the implementation
 */
message_t *decode(FILE* fd, hl7_meta_t *meta);

/**
 * @brief open file
 * 
 * Convenience function, mainly used for language bindings.
 * 
 * @param filename path to file
 * @returns FILE* or NULL on error
 */
FILE* hl7_open(char* filename);

/**
 * @brief close file
 * 
 * Convenience function, mainly used for language bindings.
 * 
 * @param fd file pointer
 * @returns return code of `fclose()`
 */
int hl7_close(FILE* fd);



// encode.h ///////////////////
/** \file
 * @brief HL7 escape and unescape functions
 * 
 * http://healthstandards.com/blog/2006/11/02/hl7-escape-sequences/
 * 
 * The following escape sequences are implemented. More may follow 
 * but for performance reasons we do not support all (yet?):
 * 
 * |     | Character | Description Conversion                                  |
 * |---- |---------- |-------------------------------------------------------- |
 * | ✓ | `\E\`       | Escape character converted to escape character (e.g., `\`) |
 * | ✓ | `\F\`       | Field separator converted to field separator character (e.g., pipe) |
 * | ✓ | `\R\`       | Repetition separator converted to repetition separator character (e.g., `~`) |
 * | ✓ | `\S\`       | Component separator converted to component separator character (e.g., `^`) |
 * | ✓ | `\T\`       | Subcomponent separator converted to subcomponent separator character (e.g., `&`) |
 * | ✓ | `\.br\`     | Begin new output line. Set the horizontal position to the current left margin and increment the vertical position by 1. |
 * | ✘ | `\.sp <n>\` | End current output line and skip `<number>` vertical spaces. `<number>` is a positive integer or absent. If `<n>` is absent, skip one space. The horizontal character position remains unchanged. Note that for purposes of compatibility with previous versions of HL7, `^\.sp\` is equivalent to `\.br\`. |
 * | ✘ | `\.fi\`     | Begin word wrap or fill mode. This is the default state. It can be changed to a no-wrap mode using the `\.nf\` command.                                            |
 * | ✘ | `\.nf\`     | Begin no-wrap mode. |
 * | ✘ | `\.in <n>\` | Indent `<number>` of spaces, where `<number>` is a positive or negative integer. This command cannot appear after the first printable character of a line. |
 * | ✘ | `\.ti <n>\` | Temporarily indent `<number>` of spaces where number is a positive or negative integer. This command cannot appear after the first printable character of a line. |
 * | ✘ | `\.sk <n>\` | Skip `<number>` spaces to the right.                      |
 * | ✘ | `\.ce\`     | End current output line and center the next line.       |
 * | ✘ | `\H\`       | Start highlighting not converted                        |
 * | ✘ | `\N\`       | Normal text (end highlighting) not converted            |
 * | ✘ | `\Xdd…\`    | Hexadecimal data (dd must be hexadecimal characters) converted to the characters identified by each pair of digits. |
 * | ✘ | `\Cxxyy\`   | Single-byte character set escape sequence with two hexadecimal values not converted |
 * | ✘ | `\Mxxyyzz\` | Multi-byte character set escape sequence with two or three hexadecimal values (zz is optional) not converted |
 * | ✘ | `\Zdd…\`    | Locally defined escape sequence not converted           |
 */



/*
 * Forward declaration: the full `hl7_meta_t` definition appears later in this
 * header, so the struct tag is declared here to keep the prototype below
 * valid at this point. `struct hl7_meta_t *` is the same type as
 * `hl7_meta_t *`.
 */
struct hl7_meta_t;

/**
 * @brief escape HL7 special characters in a byte buffer
 *
 * The supported escape sequences are listed in the table of the encode.h
 * file comment.
 *
 * @param inp input bytes
 * @param outp receives the output bytes.
 *        NOTE(review): allocation and ownership of `*outp` are not visible
 *        from this header; confirm in the implementation
 * @param in_length presumably the number of bytes in `inp` -- confirm
 * @param out_length presumably receives the number of bytes in `*outp` -- confirm
 * @param meta separator configuration
 * @returns NOTE(review): status code; its meaning is not documented in this
 *          header, confirm in the implementation
 */
int hl7_escape(unsigned char *inp, unsigned char **outp, int in_length, int *out_length, struct hl7_meta_t *meta);

/*
 * Forward declaration: the full `hl7_meta_t` definition appears later in this
 * header, so the struct tag is declared here to keep the prototype below
 * valid at this point. `struct hl7_meta_t *` is the same type as
 * `hl7_meta_t *`.
 */
struct hl7_meta_t;

/**
 * @brief convert HL7 escape sequences in a byte buffer back to characters
 *
 * The supported escape sequences are listed in the table of the encode.h
 * file comment.
 *
 * @param inp input bytes
 * @param outp receives the output bytes.
 *        NOTE(review): allocation and ownership of `*outp` are not visible
 *        from this header; confirm in the implementation
 * @param in_length presumably the number of bytes in `inp` -- confirm
 * @param out_length presumably receives the number of bytes in `*outp` -- confirm
 * @param meta separator configuration
 * @returns NOTE(review): status code; its meaning is not documented in this
 *          header, confirm in the implementation
 */
int hl7_unescape(unsigned char *inp, unsigned char **outp, int in_length, int *out_length, struct hl7_meta_t *meta);

// base64.h ///////////////////
/** \file
 * @brief hl7 base64 decoder and encoder. 
 * 
 * These methods are able to decode hl7 sequence \.br\ on a base64 stream
 */
// https://en.wikibooks.org/wiki/Algorithm_Implementation/Miscellaneous/Base64#C_2



/**
 * @brief base64-decode a buffer
 * 
 * @param in char array to decode
 * @param inLen length of input
 * @param out array holding output
 * @param outLen length of output
 * @return 0 on success, 1 invalid input
 */
int base64decode(char *in, size_t inLen, unsigned char *out, size_t *outLen);

/**
 * @brief base64-decode a buffer, skipping hl7 line breaks
 * 
 * Skips `\.br\` sequences typically found in hl7 base64 encoded multiline strings.
 * 
 * @param in char array to decode
 * @param inLen length of input
 * @param out array holding output
 * @param outLen length of output
 * @return 0 on success, 1 invalid input, 2 invalid escape sequence
 */
int hl7_64decode(char *in, size_t inLen, unsigned char *out, size_t *outLen);

/**
 * @brief base64-decode a buffer to a file
 * 
 * Skips `\.br\` sequences typically found in hl7 base64 encoded multiline strings.
 * 
 * @param in char array to decode
 * @param inLen length of input
 * @param out_fd file handle to write the result to
 * @return 0 on success, 1 invalid input, 2 invalid escape sequence
 */
int hl7_64decode_fd(char *in, size_t inLen, FILE *out_fd);

/**
 * @brief encode base64
 * 
 * @param data_buf data to encode
 * @param dataLength length of input
 * @param result array holding output
 * @param resultSize length of output
 * @return 0 on success.
 *         NOTE(review): the value returned for invalid input is not specified
 *         in this header; confirm in the implementation
 */
int base64encode(const void* data_buf, size_t dataLength, char* result, size_t resultSize);

// bom.h ///////////////////
/** \file
 * @brief find unicode bom
 * 
 * When parsing an HL7 file, the opened file pointer should be at the 
 * beginning of data (typically just at the beginning of `MSH`).
 * 
 * If the file contains a unicode BOM, and the file pointer points at the 
 * beginning  of the file, the parser will fail. Therefore we first must skip 
 * the BOM bytes.
 * 
 * This is a crude method of detecting if the file has a BOM. Alternatively
 * you may deploy your own method and just skip ahead until you know the file 
 * pointer is at the first character of data (at the beginning of `MSH`) 
 * before parsing the file.
 * 
 * ### how it's done:
 * 
 * we try to detect known BOM patterns and then place the pointer just after it.
 * known patterns:
 * 
 * 2 Bytes
 * - UTF-16 LE `0xFF 0xFE`
 * - UTF-16 BE `0xFE 0xFF`
 * 
 * 3 Bytes
 * - UTF-8 `0xEF 0xBB 0xBF`
 * - UTF-1 `0xF7 0x64 0x4C`
 * - SCSU `0x0E 0xFE 0xFF`
 * - BOCU-1 `0xFB 0xEE 0xFF`
 * 
 * 4 Bytes
 * - UTF-7 `0x2B 0x2F 0x76` // Followed by 38, 39, 2B, or 2F (ASCII 8, 9, + or /), depending on what the next character is.
 * - UTF-32 BE `0x00 0x00 0xFF 0xFF`
 * - UTF-32 LE `0xFF 0xFE 0x00 0x00`
 * - UTF-EBCDIC `0xDD 0x73 0x66 0x73`
 * - GB-18030 `0x84 0x31 0x95 0x33`
 * 
 * ## usage
 * ```C
 * FILE *fd = fopen("some/file", "rb");
 * rewind(fd); // make sure the file pointer is at the beginning of the file
 * bom_t *bom = detect_bom(fd);
 * ```
 */


/**
 * @brief endianness detected in the bom
 */
typedef enum {
    /** undetected */
    UNKNOWN,
    /** little endian */
    LITTLE,
    /** big endian */
    BIG,
    /** smaller than 16 bit, endianness does not matter */
    SIGNATURE
} bom_endianness_t;

/**
 * @brief Byte Order Mark (BOM) information of a file.
 *
 * This struct is created by detect_bom().
 */
typedef struct bom_t {
    /** contains the raw bytes of the bom */
    char *bom;
    /** the length of the bom */
    int length;
    /** endianness */
    bom_endianness_t endianness;
} bom_t;


/**
 * @brief hex representation of the bom
 * 
 * @param length length of the input buffer
 * @param bom byte array with the bom
 * @param endianness endianness to display
 * @returns printable string
 */
char * bom_to_string(int length, unsigned char *bom, bom_endianness_t endianness);

/**
 * @brief debug function to print a bom
 * 
 * @param bom the bom to print
 */
void print_bom(bom_t* bom);

/**
 * @brief check if the file has a bom
 * 
 * If there is a bom, it will be copied to `bom->bom`. The file pointer will be 
 * set to the first character after the bom.
 * 
 * A bom has been detected if `bom->length` is greater than 0. The length
 * represents the number of bytes `bom->bom` contains.
 * 
 * @note
 * The file pointer must be at the beginning of the file or this will fail. 
 * Either run detect_bom() right after opening a file or rewind before using.
 * 
 * @see https://en.wikipedia.org/wiki/Byte_order_mark
 * @param fd file handle to read data from
 * @return bom_t; the bom bytes are stored in `bom->bom`, the length is indicated by `bom->length`
 */
bom_t* detect_bom(FILE *fd);

// meta.h ///////////////////
/** \file
 * @brief hl7 meta data structures for message_t
 * 
 * hl7_meta_t contains information on all hl7 separators of a file and the BOM.
 * 
 * read_meta() can detect the delimiters of a file pointed to by `*fd`.
 * 
 * find_line_delimiter() will seek the first `CR` or `LF` and will 
 * detect if `CRLF` is present. In case of `CRLF` the attribute `crlf` is 
 * set to 1 else 0 or -1 (initial value when no detection has happened yet).
 * 
 * https://docs.microsoft.com/en-us/biztalk/adapters-and-accelerators/accelerator-hl7/message-delimiters
 * 
 * | Delimiter | Value | Usage |
 * |-----------|-------|-------|
 * | Segment terminator | \<cr\> 0x0D | A carriage return terminates a segment record. You cannot change this value.
 * | Field separator | \| | A pipe character separates two adjacent data fields within a segment. This character also separates the segment ID from the first data field in each segment. |
 * | Component separator |	^ |A hat character separates adjacent components of data fields where allowed by the HL7 standard. |
 * | Repetition separator | ~ | A tilde character separates multiple occurrences of components or subcomponents in a field where allowed by the HL7 standard. |
 * | Escape character| \ |You use an escape character with any field that conforms to an ST, TX, or FT data type, or with the data (fourth) component of the ED data type. If no escape characters exist in a message, you can omit this character. However, you must include it if you use subcomponents in the message. |
 * | Subcomponent separator | & | An ampersand character separates adjacent subcomponents of data fields where allowed by the HL7 standard. If there are no subcomponents, then you can omit this character. |
 */


/**
 * @brief HL7 separator configuration
 * 
 * @see meta.h
 */
typedef struct hl7_meta_t {
    /** number of defined separators */
    int field_length; 
    /** are messages delimited by one or two bytes?
     * - `-1` (unknown), initial state
     * - `0` delimiter is either `CR` or `LF`
     * - `1` delimiter is `CRLF`
     * 
     * NOTE: in case of `CRLF` `sep_message` will contain `CR`/`0x0D`
     */
    int crlf; 

    /** message separator, defaults to `LF`. In case of `CRLF` this one byte variable will be `CR`. Use `hl7_meta_t->crlf` to check */
    char sep_message;
    /** field separator, default: `|` */
    char sep_field;
    /** component separator, default: `^` */
    char sep_comp;
    /** field repetition separator, default: `~` */
    char sep_rep;
    /** escape character, default: `\` */
    char sep_escape;
    /** sub component separator, default: `&` */
    char sep_subcmp;

    /** file encoding from `MSH-18` */
    char *encoding;
    /** hl7 version from `MSH-12` */
    char *version;
    /** message type from `MSH-9.1` */
    char *type;
    /** message sub type from `MSH-9.2` */
    char *subtype;
    /** bom byte sequence and length */
    bom_t *bom;

} hl7_meta_t;

/**
 * @brief possible line endings
 */
typedef enum line_delimiter_t {
    /** undefined, not found */
    DELIM_NONE = 0,
    /** default HL7: `0x0D` */
    DELIM_CR,  
    /** Unix: `0x0A` */
    DELIM_LF,  
    /** Windows: `0x0D`, `0x0A` (must differ from all other values) */
    DELIM_CRLF 
} line_delimiter_t;


/**
 * @brief initialize the separator data structure
 * 
 * @return initialized hl7_meta_t struct
 */
hl7_meta_t* init_hl7_meta_t(void);

/**
 * @brief generate a printable string of the meta data
 * 
 * @param meta a populated meta data struct
 * @return formatted string
 */
char *hl7_meta_string(hl7_meta_t* meta);

/**
 * @brief free the data structure
 * 
 * Make sure nothing points to the elements in this structure. All
 * elements in the structure will be freed (including `bom`).
 * 
 * @param hl7_meta the meta structure
 */
void free_hl7_meta(hl7_meta_t *hl7_meta);

/**
 * @brief find line delimiter
 * 
 * This method will forward a FILE pointer until it finds the first
 * `\n`, `\r` or `EOF`. The FILE pointer will be restored to its
 * original position.
 * 
 * @param fd file pointer at the beginning of a file
 * @returns the line delimiter, 0 (`DELIM_NONE`) if unknown.
 */
line_delimiter_t find_line_delimiter(FILE* fd);

/**
 * @brief read up until the 2nd field delimiter
 * 
 * Reads up to 8 bytes to find all hl7 delimiters. This method assumes
 * that it is reading an MSH segment (file pointer must be positioned at 'M').
 * 
 * Do not use it on any other hl7 segment!
 * 
 * This method will detect all separators if they are defined. If not, separators
 * will be left at the default. 
 * 
 * The user must make sure to parse the right segment. fd must be 
 * pointing to the start character of an MSH or MSA segment.
 * 
 * The file pointer is reset at the end of this method, back to the
 * byte after the BOM.
 * 
 * If no BOM detection has been done and meta->bom still is NULL, we'll 
 * run detect_bom() first. To prevent this, initialize the bom object 
 * with a length of 0.
 * 
 * @todo
 * Check the file length, there might only be 5 characters in it or
 * less. Handle the error condition.
 * 
 * @note
 * Do not re-use hl7_meta for different files, always detect
 * separators before parsing.
 * 
 * @note
 * This method fails if there is only one field separator, i.e. `MSH|`
 * will return `1`. Even though it may or may not be a valid hl7 file, `MSH-2`
 * is not followed by `MSH-1` and therefore a reliable detection is 
 * not possible.
 * 
 * Return codes:
 * - `0`: success
 * - `1`: the field delimiter was not found after the delimiters
 * - `2`: premature file end
 * - `3`: file does not begin with `MSH`
 * 
 * @param hl7_meta pointer to an empty metadata structure
 * @param fd file pointer pointing to the beginning of an MSH or MSA segment
 * @returns 0 on success, error code otherwise
 */
int read_meta(hl7_meta_t *hl7_meta, FILE *fd);

// node.h ///////////////////
/** \file
 * @brief Main datastructures for HL7 nodes and messages
 * This file contains the main parser data structures and methods to read and parse hl7 files.
 *
 * All api functions defined here are used by the parser to create a 
 * node_t structure. For advanced node capabilities (for example editing)
 * see the experimental functions in node_util.h.
 */



/** @brief number of children to pre-allocate when there is no room left in children */
#define NODE_PREALLOC_CHILDREN 5

/** @brief number of children to pre-allocate when there is no room left in children (NOTE(review): presumably applies to message-level nodes, going by the macro name -- confirm) */
#define MESSAGE_PREALLOC_CHILDREN 10

/** @brief maximum number of temporary elements to allocate in the parser buffer */
#define MAX_FIELDS 1000

/**
 * @brief structure to track delimiters inside a field
 * 
 * This is a private data structure for `parse_segment()`. It 
 * keeps track of all delimiters in a segment while looping 
 * over it byte by byte. `process_node()` then uses it to calculate 
 * the bounds and the structure of the message while extracting the 
 * data.
 * 
 * `delim` and `pos` are parallel arrays with a fixed capacity of
 * `MAX_FIELDS` entries; `delim_l` is the number of entries in use.
 * 
 * @note 
 * do not use outside of `parse_segment()`
 * 
 * @bug
 * move into the parser, this is not being used by the node facilities at all.
 */
typedef struct raw_field_t {
    /** byte array containing the raw field character data including delimiters */
    unsigned char *field;
    /** array of delimiter characters */
    unsigned char delim[MAX_FIELDS];
    /** array of positions of the delimiter characters stored in delim */
    unsigned int  pos[MAX_FIELDS];
    /** number of entries used in the delimiter array */
    unsigned int delim_l;
    /** length of the field's data */
    size_t length;
} raw_field_t;

/**
 * @brief Node types
 *
 * Identifies the level of a node in the message hierarchy. The values are
 * distinct powers of two.
 */
typedef enum node_type_t {
    /** root node, holding all segments */
    MESSAGE     = 1, 
    /** segment node, this is one line in an hl7 file */
    SEGMENT     = 2, 
    /** fieldlist is everything between | and |, respecting repetition characters ~ */
    FIELDLIST   = 4, 
    /** this is the actual field content, if there is a repetition delimiter ~ multiple of these fields are present, otherwise always one */
    FIELD       = 8, 
    /** the component, delimited by ^ */
    COMP        = 16, 
    /** the sub component, delimited by & */
    SUBCOMP     = 32, 
    /** unused */
    LEAF        = 64 
} node_type_t;

/**
 * @brief primary storage type of a delimited element
 * 
 * Every hl7 element (regardless of its place in the hierarchy) is represented 
 * by a node_t. The node_t.type defines where in the hierarchy the node is. Every 
 * node_t has a `parent` node.
 *
 * The top level node of a parsed HL7 message is always of type message_t. The 
 * hierarchy looks like this:
 * - message_t this is the top level node
 *   - `node_t.type` == `SEGMENT` this is the segment node
 *     - `node_t.type` == `FIELDLIST` array of elements between `|`, always one, may have multiple children delimited by `~`
 *       - `node_t.type` == `FIELD`  array of fields delimited by `~`, always at least one
 *         - `node_t.type` == `COMP` optional components delimited by `^`
 *           - `node_t.type` == `SUBCOMP` optional sub components delimited by `&`
 *
 * The delimiters shown above are the standard delimiters. The HL7 file may 
 * define different delimiters which are read and accounted for by the parser.
 *
 * Every node has potential children indicated by node_t.num_children 
 * (0 means none) and has a parent (special case is message_t which is 
 * always the root of the structure and is the parent of node_type_t SEGMENT).
 * 
 * This also means that some node types (node_t.type) need to be treated specially.
 * - node_type_t.SUBCOMP never has children
 * - node_type_t.SEGMENT always has a parent of type message_t, you need to cast it when accessing it
 * - node_type_t.SEGMENT the data property always contains the segment's name
 * 
 * ### creating a node
 *
 * To create a valid node_t you need to know its data and length.
 * 
 * Example:
 * ```C
 * unsigned char *data = memdup("abc", 4); // length must include \0
 * node_t *n = create_node_t(FIELD, data, 4, 0);
 *
 * // you may also create an empty node like this
 * node_t *empty = create_node_t(FIELD, NULL, 0, 0);
 * ```
 * 
 * node_t takes ownership of the pointer pointing to `data`. Be aware of the 
 * fact that when you free a node, all children and its data will get freed.
 * NOTE(review): the examples therefore hand over heap memory instead of a
 * string literal; confirm against the implementation of create_node_t() and
 * free_node_t().
 *
 * Example:
 * ```C
 * unsigned char *data = memdup("abc", 4); // length must include \0
 * node_t *n = create_node_t(FIELD, data, 4, 0);
 * 
 * // free it
 * free_node_t(n);
 * // NOTE: free_node_t() only receives `n`, it cannot reset your local
 * // variable: `data` is a dangling pointer now, do not use it anymore.
 * ``` 
 * 
 * ### using the api
 * 
 * You should use the api to manage relationships between children and 
 * parents. This will make sure you don't get any dangling nodes in your 
 * structure.
 * 
 * Example:
 * 
 * ```C
 * int ret = -1;
 * message_t *root = create_message_t(NULL); // will create a message and meta with default delimiters
 * node_t *n = create_node_t(SEGMENT, memdup("PID", 4), 4, 0);
 * ret = message_append(&root, n);
 * if (ret != 0) {
 *   // whoopsie, something went wrong
 *   free_node_t(n);
 *   free_message_t(root);
 *   return;
 * }
 * 
 * // create PID-1(1) segment
 * node_t *pid1  = create_node_t(FIELDLIST, NULL, 0, 0);
 * node_t *pid11 = create_node_t(FIELD, memdup("DATA", 5), 5, 0);
 * ret = node_append(&n, pid1);
 * ret = node_append(&pid1, pid11);
 * 
 * // from node_util.h
 * unsigned char *str = message_to_string(root);
 * printf("%s\n", str);
 * free(str);
 * free_message_t(root); // frees all children
 * 
 * ```
 *
 * ### checking for children
 *
 * ```C
 * if (node->num_children > 0)
 *   // we have some children
 * ```
 *
 * ### getting the parent
 * message_t has a very similar structure (lacking data but having file metadata).
 * When accessing the parent of a `SEGMENT`, the parent should be cast 
 * to `message_t*`.
 * 
 * ```C
 * if (node->type == SEGMENT) {
 *   message_t *parent = (message_t*) node->parent;
 *   // ...
 * } else {
 *   node_t *parent = node->parent;
 *   // ...
 * }
 * ```
 */
typedef struct node_t {
    /** the type of the node */
    node_type_t type;
    /** unique id of the node */
    int id;

    /** pointer to parent node, should never be NULL once the node is part of a tree */
    struct node_t *parent;
    /** array of child nodes */
    struct node_t **children;

    /** number of elements in children */
    int num_children;
    /** number of allocated elements in children */
    int _num_children_allocated;

    /** byte array of raw data, should be NULL if there is no data and "\0" when empty */
    unsigned char *data;
    /** number of bytes in data */
    size_t length;
    /** element position from the beginning of the segment in bytes, this might go away */
    int pos;
} node_t;

/* forward declaration: message_state_t is only used through a pointer here */
typedef struct message_state_t message_state_t; // forward declaration
/**
 * @brief hl7 message container
 * 
 * This struct holds data about the hl7 file (delimiters) in `message_t.meta` as
 * well as a tree structure of nodes, stored in message_t.segments.
 * 
 * This is your main structure returned by the parser.
 * 
 * The leading members (type, id, parent, child array, num_children,
 * _num_children_allocated) are declared in the same order as in node_t.
 * 
 * Example:
 * 
 * ```C
 * const char *filename = "some_file.hl7";
 * FILE *fd = fopen(filename, "rb");
 * message_t *root = create_message_t(NULL);
 * int ret = hl7_decode(fd, &root);
 * fclose(fd); // no need for fd anymore, all data in memory
 * if (ret != 0) {
 *   // whoopsie
 *   free_message_t(root);
 *   return; // root must not be used (or freed) again
 * }
 * 
 * // do something with the data in root
 * 
 * // cleanup
 * free_message_t(root);
 * ```
 * 
 */
typedef struct message_t {
    /** the type of the node */
    node_type_t type;
    /** unique id of the node */
    int id;
    
    /** pointer to parent node; addr_from_node() treats a `MESSAGE` whose parent is NULL as the top of the tree */
    struct node_t *parent;
    /** array of segment nodes (the children of the message) */
    struct node_t **segments;

    /** number of elements in segments */
    int num_children;
    /** number of allocated elements in segments */
    int _num_children_allocated;

    /** metadata containing delimiters and bom */
    hl7_meta_t *meta;

    /** callback functions for parser */
    message_state_t *state;
} message_t;


/**
 * @brief create a raw field structure
 * 
 * Release the result with free_raw_field().
 * 
 * @returns pointer to a new raw_field_t
 */
raw_field_t* create_raw_field_t(void);

/** 
 * @brief free a raw field structure
 * 
 * @param raw_e the raw field structure to free
 */
void free_raw_field(raw_field_t* raw_e);

/**
 * @brief create a new node
 * 
 * You must make sure to properly free the node, @see free_node_t(). You should 
 * add a pointer to the parent after creation like this: 
 * `my_node->parent = another_node;`
 * 
 * If you just plan to append the node as a child, @see node_append(); the
 * relationship with `parent` is then taken care of.
 * 
 * @param type the node type
 * @param data byte array of data
 * @param length length including the terminating `\0` byte if there is any
 * @param pos position of element in line (segment). The position marks the beginning delimiter
 * @returns initialized node
 */
node_t* create_node_t(node_type_t type, unsigned char *data, size_t length, int pos);

/**
 * @brief clean up all memory of a node
 * 
 * @note
 * be careful, if you have passed in pointers to node data or elements, 
 * they will be cleaned (node_t.data will be freed).
 * 
 * All children and children's children are freed.
 * 
 * @param node the node to free, together with its children
 */
void free_node_t(node_t *node);

/**
 * @brief append a child node
 * 
 * Memory is allocated if _num_children_allocated is too small to hold an 
 * additional pointer in the children array. You must make sure to 
 * properly allocate memory for the child itself, use create_node_t() for that.
 * 
 * NOTE(review): `parent` is documented as "may be null" while return code 2
 * means "parent is missing"; confirm which of the two the implementation
 * actually does.
 * 
 * @param parent the parent node to be associated, may be null (this is reserved for the root node)
 * @param node the node to append
 * @return 0 on success, 1 if realloc() fails, 2 if parent is missing
 */
int node_append(node_t** parent, node_t *node) ;

/**
 * @brief find the position of a node in its parent's children array
 * 
 * This is useful to find next/previous siblings.
 * 
 * @param node the node to look up in its parent
 * @returns position in the child array, `-1` if there is no parent and `-2` if the node is not found in the parent's children
 */
int node_parent_child_pos(node_t *node);

/**
 * @brief Sub component parser
 * 
 * This is the sub component parser that takes care of breaking down all
 * elements in a field delimited by `^~&`.
 *
 * Creates a node structure for components and subcomponents.
 * Don't forget to set the parent after creation. This is an internal 
 * function of the parser and is not useful anywhere else.
 * 
 * @todo
 * move to decode.c
 * 
 * @param raw_e raw field elements
 * @param meta hl7 meta data, containing the delimiters
 * @param start_pos the position of the first delimiter beginning the data of this field in the segment
 * @returns node structure
 */
node_t *process_node(raw_field_t* raw_e, hl7_meta_t *meta, int start_pos);

/**
 * @brief dump raw_e structure to stdout
 * 
 * @note
 * this is only useful for debugging inside the parser
 * 
 * @todo
 * move to decode.c
 * 
 * @param raw_e the node's delimiter information
 */
void disply_raw_node(raw_field_t* raw_e);

/**
 * @brief check if a node with the given address exists
 * 
 * @param segment node tree to search (must be of type `SEGMENT`)
 * @param addr the address to look up
 * @returns the node if found, NULL otherwise
 */
node_t *node_in_segment(node_t *segment, hl7_addr_t *addr);

/*
 * denormalize node structure into string
 * 
 * @param node node and children to turn into string representation
 * @param meta hl7 metadata for delimiters
 * @param length returns the length of the allocated buffer
 * @returns allocated buffer
 */
//char* node_to_string(node_t *node, hl7_meta_t* meta, int *length);

/**
 * @brief string representation of node_type_t
 * 
 * @param type the node type to name
 * @returns string name of the type
 */
const char *node_type_to_string(node_type_t type);

/**
 * @brief initialize an empty message_t struct
 * 
 * This function will set up your message_t. If the first param is `NULL` then
 * default values for message_t.meta and message_t.meta.bom are set 
 * according to init_hl7_meta_t().
 * 
 * @see init_hl7_meta_t()
 * @param meta metadata to use, or NULL for defaults
 * @returns message_t with default values
 */
message_t* create_message_t(hl7_meta_t *meta);

/**
 * @brief free a message_t and all its child objects
 * 
 * This method will free message_t.meta and message_t.segments (recursively).
 * 
 * @note
 * all pointers pointing to nodes within this message will be invalid after this 
 * function has been executed, be careful.
 * 
 * @param message the message to free
 */
void free_message_t(message_t *message);

/**
 * @brief append a segment to the message
 * 
 * This method will dynamically allocate more memory for message_t.segments
 * and store the number of allocated items in message_t._num_children_allocated.
 * 
 * @param parent typically the root node, a message_t object
 * @param node the segment to append to the message
 * @returns 0 on success, 1 if no memory could be allocated
 */
int message_append(message_t **parent, node_t *node);

/**
 * @brief generate an addr from any node in a message
 * 
 * Traverses up until the message is reached. The structure must have a
 * message type as top parent or the function will return `NULL`.
 * 
 * While traversing up through the parents, if the type is `MESSAGE` and the
 * parent is `NULL`, we have successfully reached the top and can produce a
 * result.
 * 
 * @param node the starting point from where to traverse up
 * @returns hl7_addr_t address structure, or `NULL` if no message was reached
 */
hl7_addr_t* addr_from_node(node_t *node);


// node_util.h ///////////////////
/** \file
 * @brief experimental hl7 editing methods
 * 
 * Node manipulation utilities
 * 
 * @note
 * this is work in progress and by no means complete nor stable
 */

/**
 * 
 * 
 */


/**
 * @brief append child
 * 
 * @see node_append()
 * @param parent the parent node to append a child to
 * @param node the node to append to parent
 * @returns 0 on success or error code otherwise
 */
int node_append_child(node_t *parent, node_t* node);

/**
 * @brief unimplemented: set a child at the specified position
 * 
 * This function sets a child at `pos`.
 * 
 * We have two cases to handle
 * - num_children-1 < pos: pad children with empty nodes first, return NULL
 * - num_children-1 >= pos: replace existing node and return pointer to old node
 * 
 * @todo
 * unimplemented!
 * 
 * @todo
 * The caller must take care to deallocate the replaced node. This is a bit 
 * dangerous, maybe it should be done here.
 * 
 * @param parent the parent node to set a child on
 * @param child the node to place into parent
 * @param pos insert at this position
 * @returns NOTE(review): the cases above describe NULL or the replaced (old)
 *          node, the original tag said "new node"; settle this when the
 *          function gets implemented
 */
node_t *node_insert_child_at(node_t *parent, node_t *child, int pos);

/**
 * @brief unimplemented: remove a node
 * 
 * This method will check the parent and remove the reference from
 * the children array, moving all elements after the removed one by 1 
 * slot to the front.
 * 
 * All child nodes (if any) are also deleted.
 * 
 * @todo
 * unimplemented!
 * 
 * @note
 * define a struct where pointers to empty nodes are kept, so we 
 * can detect if a user tries to access it later. 
 * Remove data, keep structure.
 * 
 * @see node_remove_child()
 * @param node the node to remove from the tree
 * @returns NOTE(review): return codes are not documented yet
 */
int node_remove(node_t *node);

/**
 * @brief unimplemented: remove a child from the children array
 * 
 * Find the node, move all subsequent children one slot to the left,
 * adjust num_children and return a reference to the removed 
 * node for de-allocation or further use.
 * 
 * @todo
 * unimplemented!
 * 
 * @param parent the parent node to remove a child from
 * @param child the node to remove from parent
 * @param pos position of the child; NOTE(review): how `pos` relates to
 *        `child` is not defined yet, clarify when implementing
 * @returns reference to the removed node
 */
node_t *node_remove_child(node_t *parent, node_t *child, int pos);

/**
 * @brief create and append an empty child node
 * 
 * Special treatment for `FIELDLIST`: a `FIELD` node is always appended
 * and returned.
 * 
 * @param parent the parent node to append a child to
 * @returns new node
 */
node_t *node_create_n_append(node_t *parent);

/**
 * @brief create empty child nodes until count is reached
 * 
 * Pad the children array until count is reached. Care must be taken with 
 * node type `NODELIST` (presumably `FIELDLIST` is meant, node_type_t has no
 * `NODELIST`) which should always get a single empty child in
 * the node list itself.
 * 
 * @see node_create_n_append()
 * @param parent parent node
 * @param count number to pad children to
 * @returns last child added
 */
node_t *node_pad_children(node_t *parent, int count);

/**
 * @brief create an empty node
 * 
 * For type `FIELDLIST`, always create at least one field child node.
 * 
 * @note
 * returns `NULL` on `FIELDLIST` which seems a bit strange.
 * 
 * @deprecated
 * this interface makes no sense, it's a duplicate of create_node_t() 
 * with a better name
 * 
 * @param type node type to create
 * @returns new node
 */
node_t *node_create_empty(node_type_t type);

/**
 * @brief convert node structure into string
 * 
 * This method concatenates a whole tree into a string. It can be used 
 * anywhere in the tree structure from `SEGMENT` downwards.
 * 
 * If you want to use specific delimiters, make sure to create a custom
 * hl7_meta_t structure. Otherwise (using the default delimiters of the HL7
 * specification) just provide a default one created with init_hl7_meta_t().
 * 
 * The length of the string is returned through parameter 3 `length`;
 * the returned string is additionally `\0` terminated.
 * 
 * @param node root node
 * @param meta delimiter definitions
 * @param length returns the length of the output string
 * @returns string representation of node tree
 */
unsigned char *node_to_string(node_t *node, hl7_meta_t *meta, int *length);

/**
 * @brief write contents of message to file
 * 
 * Same as `message_to_string()` but writing to a file directly.
 * 
 * This method returns the bytes written on success or a 
 * negative integer on error.
 * 
 * Error codes:
 * - `-1`: failed to open file for writing
 * - `-2`: failed to convert segment to string
 * 
 * @see message_to_string()
 * @param message input to convert to string and write to file
 * @param filename name of the file where the result shall be written
 * @returns number of bytes written on success, negative error code otherwise.
 *          NOTE(review): the original tag said "0 on success", contradicting
 *          the description above; confirm against the implementation.
 */
int message_to_file(message_t *message, char *filename);

/**
 * @brief convert message structure to hl7 file content
 * 
 * Returns a string containing all segments converted to string, 
 * separated by the delimiters defined in meta.
 * 
 * If you want to use different line or field separators, you may change these 
 * in meta before passing the message into this method.
 * 
 * This method will return a string buffer on success, or NULL on error.
 * 
 * @param message input to convert to string 
 * @returns NULL on error, else a buffer containing the hl7 file content
 */
unsigned char *message_to_string(message_t *message);

/**
 * @brief update data of a node
 * 
 * Checks and frees already associated data.
 * 
 * Then allocates memory for the data and associates it with the node; 
 * length is updated (length is the actual length, if you are setting a 
 * string, you have to count the `\0` as well).
 * 
 * @param node the node to update
 * @param data array buffer to set data of node to
 * @param length length of the array buffer
 * @returns 0 on success, -1 when memory could not be allocated
 */
int node_set_data(node_t *node, unsigned char *data, int length);

/**
 * @brief set string data to node
 * 
 * Convenience function for `node_set_data()`. This method takes care of 
 * tracking the data length. It will copy all data up until the 
 * first `\0` character.
 * 
 * @param node the node to update
 * @param data `\0` terminated string to copy into the node
 * @returns 0 on success, -1 when memory could not be allocated
 */
int node_set_string(node_t *node, char *data);

/**
 * @brief find a node by address
 * 
 * The address of the found node will be set in the 3rd parameter. If the node
 * could not be found, the return value will correspond to the node level
 * where we could not find anything.
 * 
 * Let's say the segment does not exist, then we return `SEGMENT`. If
 * the fieldlist is shorter than addr->fieldlist, then we return 
 * `FIELDLIST`, etc.
 * 
 * Return codes: if the node cannot be found, the return code will correspond 
 * to the node type where we failed to traverse.
 * 
 * @param message root node
 * @param addr address to search for
 * @param node this pointer contains the result node or NULL if not found
 * @returns If found, we return 0 and *node will point to the found node
 */
int node_get_by_addr(message_t* message, hl7_addr_t *addr, node_t **node);

/**
 * @brief set a value by address
 * 
 * This method creates a node structure if it does not exist.
 * 
 * Then it sets the value to the provided value.
 * 
 * If the node already existed, the method will 
 * destroy (deallocate) the old node and replace it with 
 * the new one.
 * 
 * Return codes:
 * - `1`: Failed to allocate fieldlist 
 * - `2`: Failed to allocate field
 * - `3`: Failed to set data on field
 * - `4`: Failed to pad children on field (comp)
 * - `5`: Failed to set data on comp
 * - `6`: Failed to pad children on field (subcomp)
 * - `7`: Failed to set data on subcomp
 * - `21`: tried to add a component to a field that already has data
 * - `22`: tried to add a sub-component to a component that already has data
 * 
 * @param message root node to traverse from
 * @param addr address of the new node
 * @param value byte array with the new node's content
 * @param length length of value
 * @returns 0 on success, error code otherwise
 */
int node_set_by_addr(message_t *message, hl7_addr_t *addr, unsigned char *value, int length);



// logging.h ///////////////////
/** \file
 * @brief logging functioins and macros
 * 
 * if the macro PRODUCTION is set, all log functions will output nothing.
 */

/** log level fatal */
#define LEVEL_FATAL     1
/** log level critical */
#define LEVEL_CRITICAL  2
/** log level error */
#define LEVEL_ERROR     3
/** log level warning */
#define LEVEL_WARNING   4
/** log level notice */
#define LEVEL_NOTICE    5
/** log level info */
#define LEVEL_INFO      6
/** log level debugging */
#define LEVEL_DEBUG     7
/** log level trace */
#define LEVEL_TRACE     8

/*
 * Every log_*() macro expands to exactly one `do { ... } while (0)` statement.
 * This keeps the macros safe in unbraced if/else constructs:
 *
 *   if (cond) log_debug("x"); else do_something();
 *
 * A bare `if (loglevel >= ...) logprint(...)` expansion would capture the
 * caller's `else`, and a bare `;` expansion (PRODUCTION) would turn the
 * construct into a syntax error.
 *
 * log_fatal, log_critical and log_error are always emitted; the remaining
 * levels are only emitted when the global `loglevel` is at least the
 * corresponding LEVEL_* value.
 */
#ifndef PRODUCTION
	/**
	 * @brief log fatal (not filtered by loglevel)
	 * @param[in] fmt format
	 * @param[in] ... arguments
	 */
	#define log_fatal(fmt, ...) \
		do { \
			logprint("%s %s:%d " fmt "\n", \
			logtime(), __FUNCTION__, __LINE__, ##__VA_ARGS__); \
		} while (0)
	/**
	 * @brief log critical (not filtered by loglevel)
	 * @param[in] fmt format
	 * @param[in] ... arguments
	 */
	#define log_critical(fmt, ...) \
		do { \
			logprint("%s %s:%d " fmt "\n", \
			logtime(), __FUNCTION__, __LINE__, ##__VA_ARGS__); \
		} while (0)
	/**
	 * @brief log error (not filtered by loglevel)
	 * @param[in] fmt format
	 * @param[in] ... arguments
	 */
	#define log_error(fmt, ...) \
		do { \
			logprint("%s %s:%d " fmt "\n", \
			logtime(), __FUNCTION__, __LINE__, ##__VA_ARGS__); \
		} while (0)

	/**
	 * @brief log warning, emitted if loglevel >= LEVEL_WARNING
	 * @param[in] fmt format
	 * @param[in] ... arguments
	 */
	#define log_warning(fmt, ...) \
		do { \
			if (loglevel >= LEVEL_WARNING) \
				logprint("%s %s:%d " fmt "\n", logtime(), __FUNCTION__, \
				__LINE__, ##__VA_ARGS__); \
		} while (0)
	/**
	 * @brief log notice, emitted if loglevel >= LEVEL_NOTICE
	 * @param[in] fmt format
	 * @param[in] ... arguments
	 */
	#define log_notice(fmt, ...) \
		do { \
			if (loglevel >= LEVEL_NOTICE) \
				logprint("%s %s:%d " fmt "\n", logtime(), __FUNCTION__, \
				__LINE__, ##__VA_ARGS__); \
		} while (0)
	/**
	 * @brief log info, emitted if loglevel >= LEVEL_INFO
	 * @param[in] fmt format
	 * @param[in] ... arguments
	 */
	#define log_info(fmt, ...) \
		do { \
			if (loglevel >= LEVEL_INFO) \
				logprint("%s %s:%d " fmt "\n", logtime(), __FUNCTION__, \
				__LINE__, ##__VA_ARGS__); \
		} while (0)
	/**
	 * @brief log debug, emitted if loglevel >= LEVEL_DEBUG
	 * @param[in] fmt format
	 * @param[in] ... arguments
	 */
	#define log_debug(fmt, ...) \
		do { \
			if (loglevel >= LEVEL_DEBUG) \
				logprint("%s %s:%d " fmt "\n", logtime(), __FUNCTION__, \
				__LINE__, ##__VA_ARGS__); \
		} while (0)
	/**
	 * @brief log trace, emitted if loglevel >= LEVEL_TRACE
	 * @param[in] fmt format
	 * @param[in] ... arguments
	 */
	#define log_trace(fmt, ...) \
		do { \
			if (loglevel >= LEVEL_TRACE) \
				logprint("%s %s:%d " fmt "\n", logtime(), __FUNCTION__, \
				__LINE__, ##__VA_ARGS__); \
		} while (0)

#else

	/* PRODUCTION build: all log macros are no-op statements */
	#define log_fatal(fmt, ...) do { } while (0)
	#define log_critical(fmt, ...) do { } while (0)
	#define log_error(fmt, ...) do { } while (0)
	#define log_warning(fmt, ...) do { } while (0)
	#define log_notice(fmt, ...) do { } while (0)
	#define log_info(fmt, ...) do { } while (0)
	#define log_debug(fmt, ...) do { } while (0)
	#define log_trace(fmt, ...) do { } while (0)

#endif



/**
 * @brief global log level
 * 
 * Only declared here; it must be defined in the main program. The log
 * macros compare it against the `LEVEL_*` constants.
 */
extern const int loglevel;

/**
 * @brief log function
 * 
 * This method is a drop-in replacement for printf()
 * 
 * @see printf()
 * @param fmt format string as understood by printf()
 * @param ... arguments referenced by the format string
 */
void logprint(const char *fmt, ...);

/**
 * @brief create timestamp
 * 
 * @returns current time formatted as "%Y-%m-%d %H:%M:%S"
 */
const char *logtime(void);

// search.h ///////////////////
/** \file
 * @brief hl7 search utilities
 */


#ifdef _WIN32
    #endif

/**
 * @brief search version information
 *
 * Declared with an explicit `(void)` parameter list so that it is a real
 * prototype and calls with arguments are rejected by the compiler.
 *
 * @returns version string of the search module
 */
const char *version_search(void);

/**
 * @brief command line parameters
 * 
 * This structure holds the command line parameters. The `int` members are
 * presumably used as boolean switches (NOTE(review): confirm in the argument
 * parser); the `*_value` members hold the argument belonging to a switch.
 */
typedef struct flags_t {
    /** verbose */
    int verbose;
    /** search term given, see search_term_value */
    int search_term;
    /** greedy */
    int greedy;
    /** json output */
    int output_json;
    /** xml output */
    int output_xml;
    /** csv output */
    int output_csv;
    /** address to search in, see address_value */
    int address;
    /** the search term */
    unsigned char *search_term_value;
    /** address string */
    char *address_value;
    /** quiet, output values only */
    int quiet;
    /** base64 decode values */
    int decode64;
    /** use output file instead of stdout */
    int output_file;
    /** output file name */
    char* output_file_value;
    /** output file handle */
    FILE *output_fd;
    /** case insensitive search */
    int case_insensitive;
} flags_t;

/**
 * @brief search modes
 */
typedef enum search_mode_t {
    /** simple case insensitive substring search over the whole file */
    SEARCH_SUBSTRING = 0, 

    /** search only lines which start with the segment name */
    SEARCH_SEGMENT   = 1,

    /** parse segments and search in specific fields */
    SEARCH_NODE      = 2
} search_mode_t;

/**
 * @brief search result
 * 
 * pos is the position in the line (SEARCH_SEGMENT/SEARCH_SUBSTRING) or the
 * position in the segment (SEARCH_NODE)
 */
typedef struct result_item_t {
    /** file name */
    char *file;
    /** result line number */
    int line_num;
    /** position of result in segment or line */
    int pos;      
    /** optional, for segment based searches */
    hl7_addr_t *addr; 
    /** field or result content (a data buffer, see length) */
    char *str;
    /** length of the data buffer `str` */
    int length;
} result_item_t;

/**
 * @brief holds 0-N result items
 * 
 * Holds the relevant search parameters and the result items. This is used as
 * a buffer during the search over various files.
 */
typedef struct search_res_t {
    /** file name */
    char *file;
    /** array of addresses to search */
    hl7_addr_t **addr;
    /** number of addresses in addr */
    int addr_l;
    /** search until first result in file (greedy==0) or keep going (greedy==1) */
    int greedy;
    /** search string */
    unsigned char *search_term;
    /** number of result items in items (see search_segment()) */
    int length;
    /** result item array */
    result_item_t **items;
} search_res_t;


/**
 * @brief create default values for flags_t
 *
 * Declared with an explicit `(void)` parameter list so that it is a real
 * prototype and calls with arguments are rejected by the compiler.
 *
 * @returns pointer to a flags_t holding the default values
 */
flags_t *create_flags_t(void);

/**
 * @brief initializes a result container
 * 
 * Release the container with free_search_res().
 * 
 * @param search_term string to search for
 * @returns an empty result container
 */
search_res_t *create_search_res(unsigned char* search_term);

/**
 * @brief frees all data associated with a search result
 * 
 * Make sure to allocate data for every item.
 * 
 * @param[out] sr search result struct
 */
void free_search_res(search_res_t *sr);

/*
 * @brief append result to result container
int append_result(char *file, int line_num, int pos, hl7_addr_t *addr, search_res_t **res, char *str, int length);
 */

/**
 * @brief parse address string
 * 
 * Splits the address string by spaces.
 * 
 * The returned array is terminated by a sentinel. Release it with
 * free_addr_arr().
 * 
 * @param addrstr address string to parse
 * @param length pointer to a length value; NOTE(review): it is not visible
 *        here whether this is the input length of `addrstr` or receives the
 *        number of parsed addresses — confirm against the implementation
 * @returns array of addresses
 */
hl7_addr_t **parse_address(char *addrstr, int *length);

/**
 * @brief free address array
 * 
 * Will free all elements and the array itself.
 * 
 * @param[out] addr address array
 */
void free_addr_arr(hl7_addr_t **addr);

/**
 * @brief search in a file
 * 
 * Return codes:
 * - 0: success
 * - 11: failed to open file
 * - 12: failed to parse meta data @see read_meta()
 * - 13: invalid address @see parse_address()
 * - 14: invalid/unknown search mode
 * - 2*: general error in SEARCH_SUBSTRING, @see search_substring()
 * - 3*: general error in SEARCH_SEGMENT, @see search_segment()
 * - 4*: general error in SEARCH_NODE, @see search_node()
 * 
 * @param filename file to search
 * @param flags search configuration (passed by value)
 * @returns 0 on success, error code otherwise
 */
int search_file(char *filename, flags_t flags);

/**
 * @brief search substring
 * 
 * @todo
 * not implemented
 * 
 * @param fd file to search in, seeking from the current position
 * @param[out] sr search result container
 * @returns 0 on success, 20-29 on error
 */
int search_substring(FILE* fd, search_res_t *sr);

/**
 * @brief line based search variant
 * 
 * We fetch every line and compare the segment name with the ones which are 
 * sought after. If the segment name matches one in the addr definitions
 * we do a substring search. 
 * 
 * If the greedy option is set, we are searching for all occurrences of 
 * sr->search_term. If not, the function returns upon the first match or
 * when the end of file is reached.
 * 
 * Results are stored in sr. sr->length holds the number of results, 
 * sr->items is an array of search results.
 * 
 * The user must take care to properly deallocate sr after use. 
 * @see free_search_res()
 * 
 * Return Codes:
 *  - 30: failed to allocate memory
 *  - 31: failed to detect delimiters 
 *  - 32: no line delimiter found, we do not accept one line files
 * 
 * @param fd file to search in, seeking from the current position
 * @param[out] sr search result container
 * @returns 0 on success, 30-39 on error
 */
int search_segment(FILE* fd, search_res_t *sr);

/**
 * @brief search sub nodes
 * 
 * Concatenates all sub nodes of an address and searches for the substring in
 * sr->search_term. If sr->search_term is NULL, then the concatenated line is 
 * returned as a new entry in sr->items.
 * 
 * Return Codes:
 * - 1: failed to search sub nodes @see _search_subnodes()
 * 
 * @param fieldlist the base node to search in
 * @param[out] sr pointer to search result
 * @param line_num needed to add positional information to sr 
 * @param meta the current hl7 file meta data
 * @param segment_rep segment repetition, all == -1
 * @returns int 0 on success
 */
int search_subnodes(node_t *fieldlist, search_res_t *sr, int line_num, 
                    hl7_meta_t *meta, int segment_rep);

/**
 * @brief parse line and search in sub elements
 * 
 * Error codes:
 * - 41: failed to read metadata @see read_meta()
 * - 42: failed to parse segment
 * - 43: failed to search subnodes @see search_subnodes()
 * 
 * @param fd file to search in, seeking from the current position
 * @param[out] sr search result container
 * @returns 0 on success, 40-49 on error
 */
int search_node(FILE* fd, search_res_t *sr);

/**
 * @brief JSON value, entities escaped
 * 
 * Escape all `"` characters with `\\` + `"` inside the string.
 * 
 * The function returns nothing; the JSON string value is printed.
 * NOTE(review): presumably written to flags.output_fd or stdout — confirm.
 * 
 * @param item item containing item->str with data and item->length 
 * @param flags command line search flags
 */
void print_json_value(result_item_t *item, flags_t flags);

/**
 * @brief XML value, entities escaped
 * 
 * Make sure to escape all vital xml entities and wrap a tag around the value.
 * 
 * | Special character | gets replaced by | literal character |
 * |------------------ |----------------- |------------------ |
 * | Ampersand         | `&amp;`          | `&`               |
 * | Less-than         | `&lt;`           | `<`               |
 * | Greater-than      | `&gt;`           | `>`               |
 * | Quotes            | `&quot;`         | `"`               |
 * | Apostrophe        | `&apos;`         | `'`               |
 * 
 * The function returns nothing; the xml tag is printed.
 * NOTE(review): presumably written to flags.output_fd or stdout — confirm.
 * 
 * @param item item containing item->str with data and item->length 
 * @param flags command line search flags
 */
void print_xml_value(result_item_t *item, flags_t flags);

/**
 * @brief print a result item as json
 * 
 * @param item the item to print
 * @param flags command line search flags
 * @param last 1 if this is the last record, so we can omit the comma after the object
 */
void output_json(result_item_t *item, flags_t flags, int last);

/**
 * @brief print a result item as xml
 * 
 * @param item the result item to print
 * @param flags command line search flags
 */
void output_xml(result_item_t *item, flags_t flags);

/**
 * @brief print a result item as csv
 *
 * Returns nothing (declared `void`).
 *
 * @param item the item to print
 * @param flags command line search flags (passed by value)
 */
void output_csv(result_item_t *item, flags_t flags);

/**
 * @brief print a result item as a plain string
 *
 * Returns nothing (declared `void`).
 *
 * @param item the result item to print
 * @param flags command line arguments (passed by value)
 */
void output_string(result_item_t *item, flags_t flags);


// util.h ///////////////////
/** \file
 * @brief hl7 utility functions
 */



/*
 * printf conversion specification selected per platform.
 *
 * NOTE(review): the macro name suggests it is used for size_t values, but
 * both expansions are signed conversions ("%lld" expects a long long,
 * "%ld" expects a long). Call sites should cast their argument to match;
 * confirm before switching to the dedicated size_t conversion "%zu".
 */
#if defined(_WIN32)
/** windows: conversion for a long long argument */
#define SIZE_T_L "%lld"
#else
/** unix: conversion for a long argument */
#define SIZE_T_L "%ld"
#endif /* _WIN32 */

/*
 * Tree-drawing glyphs used when printing to a console.
 *
 * On Windows single bytes are emitted (presumably the box-drawing
 * characters of the OEM code page used by CMD.EXE -- confirm); everywhere
 * else the Unicode box-drawing characters are written as universal
 * character names.
 */
#if defined(_WIN32)
/** console marker for node (byte 0xC3) */
#define MARKER_T "\xC3"
/** console marker for leaf (byte 0xC0) */
#define MARKER_L "\xC0"
/** console marker for none (byte 0xC4) */
#define MARKER_D "\xC4"
#else
/** console marker for node (U+251C) */
#define MARKER_T "\u251C"
/** console marker for leaf (U+2514) */
#define MARKER_L "\u2514"
/** console marker for none (U+2500) */
#define MARKER_D "\u2500"
#endif /* _WIN32 */


/**
 * @brief copy a chunk of memory
 *
 * Allocates memory for the copy; the caller is in charge of freeing it
 * later.
 *
 * NOTE(review): the behaviour on allocation failure is not documented
 * here -- confirm in the implementation before relying on the result.
 *
 * @param src byte array to copy
 * @param length length of input array
 * @returns pointer to the copied byte array
 */
void *memdup(void* src, size_t length);

/**
 * @brief print a parsed HL7 structure
 *
 * Debug function, used by 7parse to display an ASCII tree of the file
 * content. Returns nothing (declared `void`).
 *
 * @param message the message to print
 */
void dump_structure(message_t *message);

/**
 * @brief trim white space at the beginning and end of a string
 *
 * @note
 * All trailing bytes of whitespace are replaced with '\0' in the original
 * string and the returned pointer is shifted forward to the first non
 * whitespace character. You will lose track of the original beginning of
 * the memory chunk if you overwrite your only pointer with the result.
 * Use with care (always keep a copy of the original pointer).
 *
 * @param[in,out] str string to trim, modified in place
 * @returns pointer to the new beginning of the string
 */
char *trim(char *str);

/**
 * @brief escape a character in a string
 *
 * This function will allocate a new string with all occurrences of
 * quote_char prepended with escape_char.
 *
 * You must take care of freeing the result!
 *
 * NOTE(review): quote_char is declared as `char *` while escape_char is a
 * plain `char`. Whether it points to a single character or to a set of
 * characters is not visible from this declaration -- confirm in the
 * implementation.
 *
 * @param str string to escape
 * @param quote_char the character to find and escape
 * @param escape_char the character to prepend in front of quote char
 * @return newly allocated string in which quote_char is prefixed with
 *         escape_char
 */
char *escape(char *str, char *quote_char, char escape_char);


// message_state.h ///////////////////
/** \file 
 * @brief private message structures
 * 
 * This file describes a structure that is used to keep the state
 * of the parser while parsing a hl7 file. It is primarily used for
 * registering callback functions.
 * 
 * If you want to analyze parts of the document while the parser is still 
 * running or you are using it in a multi-threaded environment, then there is 
 * the possibility to add callback functions. The following callback hooks 
 * are available:
 * - `void (*cb_progress)(message_t *message, size_t total, size_t current);`
 * - `void (*cb_start)(message_t *message)`
 * - `void (*cb_end)(message_t *message, size_t max, size_t current, int exit_code)`
 * - `void (*cb_segment)(message_t *message, size_t num, char name[3])`
 * 
 * Also, you can control how often the `cb_progress()` callback is fired;
 * the default is every 1% of progress (it does not fire on files smaller
 * than 100 bytes).
 * 
 * You may register callbacks as follows:
 * 
 * ```C
 *
 * static void cb_progress(message_t *message, size_t total, size_t current) {
 *     printf("Parsing: %02d%%\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", (int)(current*100/total)+1);
 *     fflush(stdout);
 * }
 * 
 * static void cb_end(message_t *message, size_t max, size_t current, int exit_code) {
 *     // clear progress line in terminal
 *     printf("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b");
 *     if (exit_code != 0) {
 *         printf("Parser failed with exit code: %02d\n", exit_code);
 *     } 
 * }
 * 
 * int main() {
 *     const char *filename = "some_file.hl7";
 *     FILE *fd = fopen(filename, "rb");
 *     message_t *root = create_message_t(NULL);
 * 
 *     // example of a progress callback, it will show on very large files
 *     size_t fd_pos = ftell(fd);
 *     fseek(fd, 0, SEEK_END);
 *     size_t file_size = ftell(fd);
 *     fseek(fd, fd_pos, SEEK_SET); // reset
 *     
 *     // register callback progress
 *     root->state->cb_progress = cb_progress;
 *     // call progress every 5%
 *     root->state->progress_every = file_size / 20;
 *     // add a callback at the end of parsing, which will clear the progress line
 *     root->state->cb_end = cb_end;
 * 
 * 
 *     // start the parser
 *     int ret = hl7_decode(fd, &root);
 *   
 *     fclose(fd); // no need for fd anymore, all data in memory
 *     if (ret != 0) {
 *       // whoopsie
 *       free_message_t(root);
 *       return ret; // bail out, root must not be used or freed again
 *     }
 *   
 *     // do something with the data in root
 *   
 *     // cleanup
 *     free_message_t(root);
 * }
 * ```
 * 
 */

typedef struct message_t message_t; // forward declaration, only pointers to it are used below
/**
 * @brief holds callbacks and associated variables
 */
typedef struct message_state_t {
    /**
     * @brief fire progress callback every N bytes
     * 
     * If this value is 0, the progress callback is fired at every 1% 
     * of progress.
     * 
     * NOTE(review): this field is an `int`, while the usage example in the
     * file comment of this header assigns a value derived from a `size_t`
     * file size. Very large values may not fit -- confirm.
     */
    int progress_every;

    /**
     * @brief The total length of the parsed file
     * 
     * This is roughly the file size (minus the size of the BOM if there is
     * any). `parsed_length` will be set by the parser before the parsing
     * starts.
     */
    size_t parsed_length;

    /**
     * @brief progress callback
     * 
     * This callback will fire every N bytes and will provide the number of 
     * currently read bytes `current` and the number of total bytes to 
     * read `total`.
     * 
     * Be careful how often you make this event fire; on very large files
     * (MDM with base64 embedded data for example) this could be fired 
     * millions of times.
     */
    void (*cb_progress)(message_t *message, size_t total, size_t current);

    /** 
     * @brief start event, will be fired once when the parser starts
     */
    void (*cb_start)(message_t *message);

    /**
     * @brief finish callback. 
     * 
     * Will be fired when parsing has finished and will provide
     * the number of bytes processed and the exit code of the parser.
     */
    void (*cb_end)(message_t *message, size_t max, size_t current, int exit_code);

    /**
     * @brief segment parsed callback
     * 
     * Will be fired once a segment is parsed and will provide the segment 
     * number `num` (roughly equals the line number, blank lines not counted) 
     * and the segment name `name`.
     */
    void (*cb_segment)(message_t *message, size_t num, char name[3]);
} message_state_t;


/**
 * @brief initializes a message_state_t structure
 *
 * Takes no arguments. The parameter list is spelled `(void)` so that this
 * is a real prototype and the compiler rejects calls that pass arguments;
 * an empty `()` leaves the parameters unspecified in C before C23.
 *
 * @returns pointer to the new message_state_t structure, to be released
 *          with message_state_free()
 */
message_state_t *message_state_new(void);

/**
 * @brief frees a message_state_t structure
 *
 * @param ms the structure to free
 */
void message_state_free(message_state_t *ms);

/*
static size_t hl7_decode_cb_max, 
              hl7_decode_cb_current, 
              hl7_decode_cb_every = 0;

static void (*hl7_decode_cb_callback)(size_t max, size_t current);
static void hl7_decode_cb_reset() {
    hl7_decode_cb_max = 0;
    hl7_decode_cb_current = 0;
    hl7_decode_cb_every = 0;
}

static void hl7_decode_cb(size_t max, size_t current) {
    // global vars to track progress, be careful, this is not thread safe
    hl7_decode_cb_callback(hl7_decode_cb_max, hl7_decode_cb_current);
}

void hl7_decode_cb_register(void (*callback)(size_t max, size_t current), size_t max, size_t every) {
    hl7_decode_cb_reset();
    hl7_decode_cb_every    = every;
    hl7_decode_cb_max      = max;
    hl7_decode_cb_callback = callback;
}
*/



#endif // LIB7_H
