commit 93d838f14c3e8abfcb51919a2605c6ea59f7e48d Author: anon <anon@anon.anon> Date: Thu Feb 9 16:10:24 2023 +0100 v1.0 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7b2ba7b --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +ati.out +obj/*.o diff --git a/Makefile b/Makefile new file mode 100755 index 0000000..4da57ad --- /dev/null +++ b/Makefile @@ -0,0 +1,35 @@ +CXX:=g++ +CFLAGS:=-O0 -ggdb -std=c++17 +CPPFLAGS:=-D DEBUG +LDLIBS:= +LDFLAGS:= +COMP:=$(CXX) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(LDLIBS) + +OBJD:=obj/ +SRCD:=src/ +SRC:=main.cpp lexer.cpp algo.cpp +SRC:=$(addprefix ${SRCD},${SRC}) +OBJ:=$(subst .cpp,.o,$(subst ${SRCD},${OBJD},${SRC})) + +OUTPUT:=ati.out + +.PHONY: main clean run + +main: ${OBJ} + ${COMP} ${OBJ} -o ${OUTPUT} + +obj/%.o: src/%.cpp + ${COMP} -c $< -o ${OBJD}/$*.o + +clean: + -rm ${OBJD}/* + -rm ./${OUTPUT} + +run: + ./${OUTPUT} + +gdb: + sudo gdb --directory=./src -p $(shell pgrep ${OUTPUT}) + +algo: + make -C debug/ diff --git a/NOTES.txt b/NOTES.txt new file mode 100644 index 0000000..e69de29 diff --git a/TODO.list b/TODO.list new file mode 100644 index 0000000..9ccd267 --- /dev/null +++ b/TODO.list @@ -0,0 +1,3 @@ +/*--------------------------------------------Todos------------------------------------------------\\ +| +\\-------------------------------------------------------------------------------------------------*/ diff --git a/debug/Makefile b/debug/Makefile new file mode 100644 index 0000000..cdde5e2 --- /dev/null +++ b/debug/Makefile @@ -0,0 +1,4 @@ +m4: + for i in *.m4; do \ + m4 $$i > $$(basename $$i .m4); \ + done diff --git a/debug/assignment.algo b/debug/assignment.algo new file mode 100644 index 0000000..fea73ea --- /dev/null +++ b/debug/assignment.algo @@ -0,0 +1,2 @@ +1;0;i;=;1;0;0 +1;0;h;=;2;0;0 diff --git a/debug/eval.algo b/debug/eval.algo new file mode 100644 index 0000000..b1d9b75 --- /dev/null +++ b/debug/eval.algo @@ -0,0 +1,3 @@ +1;0;i;=;1;0;0 +1;0;h;=;2;0;0 +1;0;k;=;3;*;h diff --git 
a/debug/i.algo b/debug/i.algo new file mode 100644 index 0000000..c96fc60 --- /dev/null +++ b/debug/i.algo @@ -0,0 +1,47 @@ +" Lehmer generator +0;lehmer;=;4;0;0 +1;7;0;0;0;0 +0;argv-1;=;argv-1;*;48271; +0;argv-1;=;argv-1;%;2147483647 +1;return;0;0;0 +" swap +0;swap;=;10;0;0 +1;14;0;0;0;0 +0;buf;=;argv-1;0;0 +0;argv-1;=;argv-2;0;0 +0;argv-2;=;buf;0;0 +1;return;0;0;0 +" call +0;i;=;0;0;0 +0;argv-1;=;11;0;0 +1;lehmer;1;0;0;0 +0;argv-2-i;=;argv-1;0;0 +0;i;=;i;+;1 +3;17;0;i;=;10 +0;argv-1;=;10;0;0 +1;24;1;0;0;0 +1;return;0;0;0 +" selection sort +0;n;=;argv-1;0;0 +0;i;=;0;0;0; +0;a-i;=;argv-2-i;0;0 +0;i;=;i;+;1 +3;27;0;i;=;10 +" body +0;i;=;0;0;0; +3;return;0;i;<;n +0;min;=;i;0;0 +0;j;=;i;+;1 +3;40;0;j;<;n +3;38;0;a-j;<;a-min +0;min;=;j;0;0 +0;j;=;1;+;j +1;35;0;0;0 +2;46;0;min;=;i +0;argv-1;=;a-i;0;0 +0;argv-2;=;a-min;0;0 +1;swap;1;0;0;0 +0;a-i;=;argv-1;0;0 +0;a-min;=;argv-2;0;0 +0;i;=;i;+;1 +1;32;0;0;0 diff --git a/debug/i.algo.m4 b/debug/i.algo.m4 new file mode 100644 index 0000000..afa5dac --- /dev/null +++ b/debug/i.algo.m4 @@ -0,0 +1,47 @@ +" Lehmer generator +0;lehmer;=;eval(__line__+2);0;0 +1;eval(__line__+4);0;0;0;0 +0;argv-1;=;argv-1;*;48271; +0;argv-1;=;argv-1;%;2147483647 +1;return;0;0;0 +" swap +0;swap;=;eval(__line__+2);0;0 +1;eval(__line__+5);0;0;0;0 +0;buf;=;argv-1;0;0 +0;argv-1;=;argv-2;0;0 +0;argv-2;=;buf;0;0 +1;return;0;0;0 +" call +0;i;=;0;0;0 +0;argv-1;=;11;0;0 +1;lehmer;1;0;0;0 +0;argv-2-i;=;argv-1;0;0 +0;i;=;i;+;1 +3;eval(__line__-3);0;i;=;10 +0;argv-1;=;10;0;0 +1;eval(__line__+2);1;0;0;0 +1;return;0;0;0 +" selection sort +0;n;=;argv-1;0;0 +0;i;=;0;0;0; +0;a-i;=;argv-2-i;0;0 +0;i;=;i;+;1 +3;eval(__line__-2);0;i;=;10 +" body +0;i;=;0;0;0; +3;return;0;i;<;n +0;min;=;i;0;0 +0;j;=;i;+;1 +3;eval(__line__+5);0;j;<;n +3;eval(__line__+2);0;a-j;<;a-min +0;min;=;j;0;0 +0;j;=;1;+;j +1;eval(__line__-4);0;0;0 +2;eval(__line__+6);0;min;=;i +0;argv-1;=;a-i;0;0 +0;argv-2;=;a-min;0;0 +1;swap;1;0;0;0 +0;a-i;=;argv-1;0;0 +0;a-min;=;argv-2;0;0 +0;i;=;i;+;1 
+1;eval(__line__-15);0;0;0 diff --git a/docs/ALGO_DETAIL.txt b/docs/ALGO_DETAIL.txt new file mode 100644 index 0000000..3d13e6b --- /dev/null +++ b/docs/ALGO_DETAIL.txt @@ -0,0 +1,152 @@ +{ + int myArray[] = {34, 23, 78, 41, 7, 87, 52, 36, 29, 42}; + // + void selectionSort(int a[], const int &n){ + int i = 0; + while(i < n){ + int min = i; + int j = i + 1; + while(j < n){ + if(a[j] < a[min]){ + min = j; + } + j = j + 1; + } + if(min != i){ + swap(a[i], a[min]); + } + i = i + 1; + } + } + // + selectionSort(myArray, 10); +} + | ID | PTR | INDEX | VALUE | + :------:-------:---------:-----------: + | x | seq | 0 | + // array + | 1 | int | 0 | 34 | + ... + | 1 | int | 9 | 42 | + + class fun{ + WORD* argv[]; + }; + +// | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | +// 1 +// A +// | +//reserved bit +// +// | typename | var-0 | var-1 | var-2 | +// byte-0 addr-1 addr-2 addr-3 + Atom: + +-----atom-----+ + | +--------+ | + | | sor id | | + | +--------+ | + +--------------+ + >nem tudunk csak atomra ugrani + >az atomok eddig instrukció fajtától függően határoztuk meg, viszont kreatívabban is lehetne + +### Original ### + Folyamatábra_elem : "Folyamatábra elem"; atom név + AD_ID : "AlgoDetail ID"; atom id ^ ^ ^ \_ elsődleges kulcs + Ssz : "?!"; sor id . . . . . . . . . / + V : "Változó"; cím + V_ndx : "Változó iNDeX"; cím offset // >>>NOTE1 + VM : "Változó Művelet" + func : "FUNCió"; függvény hívás // >>>NOTE3 + J id : "Jump ID"; ugrás célpont; egy atom id + // azt hiszem nyújt egy fajta biztonságot + ( \ + V1 : "Változó 1" \ + V_ndx : "Változó iNDeX"; inkonzisztens módon nincs megszámozva /* >>>NOTE1 */\ + C1 : "Constant 1" \ + ) \ + AM : "Alg Művelet" } >>>NOTE2 + ( / + V2 / + V_ndx / + C2 / + ) / + VB1 \ + CB1 \ + F1 \ + VJ1 \ + CJ1 \ + LM : "?! 
Művelet" } >>>NOTE2 + VB2 / + CB2 / + F2 / + VJ2 / + CJ2 / + + NOTE: + >hunglish + >1*: a változónév alapból magasszintű absztraktció; csak az olvashatóság/parsibleness kedvéért van értelme külön kezelni + >3*: teljesen redundáns az ugrás mező miatt, hacsak nem a paramétereket is belekódoljuk. + ebben az esetben kell egy módszer amivel egy kifejezésben utalhatunk egy tömb N. elemére ami inkonzistens + a [Név] [Index] oszlop felépítéssel + >2*: ismétéls; a 2 igazán indokolatlan szám; logikusan vagy 1 feltételt mentek és tovább kezelem, + vagy végtelelen feltételt határozhatok meg egy sorban; az előbbi a logikus + az oszlop számok homogenitása miatt + + 30 oszlop + legalább 10 redundáns + +### Revoked ### + enum JMP { + no, + always, + wt, //when true + wf //when false + } + (IN) (OUT) + | LN | JMP | DEST | ASSIGNEMT | OPR1 | OPERATOR | OPR2 | COMMENT | + 1 0 i = 0 0 0 + 2 3 return 0 i < &argv[2] + 3 0 min = i 0 0 + 4 0 j = i + 1 + 5 3 9 0 j < n + 6 3 8 0 a[j] < a[min] + 7 0 min = j 0 0 + 8 0 j += 1 0 0 + 9 2 13 0 min = i + 10 0 call[0] = &a[i] 0 0 + 11 0 call[1] = &a[min] 0 0 + 12 1 &swap 0 call 0 0 + 13 0 i += 1 + 14 1 return 0 0 0 0 + + 9 oszlop + 0 redundancia + + + (IN) (OUT) + | Address | PK2 | JMP | DEST | ASSIGNEMT | OPR1 | OPERATOR | OPR2 | COMMENT | + 1 1 0 i = 0 0 0 + 2 1 3 return 0 i < &argv[2] + 3 1 0 min = i 0 0 + 3 2 0 j = i + 1 + 4 1 3 9 0 j < n + 5 1 3 8 0 a[j] < a[min] + 6 1 0 min = j 0 0 + 6 2 0 j += 1 0 0 + 7 1 2 13 0 min = i + 8 1 0 call[0] = &a[i] 0 0 + 8 2 0 call[1] = &a[min] 0 0 + 8 3 1 &swap 0 call 0 0 + 9 1 0 i += 1 + 10 1 1 return 0 0 0 0 + + 10 oszlop + az atom fogalma megmaradt -> mivel JT-ben nincs ilyen mekötés ezért értelmetlen megtartani + +// can oracle/mysql fork? +// is varchar allocated inline? + +since JT is ordered insertion is a nightmare + +the deletion problem is actually self evident. an elems life time is as long as we have any data on it. 
diff --git a/docs/fibonacci_algo.table b/docs/fibonacci_algo.table new file mode 100644 index 0000000..9269cfd --- /dev/null +++ b/docs/fibonacci_algo.table @@ -0,0 +1,45 @@ ++------------------------+-------+-----+------+--------------------------------------------------------------------------------+-----+--------------------------------------------------------------------------------------------+------------------------------------------------------------------------+------------------------------------------------------------------+----------------------------------------------------------+-----------------------------------------------------------------------------------------+------------------------------------------------+---+----+---+----+-------+----+---+-----+-----+----+-----+-----+----+-----+-----+----+-----+-----+--------------------------------------------------------------------------------+ +| Folyamatábra_elem | AD_ID | Ssz | V | V_ndx | VM | func | J id | ( | V1 | V_ndx | C1 | ) | AM | ( | V2 | V_ndx | C2 | ) | VB1 | CB1 | F1 | VJ1 | CJ1 | LM | VB2 | CB2 | F2 | VJ2 | CJ2 | AD_ID=Alg._detail_table, V=változó+index., VM=Változó múvelete,J id =Jump || Start | 1 | 1 | 2 | Start :indul a projekt,ami az AD_ID=2 Dataimput ID-val kezdődik/folytatódik | | | | | | | | | | | | | | | | | | | | | | | | | | | 
++------------------------+-------+-----+------+--------------------------------------------------------------------------------+-----+--------------------------------------------------------------------------------------------+------------------------------------------------------------------------+------------------------------------------------------------------+----------------------------------------------------------+-----------------------------------------------------------------------------------------+------------------------------------------------+---+----+---+----+-------+----+---+-----+-----+----+-----+-----+----+-----+-----+----+-----+-----+--------------------------------------------------------------------------------+ +| DataInput | 2 | 1 | a | =' | | | 1 | Definició+ default constans értékadás Fibonecci 1. elemének | | | | | | | | | | | | | | | | | | | | | | || | 2 | 2 | b | =' | | | 2 | Definició+ default constans értékadás Fibonecci 2. elemének | | | | | | | | | | | | | | | | | | | | | | | ++------------------------+-------+-----+------+--------------------------------------------------------------------------------+-----+--------------------------------------------------------------------------------------------+------------------------------------------------------------------------+------------------------------------------------------------------+----------------------------------------------------------+-----------------------------------------------------------------------------------------+------------------------------------------------+---+----+---+----+-------+----+---+-----+-----+----+-----+-----+----+-----+-----+----+-----+-----+--------------------------------------------------------------------------------+ +| | 2 | 3 | f | =' | | 0 | Definició+ default constans értékadás aktuális Fibonacci számnak | | | | | | | | | | | | | | | | | | | | | | | || | 2 | 4 | ft | =' | 10 | Definiálom Fibonacci tömb max_tömbelemét | | | | | | | | | | 
| | | | | | | | | | | | | | || | 2 | 5 | x | =' | | | 0 | Definiálom az "x" ciklus változót | | | | | | | | | | | | | | | | | | | | | | || | 2 | 6 | 3 | AD_ID=3 ugrás objektum ciklus ini részére | | | | | | | | | | | | | | | | | | | | | | | | | | || Ciklus Ini dönt. pont | 3 | 1 | 4 | | | x | < | 10 | Számítási ciklus, addig ugrik a AD_ID=4-re amíg x<10 | | | | | | | | | | | | | | | | | | | | | || | 3 | 2 | 5 | x | >= | 10 | Else ág (X>=10) az 5-re ugrik | | | | | | | | | | | | | | | | | | | | | | | || C_mag számítás | 4 | 1 | f | =' | | a | +' | | b | | Akt_Fibo = előző két elem összege (a+b) | | | | | | | | | | | | | | | | | | | || | 4 | 2 | ft | x | =' | f | | | | Fibonacci tömb x index oszlopába tesszük az előzőleg számolt Fibo_akt értékét! | | | | | | | | | | | | | | | | | | | | || | 4 | 3 | a | =' | b | A kisebbik szám egyenlő lesz a nagyobbik számmal | | | | | | | | | | | | | | | | | | | | | | | | | ++------------------------+-------+-----+------+--------------------------------------------------------------------------------+-----+--------------------------------------------------------------------------------------------+------------------------------------------------------------------------+------------------------------------------------------------------+----------------------------------------------------------+-----------------------------------------------------------------------------------------+------------------------------------------------+---+----+---+----+-------+----+---+-----+-----+----+-----+-----+----+-----+-----+----+-----+-----+--------------------------------------------------------------------------------+ +| | 4 | 4 | b | =' | f | A nagyobbik szám egyenlő lesz a Fibo_szám-mal, ami a sorozat új eleme | | | | | | | | | | | | | | | | | | | | | | | | || | 4 | 5 | x | =' | x | +' | 1 | Növelem a ciklusváltozót | | | | | | | | | | | | | | | | | | | | | | || | 4 | 6 | 3 | Itt van vége a ciklus_magnak és a ciklus AD_ID=3 Dpontjára [4,5] | | 
| | | | | | | | | | | | | | | | | | | | | | | | || Ciklus Ini dönt. pont | 5 | 1 | 6 | | | x | < | 10 | Adatkiíró ciklus ini ,addig ugrik a 6-re amíg x<10 | | | | | | | | | | | | | | | | | | | | | || | 5 | 2 | 7 | x | >= | 10 | else ág => a 7-re ugrik | | | | | | | | | | | | | | | | | | | | | | | || C_mag output | 6 | 1 | f | x | Out | Fibo_tomb x indexű értékét 1= képernyőre, 2=pdf file, 3=cvs file-be jelenítem meg | | | | | | | | | | | | | | | | | | | | | | | | || | 6 | 2 | x | =' | x | +' | 1 | x ciklus változó értékének növelése | | | | | | | | | | | | | | | | | | | | | | || | 6 | 3 | 5 | Itt van vége a ciklus_magnak és a ciklus AD_ID=5 döntéspontjára ugrik | | | | | | | | | | | | | | | | | | | | | | | | | | || End | 7 | 1 | null | Vége a proginak | | | | | | | | | | | | | | | | | | | | | | | | | | |diff --git a/docs/problems.txt b/docs/problems.txt new file mode 100644 index 0000000..29a9c70 --- /dev/null +++ b/docs/problems.txt @@ -0,0 +1,6 @@ +### Where does jokertao.value end? ### +Surely its not a problem when using VARCHAR2, however i highly doubt there a formal definition for that thing. +Its not a trivial type. Meaning as is now JT cannot actually be stored everything. Its JT+VARCHAR2 that stores everything. +Now, we either fixate the value size, or normalize the struct. 
// perhaps it can somehow be stored as a JT, tho unmengling it for every entry doesnt sound very pleasant + ¤ by normalizing we get more columns + ¤ by fixing the size we get multi cell values diff --git a/obj/.placeholder b/obj/.placeholder new file mode 100644 index 0000000..e69de29 diff --git a/src/JokerTao.cpp b/src/JokerTao.cpp new file mode 100644 index 0000000..81d0680 --- /dev/null +++ b/src/JokerTao.cpp @@ -0,0 +1,36 @@ +#include "JokerTao.h" + +unsigned int JokerTao::id_ = JOKERTAO::END; + +namespace jt{ + inline std::vector<JokerTao> table; + + unsigned findf_relationship(int attr_, std::string value_){ + unsigned last_id = JOKERTAO::END - 1; + for(auto &&i : table[i]){ + if(i.id == last_id){ + continue; + } + + if(i.attr == attr && i.value == value_){ + return i.index; + } + } + + return JOKERTAO::INVALID; + } + + unsigned find_attribute(unsigned id_, JokerTao* attr_, unsigned index_ = 0){ + for(auto &&i : table[i]){ + if(i.id != id_){ + continue; + } + + if(i.attr == attr){ + return i.index; + } + } + + return JOKERTAO::INVALID; + } +} diff --git a/src/JokerTao.h b/src/JokerTao.h new file mode 100644 index 0000000..e0744ab --- /dev/null +++ b/src/JokerTao.h @@ -0,0 +1,33 @@ +#pragma once + +#include <vector> + +enum JOKERTAO{ + INVALID, + NAME, + VALUE + END +}; + +struct JokerTao{ + unsigned id; + JokerTao *attr; + unsigned index; + std::string value; //void* value = nullptr; + + inline unsigned last_id(){ return id__; } + JokerTao(attr_ = nullptr, value_ = "", id_ = id__++, index_ = 0) : + id(id_), attr(attr_), index(index_), value(value_){ + } + delete JokerTao(); + + private: + static unsigned id__; +}; + +namespace jt{ + extern std::vector<JokerTao> table; + + unsigned findf_relationship(unsigned attr_, std::string value_); + unsigned find_attribute(unsigned index_, JokerTao* attr_, unsigned index_ = 0); +} diff --git a/src/algo.cpp b/src/algo.cpp new file mode 100644 index 0000000..4adb8e4 --- /dev/null +++ b/src/algo.cpp @@ -0,0 +1,240 @@ 
+#include "algo.h" + +namespace algo{ + /* VARS */ + inline std::stack<struct frame> callstack; + inline std::unordered_map<std::string, std::string> vars; + /**/ + + bool init(){ + const char *const *c = standard_vars-1; + while((c++, *c != NULL)){ + vars[*c] = EMPTY_CELL; + } + callstack.emplace(); + + return true; + } + + void assign(const std::string &name, const std::string& val){ + vars[name] = val; + //unsigned index = findf_relationship(NAME, v[ALGO::DESTINATION]); + //if(index == JOKERTAO::INVALID){ + // jt::table.emplace_back(JOKERTAO::NAME, v[ALGO::DESTINATION]); + // jt::table.emplace_back(JOKERTAO::VALUE, v[ALGO::VALUE], 0); + //}else{ + // jt[index].find_attribute() + //} + } + + void pop(unsigned &head){ + if(callstack.size() == 1){ + #ifdef DEBUG + printf(YELLOW "Return on empty callstack: exiting.\n" NORMAL); + #endif + raise(SIGINT); + } + callstack.pop(); + head = callstack.top().bp + 1; + } + void push(const unsigned &head){ + callstack.top().bp = head; + callstack.emplace(); + } + + std::string arref(std::string s){ + for(int i = s.size()-1, last = i; i >= 0; i--){ + if(s[i] == '-'){ + s.replace(i+1, last-i, deref(s.substr(i+1, last-i))); + } + } + + return s; + } + + std::string deref(std::string s){ + s = arref(s); + return (vars.find(s) != vars.end()) ? deref(vars[s]) : s; + } + + bool jump(unsigned &head, const jmp &do_jump, const std::string &dest, const char &push){ + bool ret = false; + switch(do_jump){ + case algo::jmp::nop: + break; + case algo::jmp::jmp: + algo::call(head, dest, (push != algo::EMPTY_CELL)); + ret = true; + break; + case algo::jmp::je: + if(algo::vars["eval"] == algo::true_){ + algo::call(head, dest, (push != algo::EMPTY_CELL)); + ret = true; + } + break; + case algo::jmp::jne: + if(algo::vars["eval"] == algo::false_){ + algo::call(head, dest, (push != algo::EMPTY_CELL)); + ret = true; + } + break; + } + #ifdef DEBUG + if(ret){ + printf(MAGENTA "Jumped to '%s' (%s). 
" NORMAL, dest.c_str(), algo::deref(dest).c_str()); + } + printf(MAGENTA "Head at %d. " NORMAL, head); + #endif + return ret; + } + + void call(unsigned &head, const std::string &dest, const bool &is_push){ + #ifdef DEBUG + long debug_stacklen = algo::callstack.size(); + #endif + if(dest == "return"){ + pop(head); + goto RETURN; + } + + if(is_push){ push(head); } + + try{ + head = std::stoi(deref(dest)); + }catch(...){ + #ifdef DEBUG + printf("Invalid jump destination '%s'", dest.c_str()); + #endif + goto RETURN; + } + + RETURN: + #ifdef DEBUG + if(algo::callstack.size() != debug_stacklen){ + printf(YELLOW "[Stack: %ld->%ld] " NORMAL, debug_stacklen, algo::callstack.size()); + } + #endif + } + + int aint(std::string s){ + static const char c[] = "0123456789"; + int i = 0; + bool sign = true; + while(true){ + switch(s[i]){ + case '-': + sign = !(sign xor false); + ++i; + continue; + case '+': + sign = !(sign xor true); + ++i; + continue; + default: + goto BR; + } + } + BR: + s.erase(0, i); + + for(i = 0; i < s.size(); i++){ + bool is_any = false; + for(int h = 0, end2 = sizeof(c)-1; h < end2; h++){ + if(s[i] == c[h]){ + is_any = true; + break; + } + } + if(not is_any){ + s.erase(i, 1); + --i; + } + } + if(s == ""){ s = '0'; } + + return std::stoi(s) * (sign ? 1 : -1); + } + + + void do_eval(const std::string &opr1, char oper, const std::string &opr2){ // convert to use assign() + #define CASECHAR(c){ \ + case (char)(*#c):\ + vars["eval"] = std::to_string(aint(deref(opr1)) c aint(deref(opr2))); \ + } + #define CASELCHAR(c){ \ + case (char)(*#c):\ + aint(deref(opr1)) c aint(deref(opr2)) ? 
(vars["eval"] = true_) : (vars["eval"] = false_); \ + } + //is_cmp = (oper == EMPTY_CELL); + vars["eval"] = ""; + switch(oper){ + case EMPTY_CELL: + vars["eval"] = deref(opr1); + return; + case '.': + vars["eval"] = deref(opr1) + deref(opr2); + return; + case '\'': + vars["eval"] = deref(opr1).substr(0, opr1.size()-aint(deref(opr2))); + return; + CASECHAR(-) + return; + CASECHAR(+) + return; + CASECHAR(/) + return; + CASECHAR(*) + return; + CASECHAR(%) + return; + CASELCHAR(<) + return; + CASELCHAR(>) + return; + case '=': + aint(deref(opr1)) == aint(deref(opr2)) ? (vars["eval"] = true_) : (vars["eval"] = false_); \ + return; + CASELCHAR(|) + return; + CASELCHAR(&) + return; + CASELCHAR(^) + return; + } + } +} + + + //int aint(std::string s){ + // static const char c[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; // ?! + // bool sign = true; + // int roof = 1; + // + // for(int i = 0, int end = s.size(); i < end; i++){ + // int h = 0; + // for(int e = sizeof(c)-1; h < e; h++){ + // if(c[h] == s[i]){ + // while(pow(2, roof) < h){ + // ++roof; + // } + // goto CONT; + // } + // } + // switch(s[i]){ + // case '-': + // sign = !(sign xor false); + // s.rease(i, 1); + // --i; + // continue; + // case '+': + // sign = !(sign xor true); + // s.rease(i, 1); + // --i; + // continue; + // default: + // s[i] = '0'; + // continue; + // } + // CONT: + // } + //} diff --git a/src/algo.h b/src/algo.h new file mode 100644 index 0000000..7725fde --- /dev/null +++ b/src/algo.h @@ -0,0 +1,76 @@ +#include <math.h> +#include <signal.h> +#include <string> +#include <stack> +#include <unordered_map> +#ifdef DEBUG +# include "debug.h" +#endif + +namespace algo{ + /* CONST */ + enum{ +// LINE, + JUMP, + DESTINATION, + ASSIGNMENT, + OPR1, + OPERATOR, + OPR2, + COMMENT + }; + + enum class jmp{ + nop, + jmp, + je, + jne + }; + + const char EMPTY_CELL = '0'; + + const char true_[] = "TRUE"; + const char false_[] = "FALSE"; + + struct frame{ + unsigned bp; + }; + + const 
char* const standard_vars[] = { + "eval", + "ret", + "argv-0", + "argv-1", + "argv-2", + "argv-3", + "argv-4", + "argv-5", + "argv-6", + "argv-7", + "argv-8", + "argv-9", + NULL + }; + /**/ + + /* VARS */ + extern std::stack<struct frame> callstack; + extern std::unordered_map<std::string, std::string> vars; + /**/ + + bool init(); + + std::string arref(std::string s); + std::string deref(std::string s); + int aint(std::string s); + + void assign(const std::string &name, const std::string& val); + + void do_eval(const std::string &opr1, char oper, const std::string &opr2); + + void pop(unsigned &head); + void push(const unsigned &head); + + bool jump(unsigned &head, const jmp &do_jump, const std::string &dest, const char &push); + void call(unsigned &head, const std::string &dest, const bool &is_push); +} diff --git a/src/debug.h b/src/debug.h new file mode 100644 index 0000000..8a92e03 --- /dev/null +++ b/src/debug.h @@ -0,0 +1,23 @@ +#define DEBUG_COLOR + +#ifdef DEBUG_COLOR +# define NORMAL "\033[0m" +# define BOLD "\033[1m" +# define RED "\033[31m" +# define GREEN "\033[32m" +# define YELLOW "\033[33m" +# define BLUE "\033[34m" +# define MAGENTA "\033[35m" +# define CYAN "\033[36m" +# define WHITE "\033[37m" +#else +# define NORMAL "" +# define BOLD "" +# define RED "" +# define GREEN "" +# define YELLOW "" +# define BLUE "" +# define MAGENTA "" +# define CYAN "" +# define WHITE "" +#endif diff --git a/src/lexer.cpp b/src/lexer.cpp new file mode 100644 index 0000000..c64a430 --- /dev/null +++ b/src/lexer.cpp @@ -0,0 +1,87 @@ +#include "lexer.h" + +#include "rapidcsv.h" +#include "algo.h" + + +inline unsigned long long instruction_counter = 1; + +void lex_file(const char* const f){ + rapidcsv::Document doc(f, rapidcsv::LabelParams(-1, -1), + rapidcsv::SeparatorParams(';') + ); + std::vector<std::string> buf; + + for(unsigned head = 1, end = doc.GetRowCount()+1; head < end; head++){ + #ifdef DEBUG + printf(BOLD BLUE "%0#6llx" NORMAL "|", instruction_counter++); + 
printf(BOLD "%s:%d: " NORMAL, f, head); + #endif + const std::string c = doc.GetCell<std::string>(0, head-1); + if(c[0] == '"'){ // skip comments + #ifdef DEBUG + printf(GREEN "Comment: %s\n" NORMAL, c.c_str()); + #endif + continue; + } + buf = doc.GetRow<std::string>(head-1); + lex_row(buf, head); + } +} + +void lex_row(std::vector<std::string> &v, unsigned &head){ + /* --- MUST COMPUTE --- */ + algo::do_eval(v[algo::OPR1], v[algo::OPERATOR][0], v[algo::OPR2]); + /* --- */ + + /* --- INSTRUCTION BREANCHING --- */ + /* Is jump*/ + if(v[algo::JUMP][0] != algo::EMPTY_CELL){ + algo::jmp j; + try{ + j = (algo::jmp)(stoi(v[algo::JUMP])); + }catch(...){ + puts(RED "Non sensical jump condition." NORMAL); + return; + } + #ifdef DEBUG + printf(MAGENTA "Jump condition %s (%s) %s %s (%s) with %d. " NORMAL, + v[algo::OPR1].c_str(), + algo::deref(v[algo::OPR1]).c_str(), + v[algo::OPERATOR].c_str(), + algo::deref(v[algo::OPR2]).c_str(), + v[algo::OPR2].c_str(), + (int)j + ); + #endif + if(algo::jump(head, j, v[algo::DESTINATION], v[algo::ASSIGNMENT][0])){ + --head; + #ifdef DEBUG + puts(MAGENTA "Continueing." NORMAL); + #endif + }else{ + #ifdef DEBUG + puts(MAGENTA "Falling through." NORMAL); + #endif + } + return; + } + /* Is assignemt */ + if(v[algo::ASSIGNMENT][0] != algo::EMPTY_CELL){ + algo::assign(algo::arref(v[algo::DESTINATION]), algo::vars["eval"]); + #ifdef DEBUG + printf(CYAN "Assigned '%s' to '%s' (%s). 
(%s %s %s %c %s)\n" NORMAL, + algo::vars["eval"].c_str(), + v[algo::DESTINATION].c_str(), + algo::arref(v[algo::DESTINATION]).c_str(), + // + v[algo::DESTINATION].c_str(), + v[algo::ASSIGNMENT].c_str(), + v[algo::OPR1].c_str(), + v[algo::OPERATOR][0], + v[algo::OPR2].c_str()); + #endif + return; + } + /* --- */ +} diff --git a/src/lexer.h b/src/lexer.h new file mode 100644 index 0000000..75b7323 --- /dev/null +++ b/src/lexer.h @@ -0,0 +1,8 @@ +#include <vector> +#include <string> +#include <iostream> + +void lex_file(const char* const f); +void lex_row(std::vector<std::string> &v, unsigned &head); + +extern unsigned long long instruction_counter; diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 0000000..b8d2062 --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,68 @@ +#include <string.h> +#include <signal.h> +#include <iostream> +#include <vector> +#include <string> +#include <algorithm> +//#include "JokerTao.h" +#include <unordered_map> +// +#include "algo.h" +#include "lexer.h" + +using namespace std; + +char* f; + +[[ noreturn ]] void end(int ignore = 0){ + #ifdef DEBUG + printf(NORMAL); + #endif + + #ifdef DEBUG + printf(GREEN "\" Standard variables\n" NORMAL); + const char *const *c = algo::standard_vars-1; + while((c++, *c != NULL)){ + printf("%s: %s\n", *c, algo::vars[*c].c_str()); + algo::vars.erase(*c); + } + + printf(GREEN "\" User variables\n" NORMAL); + vector<char*> vpbuf; + for(auto &&i : algo::vars){ + char *a; + asprintf(&a, "%s: %s\n", i.first.c_str(), i.second.c_str()); + vpbuf.push_back(a); + } + std::sort(vpbuf.begin(), vpbuf.end(), [](char* a, char* b){ return (strcmp(a, b) < 1); }); + for(auto &&i : vpbuf){ + cout << i; + } + #endif + + exit(2); +} + +bool init(){ + ios::sync_with_stdio(true); + setvbuf(stdout, NULL, _IONBF, 0); + setvbuf(stderr, NULL, _IONBF, 0); + signal(SIGINT, end); + + printf("[init] "); + algo::init(); + puts(""); + + return true; +} + +signed main(int argc, char* argv[]){ + if(argc < 2){ return 0; } + + 
init(); + + f = argv[1]; + lex_file(f); + + end(); +} diff --git a/src/rapidcsv.h b/src/rapidcsv.h new file mode 100644 index 0000000..9cc38ae --- /dev/null +++ b/src/rapidcsv.h @@ -0,0 +1,1821 @@ +/* + * rapidcsv.h + * + * URL: https://github.com/d99kris/rapidcsv + * Version: 8.70 + * + * Copyright (C) 2017-2022 Kristofer Berggren + * All rights reserved. + * + * rapidcsv is distributed under the BSD 3-Clause license, see LICENSE for details. + * + */ + +#pragma once + +#include <algorithm> +#include <cassert> +#include <cmath> +#ifdef HAS_CODECVT +#include <codecvt> +#include <locale> +#endif +#include <fstream> +#include <functional> +#include <iostream> +#include <limits> +#include <map> +#include <sstream> +#include <string> +#include <typeinfo> +#include <vector> + +#if defined(_MSC_VER) +#include <BaseTsd.h> +typedef SSIZE_T ssize_t; +#endif + +namespace rapidcsv +{ +#if defined(_MSC_VER) + static const bool sPlatformHasCR = true; +#else + static const bool sPlatformHasCR = false; +#endif + + /** + * @brief Datastructure holding parameters controlling how invalid numbers (including + * empty strings) should be handled. + */ + struct ConverterParams + { + /** + * @brief Constructor + * @param pHasDefaultConverter specifies if conversion of non-numerical strings shall be + * converted to a default numerical value, instead of causing + * an exception to be thrown (default). + * @param pDefaultFloat floating-point default value to represent invalid numbers. + * @param pDefaultInteger integer default value to represent invalid numbers. + * @param pNumericLocale specifies whether to honor LC_NUMERIC locale (default + * true). 
+ */ + explicit ConverterParams(const bool pHasDefaultConverter = false, + const long double pDefaultFloat = std::numeric_limits<long double>::signaling_NaN(), + const long long pDefaultInteger = 0, + const bool pNumericLocale = true) + : mHasDefaultConverter(pHasDefaultConverter) + , mDefaultFloat(pDefaultFloat) + , mDefaultInteger(pDefaultInteger) + , mNumericLocale(pNumericLocale) + { + } + + /** + * @brief specifies if conversion of non-numerical strings shall be converted to a default + * numerical value, instead of causing an exception to be thrown (default). + */ + bool mHasDefaultConverter; + + /** + * @brief floating-point default value to represent invalid numbers. + */ + long double mDefaultFloat; + + /** + * @brief integer default value to represent invalid numbers. + */ + long long mDefaultInteger; + + /** + * @brief specifies whether to honor LC_NUMERIC locale. + */ + bool mNumericLocale; + }; + + /** + * @brief Exception thrown when attempting to access Document data in a datatype which + * is not supported by the Converter class. + */ + class no_converter : public std::exception + { + /** + * @brief Provides details about the exception + * @returns an explanatory string + */ + virtual const char* what() const throw() + { + return "unsupported conversion datatype"; + } + }; + + /** + * @brief Class providing conversion to/from numerical datatypes and strings. Only + * intended for rapidcsv internal usage, but exposed externally to allow + * specialization for custom datatype conversions. + */ + template<typename T> + class Converter + { + public: + /** + * @brief Constructor + * @param pConverterParams specifies how conversion of non-numerical values to + * numerical datatype shall be handled. + */ + Converter(const ConverterParams& pConverterParams) + : mConverterParams(pConverterParams) + { + } + + /** + * @brief Converts numerical value to string representation. 
+ * @param pVal numerical value + * @param pStr output string + */ + void ToStr(const T& pVal, std::string& pStr) const + { + if (typeid(T) == typeid(int) || + typeid(T) == typeid(long) || + typeid(T) == typeid(long long) || + typeid(T) == typeid(unsigned) || + typeid(T) == typeid(unsigned long) || + typeid(T) == typeid(unsigned long long) || + typeid(T) == typeid(float) || + typeid(T) == typeid(double) || + typeid(T) == typeid(long double) || + typeid(T) == typeid(char)) + { + std::ostringstream out; + out << pVal; + pStr = out.str(); + } + else + { + throw no_converter(); + } + } + + /** + * @brief Converts string holding a numerical value to numerical datatype representation. + * @param pVal numerical value + * @param pStr output string + */ + void ToVal(const std::string& pStr, T& pVal) const + { + try + { + if (typeid(T) == typeid(int)) + { + pVal = static_cast<T>(std::stoi(pStr)); + return; + } + else if (typeid(T) == typeid(long)) + { + pVal = static_cast<T>(std::stol(pStr)); + return; + } + else if (typeid(T) == typeid(long long)) + { + pVal = static_cast<T>(std::stoll(pStr)); + return; + } + else if (typeid(T) == typeid(unsigned)) + { + pVal = static_cast<T>(std::stoul(pStr)); + return; + } + else if (typeid(T) == typeid(unsigned long)) + { + pVal = static_cast<T>(std::stoul(pStr)); + return; + } + else if (typeid(T) == typeid(unsigned long long)) + { + pVal = static_cast<T>(std::stoull(pStr)); + return; + } + } + catch (...) 
+ { + if (!mConverterParams.mHasDefaultConverter) + { + throw; + } + else + { + pVal = static_cast<T>(mConverterParams.mDefaultInteger); + return; + } + } + + try + { + if (mConverterParams.mNumericLocale) + { + if (typeid(T) == typeid(float)) + { + pVal = static_cast<T>(std::stof(pStr)); + return; + } + else if (typeid(T) == typeid(double)) + { + pVal = static_cast<T>(std::stod(pStr)); + return; + } + else if (typeid(T) == typeid(long double)) + { + pVal = static_cast<T>(std::stold(pStr)); + return; + } + } + else + { + if ((typeid(T) == typeid(float)) || + (typeid(T) == typeid(double)) || + (typeid(T) == typeid(long double))) + { + std::istringstream iss(pStr); + iss >> pVal; + if (iss.fail() || iss.bad() || !iss.eof()) + { + throw std::invalid_argument("istringstream: no conversion"); + } + return; + } + } + } + catch (...) + { + if (!mConverterParams.mHasDefaultConverter) + { + throw; + } + else + { + pVal = static_cast<T>(mConverterParams.mDefaultFloat); + return; + } + } + + if (typeid(T) == typeid(char)) + { + pVal = static_cast<T>(pStr[0]); + return; + } + else + { + throw no_converter(); + } + } + + private: + const ConverterParams& mConverterParams; + }; + + /** + * @brief Specialized implementation handling string to string conversion. + * @param pVal string + * @param pStr string + */ + template<> + inline void Converter<std::string>::ToStr(const std::string& pVal, std::string& pStr) const + { + pStr = pVal; + } + + /** + * @brief Specialized implementation handling string to string conversion. + * @param pVal string + * @param pStr string + */ + template<> + inline void Converter<std::string>::ToVal(const std::string& pStr, std::string& pVal) const + { + pVal = pStr; + } + + template<typename T> + using ConvFunc = std::function<void (const std::string & pStr, T & pVal)>; + + /** + * @brief Datastructure holding parameters controlling which row and column should be + * treated as labels. 
+ */ + struct LabelParams + { + /** + * @brief Constructor + * @param pColumnNameIdx specifies the zero-based row index of the column labels, setting + * it to -1 prevents column lookup by label name, and gives access + * to all rows as document data. Default: 0 + * @param pRowNameIdx specifies the zero-based column index of the row labels, setting + * it to -1 prevents row lookup by label name, and gives access + * to all columns as document data. Default: -1 + */ + explicit LabelParams(const ssize_t pColumnNameIdx = 0, const ssize_t pRowNameIdx = -1) + : mColumnNameIdx(pColumnNameIdx) + , mRowNameIdx(pRowNameIdx) + { + if (mColumnNameIdx < -1) + { + const std::string errStr = "invalid column name index " + + std::to_string(mColumnNameIdx) + " < -1"; + throw std::out_of_range(errStr); + } + + if (mRowNameIdx < -1) + { + const std::string errStr = "invalid row name index " + + std::to_string(mRowNameIdx) + " < -1"; + throw std::out_of_range(errStr); + } + } + + /** + * @brief specifies the zero-based row index of the column labels. + */ + ssize_t mColumnNameIdx; + + /** + * @brief specifies the zero-based column index of the row labels. + */ + ssize_t mRowNameIdx; + }; + + /** + * @brief Datastructure holding parameters controlling how the CSV data fields are separated. + */ + struct SeparatorParams + { + /** + * @brief Constructor + * @param pSeparator specifies the column separator (default ','). + * @param pTrim specifies whether to trim leading and trailing spaces from + * cells read (default false). + * @param pHasCR specifies whether a new document (i.e. not an existing document read) + * should use CR/LF instead of only LF (default is to use standard + * behavior of underlying platforms - CR/LF for Win, and LF for others). + * @param pQuotedLinebreaks specifies whether to allow line breaks in quoted text (default false) + * @param pAutoQuote specifies whether to automatically dequote data during read, and add + * quotes during write (default true). 
+ * @param pQuoteChar specifies the quote character (default '\"'). + */ + explicit SeparatorParams(const char pSeparator = ',', const bool pTrim = false, + const bool pHasCR = sPlatformHasCR, const bool pQuotedLinebreaks = false, + const bool pAutoQuote = true, const char pQuoteChar = '"') + : mSeparator(pSeparator) + , mTrim(pTrim) + , mHasCR(pHasCR) + , mQuotedLinebreaks(pQuotedLinebreaks) + , mAutoQuote(pAutoQuote) + , mQuoteChar(pQuoteChar) + { + } + + /** + * @brief specifies the column separator. + */ + char mSeparator; + + /** + * @brief specifies whether to trim leading and trailing spaces from cells read. + */ + bool mTrim; + + /** + * @brief specifies whether new documents should use CR/LF instead of LF. + */ + bool mHasCR; + + /** + * @brief specifies whether to allow line breaks in quoted text. + */ + bool mQuotedLinebreaks; + + /** + * @brief specifies whether to automatically dequote cell data. + */ + bool mAutoQuote; + + /** + * @brief specifies the quote character. + */ + char mQuoteChar; + }; + + /** + * @brief Datastructure holding parameters controlling how special line formats should be + * treated. + */ + struct LineReaderParams + { + /** + * @brief Constructor + * @param pSkipCommentLines specifies whether to skip lines prefixed with + * mCommentPrefix. Default: false + * @param pCommentPrefix specifies which prefix character to indicate a comment + * line. Default: # + * @param pSkipEmptyLines specifies whether to skip empty lines. Default: false + */ + explicit LineReaderParams(const bool pSkipCommentLines = false, + const char pCommentPrefix = '#', + const bool pSkipEmptyLines = false) + : mSkipCommentLines(pSkipCommentLines) + , mCommentPrefix(pCommentPrefix) + , mSkipEmptyLines(pSkipEmptyLines) + { + } + + /** + * @brief specifies whether to skip lines prefixed with mCommentPrefix. + */ + bool mSkipCommentLines; + + /** + * @brief specifies which prefix character to indicate a comment line. 
+ */ + char mCommentPrefix; + + /** + * @brief specifies whether to skip empty lines. + */ + bool mSkipEmptyLines; + }; + + /** + * @brief Class representing a CSV document. + */ + class Document + { + public: + /** + * @brief Constructor + * @param pPath specifies the path of an existing CSV-file to populate the Document + * data with. + * @param pLabelParams specifies which row and column should be treated as labels. + * @param pSeparatorParams specifies which field and row separators should be used. + * @param pConverterParams specifies how invalid numbers (including empty strings) should be + * handled. + * @param pLineReaderParams specifies how special line formats should be treated. + */ + explicit Document(const std::string& pPath = std::string(), + const LabelParams& pLabelParams = LabelParams(), + const SeparatorParams& pSeparatorParams = SeparatorParams(), + const ConverterParams& pConverterParams = ConverterParams(), + const LineReaderParams& pLineReaderParams = LineReaderParams()) + : mPath(pPath) + , mLabelParams(pLabelParams) + , mSeparatorParams(pSeparatorParams) + , mConverterParams(pConverterParams) + , mLineReaderParams(pLineReaderParams) + , mData() + , mColumnNames() + , mRowNames() + { + if (!mPath.empty()) + { + ReadCsv(); + } + } + + /** + * @brief Constructor + * @param pStream specifies a binary input stream to read CSV data from. + * @param pLabelParams specifies which row and column should be treated as labels. + * @param pSeparatorParams specifies which field and row separators should be used. + * @param pConverterParams specifies how invalid numbers (including empty strings) should be + * handled. + * @param pLineReaderParams specifies how special line formats should be treated. 
+ */ + explicit Document(std::istream& pStream, + const LabelParams& pLabelParams = LabelParams(), + const SeparatorParams& pSeparatorParams = SeparatorParams(), + const ConverterParams& pConverterParams = ConverterParams(), + const LineReaderParams& pLineReaderParams = LineReaderParams()) + : mPath() + , mLabelParams(pLabelParams) + , mSeparatorParams(pSeparatorParams) + , mConverterParams(pConverterParams) + , mLineReaderParams(pLineReaderParams) + , mData() + , mColumnNames() + , mRowNames() + { + ReadCsv(pStream); + } + + /** + * @brief Read Document data from file. + * @param pPath specifies the path of an existing CSV-file to populate the Document + * data with. + * @param pLabelParams specifies which row and column should be treated as labels. + * @param pSeparatorParams specifies which field and row separators should be used. + * @param pConverterParams specifies how invalid numbers (including empty strings) should be + * handled. + * @param pLineReaderParams specifies how special line formats should be treated. + */ + void Load(const std::string& pPath, + const LabelParams& pLabelParams = LabelParams(), + const SeparatorParams& pSeparatorParams = SeparatorParams(), + const ConverterParams& pConverterParams = ConverterParams(), + const LineReaderParams& pLineReaderParams = LineReaderParams()) + { + mPath = pPath; + mLabelParams = pLabelParams; + mSeparatorParams = pSeparatorParams; + mConverterParams = pConverterParams; + mLineReaderParams = pLineReaderParams; + ReadCsv(); + } + + /** + * @brief Read Document data from stream. + * @param pStream specifies a binary input stream to read CSV data from. + * @param pLabelParams specifies which row and column should be treated as labels. + * @param pSeparatorParams specifies which field and row separators should be used. + * @param pConverterParams specifies how invalid numbers (including empty strings) should be + * handled. + * @param pLineReaderParams specifies how special line formats should be treated. 
+ */ + void Load(std::istream& pStream, + const LabelParams& pLabelParams = LabelParams(), + const SeparatorParams& pSeparatorParams = SeparatorParams(), + const ConverterParams& pConverterParams = ConverterParams(), + const LineReaderParams& pLineReaderParams = LineReaderParams()) + { + mPath = ""; + mLabelParams = pLabelParams; + mSeparatorParams = pSeparatorParams; + mConverterParams = pConverterParams; + mLineReaderParams = pLineReaderParams; + ReadCsv(pStream); + } + + /** + * @brief Write Document data to file. + * @param pPath optionally specifies the path where the CSV-file will be created + * (if not specified, the original path provided when creating or + * loading the Document data will be used). + */ + void Save(const std::string& pPath = std::string()) + { + if (!pPath.empty()) + { + mPath = pPath; + } + WriteCsv(); + } + + /** + * @brief Write Document data to stream. + * @param pStream specifies a binary output stream to write the data to. + */ + void Save(std::ostream& pStream) const + { + WriteCsv(pStream); + } + + /** + * @brief Clears loaded Document data. + * + */ + void Clear() + { + mData.clear(); + mColumnNames.clear(); + mRowNames.clear(); +#ifdef HAS_CODECVT + mIsUtf16 = false; + mIsLE = false; +#endif + } + + /** + * @brief Get column index by name. + * @param pColumnName column label name. + * @returns zero-based column index. + */ + ssize_t GetColumnIdx(const std::string& pColumnName) const + { + if (mLabelParams.mColumnNameIdx >= 0) + { + if (mColumnNames.find(pColumnName) != mColumnNames.end()) + { + return static_cast<ssize_t>(mColumnNames.at(pColumnName)) - (mLabelParams.mRowNameIdx + 1); + } + } + return -1; + } + + /** + * @brief Get column by index. + * @param pColumnIdx zero-based column index. + * @returns vector of column data. 
+ */ + template<typename T> + std::vector<T> GetColumn(const size_t pColumnIdx) const + { + const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); + std::vector<T> column; + Converter<T> converter(mConverterParams); + for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) + { + if (std::distance(mData.begin(), itRow) > mLabelParams.mColumnNameIdx) + { + if (dataColumnIdx < itRow->size()) + { + T val; + converter.ToVal(itRow->at(dataColumnIdx), val); + column.push_back(val); + } + else + { + const std::string errStr = "requested column index " + + std::to_string(pColumnIdx) + " >= " + + std::to_string(itRow->size() - GetDataColumnIndex(0)) + + " (number of columns on row index " + + std::to_string(std::distance(mData.begin(), itRow) - + (mLabelParams.mColumnNameIdx + 1)) + ")"; + throw std::out_of_range(errStr); + } + } + } + return column; + } + + /** + * @brief Get column by index. + * @param pColumnIdx zero-based column index. + * @param pToVal conversion function. + * @returns vector of column data. + */ + template<typename T> + std::vector<T> GetColumn(const size_t pColumnIdx, ConvFunc<T> pToVal) const + { + const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); + std::vector<T> column; + for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) + { + if (std::distance(mData.begin(), itRow) > mLabelParams.mColumnNameIdx) + { + T val; + pToVal(itRow->at(dataColumnIdx), val); + column.push_back(val); + } + } + return column; + } + + /** + * @brief Get column by name. + * @param pColumnName column label name. + * @returns vector of column data. + */ + template<typename T> + std::vector<T> GetColumn(const std::string& pColumnName) const + { + const ssize_t columnIdx = GetColumnIdx(pColumnName); + if (columnIdx < 0) + { + throw std::out_of_range("column not found: " + pColumnName); + } + return GetColumn<T>(static_cast<size_t>(columnIdx)); + } + + /** + * @brief Get column by name. + * @param pColumnName column label name. 
+ * @param pToVal conversion function. + * @returns vector of column data. + */ + template<typename T> + std::vector<T> GetColumn(const std::string& pColumnName, ConvFunc<T> pToVal) const + { + const ssize_t columnIdx = GetColumnIdx(pColumnName); + if (columnIdx < 0) + { + throw std::out_of_range("column not found: " + pColumnName); + } + return GetColumn<T>(static_cast<size_t>(columnIdx), pToVal); + } + + /** + * @brief Set column by index. + * @param pColumnIdx zero-based column index. + * @param pColumn vector of column data. + */ + template<typename T> + void SetColumn(const size_t pColumnIdx, const std::vector<T>& pColumn) + { + const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); + + while (GetDataRowIndex(pColumn.size()) > GetDataRowCount()) + { + std::vector<std::string> row; + row.resize(GetDataColumnCount()); + mData.push_back(row); + } + + if ((dataColumnIdx + 1) > GetDataColumnCount()) + { + for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) + { + itRow->resize(GetDataColumnIndex(dataColumnIdx + 1)); + } + } + + Converter<T> converter(mConverterParams); + for (auto itRow = pColumn.begin(); itRow != pColumn.end(); ++itRow) + { + std::string str; + converter.ToStr(*itRow, str); + mData.at(static_cast<size_t>(std::distance(pColumn.begin(), itRow) + mLabelParams.mColumnNameIdx + 1)).at( + dataColumnIdx) = str; + } + } + + /** + * @brief Set column by name. + * @param pColumnName column label name. + * @param pColumn vector of column data. + */ + template<typename T> + void SetColumn(const std::string& pColumnName, const std::vector<T>& pColumn) + { + const ssize_t columnIdx = GetColumnIdx(pColumnName); + if (columnIdx < 0) + { + throw std::out_of_range("column not found: " + pColumnName); + } + SetColumn<T>(static_cast<size_t>(columnIdx), pColumn); + } + + /** + * @brief Remove column by index. + * @param pColumnIdx zero-based column index. 
+ */ + void RemoveColumn(const size_t pColumnIdx) + { + const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); + for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) + { + itRow->erase(itRow->begin() + static_cast<ssize_t>(dataColumnIdx)); + } + + UpdateColumnNames(); + } + + /** + * @brief Remove column by name. + * @param pColumnName column label name. + */ + void RemoveColumn(const std::string& pColumnName) + { + ssize_t columnIdx = GetColumnIdx(pColumnName); + if (columnIdx < 0) + { + throw std::out_of_range("column not found: " + pColumnName); + } + + RemoveColumn(static_cast<size_t>(columnIdx)); + } + + /** + * @brief Insert column at specified index. + * @param pColumnIdx zero-based column index. + * @param pColumn vector of column data (optional argument). + * @param pColumnName column label name (optional argument). + */ + template<typename T> + void InsertColumn(const size_t pColumnIdx, const std::vector<T>& pColumn = std::vector<T>(), + const std::string& pColumnName = std::string()) + { + const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); + + std::vector<std::string> column; + if (pColumn.empty()) + { + column.resize(GetDataRowCount()); + } + else + { + column.resize(GetDataRowIndex(pColumn.size())); + Converter<T> converter(mConverterParams); + for (auto itRow = pColumn.begin(); itRow != pColumn.end(); ++itRow) + { + std::string str; + converter.ToStr(*itRow, str); + const size_t rowIdx = + static_cast<size_t>(std::distance(pColumn.begin(), itRow) + (mLabelParams.mColumnNameIdx + 1)); + column.at(rowIdx) = str; + } + } + + while (column.size() > GetDataRowCount()) + { + std::vector<std::string> row; + const size_t columnCount = std::max<size_t>(static_cast<size_t>(mLabelParams.mColumnNameIdx + 1), + GetDataColumnCount()); + row.resize(columnCount); + mData.push_back(row); + } + + for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) + { + const size_t rowIdx = static_cast<size_t>(std::distance(mData.begin(), 
itRow)); + itRow->insert(itRow->begin() + static_cast<ssize_t>(dataColumnIdx), column.at(rowIdx)); + } + + if (!pColumnName.empty()) + { + SetColumnName(pColumnIdx, pColumnName); + } + + UpdateColumnNames(); + } + + /** + * @brief Get number of data columns (excluding label columns). + * @returns column count. + */ + size_t GetColumnCount() const + { + const ssize_t count = static_cast<ssize_t>((mData.size() > 0) ? mData.at(0).size() : 0) - + (mLabelParams.mRowNameIdx + 1); + return (count >= 0) ? static_cast<size_t>(count) : 0; + } + + /** + * @brief Get row index by name. + * @param pRowName row label name. + * @returns zero-based row index. + */ + ssize_t GetRowIdx(const std::string& pRowName) const + { + if (mLabelParams.mRowNameIdx >= 0) + { + if (mRowNames.find(pRowName) != mRowNames.end()) + { + return static_cast<ssize_t>(mRowNames.at(pRowName)) - (mLabelParams.mColumnNameIdx + 1); + } + } + return -1; + } + + /** + * @brief Get row by index. + * @param pRowIdx zero-based row index. + * @returns vector of row data. + */ + template<typename T> + std::vector<T> GetRow(const size_t pRowIdx) const + { + const size_t dataRowIdx = GetDataRowIndex(pRowIdx); + std::vector<T> row; + Converter<T> converter(mConverterParams); + for (auto itCol = mData.at(dataRowIdx).begin(); itCol != mData.at(dataRowIdx).end(); ++itCol) + { + if (std::distance(mData.at(dataRowIdx).begin(), itCol) > mLabelParams.mRowNameIdx) + { + T val; + converter.ToVal(*itCol, val); + row.push_back(val); + } + } + return row; + } + + /** + * @brief Get row by index. + * @param pRowIdx zero-based row index. + * @param pToVal conversion function. + * @returns vector of row data. 
+ */ + template<typename T> + std::vector<T> GetRow(const size_t pRowIdx, ConvFunc<T> pToVal) const + { + const size_t dataRowIdx = GetDataRowIndex(pRowIdx); + std::vector<T> row; + Converter<T> converter(mConverterParams); + for (auto itCol = mData.at(dataRowIdx).begin(); itCol != mData.at(dataRowIdx).end(); ++itCol) + { + if (std::distance(mData.at(dataRowIdx).begin(), itCol) > mLabelParams.mRowNameIdx) + { + T val; + pToVal(*itCol, val); + row.push_back(val); + } + } + return row; + } + + /** + * @brief Get row by name. + * @param pRowName row label name. + * @returns vector of row data. + */ + template<typename T> + std::vector<T> GetRow(const std::string& pRowName) const + { + ssize_t rowIdx = GetRowIdx(pRowName); + if (rowIdx < 0) + { + throw std::out_of_range("row not found: " + pRowName); + } + return GetRow<T>(static_cast<size_t>(rowIdx)); + } + + /** + * @brief Get row by name. + * @param pRowName row label name. + * @param pToVal conversion function. + * @returns vector of row data. + */ + template<typename T> + std::vector<T> GetRow(const std::string& pRowName, ConvFunc<T> pToVal) const + { + ssize_t rowIdx = GetRowIdx(pRowName); + if (rowIdx < 0) + { + throw std::out_of_range("row not found: " + pRowName); + } + return GetRow<T>(static_cast<size_t>(rowIdx), pToVal); + } + + /** + * @brief Set row by index. + * @param pRowIdx zero-based row index. + * @param pRow vector of row data. 
+ */ + template<typename T> + void SetRow(const size_t pRowIdx, const std::vector<T>& pRow) + { + const size_t dataRowIdx = GetDataRowIndex(pRowIdx); + + while ((dataRowIdx + 1) > GetDataRowCount()) + { + std::vector<std::string> row; + row.resize(GetDataColumnCount()); + mData.push_back(row); + } + + if (pRow.size() > GetDataColumnCount()) + { + for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) + { + itRow->resize(GetDataColumnIndex(pRow.size())); + } + } + + Converter<T> converter(mConverterParams); + for (auto itCol = pRow.begin(); itCol != pRow.end(); ++itCol) + { + std::string str; + converter.ToStr(*itCol, str); + mData.at(dataRowIdx).at(static_cast<size_t>(std::distance(pRow.begin(), + itCol) + mLabelParams.mRowNameIdx + 1)) = str; + } + } + + /** + * @brief Set row by name. + * @param pRowName row label name. + * @param pRow vector of row data. + */ + template<typename T> + void SetRow(const std::string& pRowName, const std::vector<T>& pRow) + { + ssize_t rowIdx = GetRowIdx(pRowName); + if (rowIdx < 0) + { + throw std::out_of_range("row not found: " + pRowName); + } + return SetRow<T>(static_cast<size_t>(rowIdx), pRow); + } + + /** + * @brief Remove row by index. + * @param pRowIdx zero-based row index. + */ + void RemoveRow(const size_t pRowIdx) + { + const size_t dataRowIdx = GetDataRowIndex(pRowIdx); + mData.erase(mData.begin() + static_cast<ssize_t>(dataRowIdx)); + UpdateRowNames(); + } + + /** + * @brief Remove row by name. + * @param pRowName row label name. + */ + void RemoveRow(const std::string& pRowName) + { + ssize_t rowIdx = GetRowIdx(pRowName); + if (rowIdx < 0) + { + throw std::out_of_range("row not found: " + pRowName); + } + + RemoveRow(static_cast<size_t>(rowIdx)); + } + + /** + * @brief Insert row at specified index. + * @param pRowIdx zero-based row index. + * @param pRow vector of row data (optional argument). + * @param pRowName row label name (optional argument). 
+ */ + template<typename T> + void InsertRow(const size_t pRowIdx, const std::vector<T>& pRow = std::vector<T>(), + const std::string& pRowName = std::string()) + { + const size_t rowIdx = GetDataRowIndex(pRowIdx); + + std::vector<std::string> row; + if (pRow.empty()) + { + row.resize(GetDataColumnCount()); + } + else + { + row.resize(GetDataColumnIndex(pRow.size())); + Converter<T> converter(mConverterParams); + for (auto itCol = pRow.begin(); itCol != pRow.end(); ++itCol) + { + std::string str; + converter.ToStr(*itCol, str); + row.at(static_cast<size_t>(std::distance(pRow.begin(), itCol) + mLabelParams.mRowNameIdx + 1)) = str; + } + } + + while (rowIdx > GetDataRowCount()) + { + std::vector<std::string> tempRow; + tempRow.resize(GetDataColumnCount()); + mData.push_back(tempRow); + } + + mData.insert(mData.begin() + static_cast<ssize_t>(rowIdx), row); + + if (!pRowName.empty()) + { + SetRowName(pRowIdx, pRowName); + } + + UpdateRowNames(); + } + + /** + * @brief Get number of data rows (excluding label rows). + * @returns row count. + */ + size_t GetRowCount() const + { + const ssize_t count = static_cast<ssize_t>(mData.size()) - (mLabelParams.mColumnNameIdx + 1); + return (count >= 0) ? static_cast<size_t>(count) : 0; + } + + /** + * @brief Get cell by index. + * @param pColumnIdx zero-based column index. + * @param pRowIdx zero-based row index. + * @returns cell data. + */ + template<typename T> + T GetCell(const size_t pColumnIdx, const size_t pRowIdx) const + { + const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); + const size_t dataRowIdx = GetDataRowIndex(pRowIdx); + + T val; + Converter<T> converter(mConverterParams); + converter.ToVal(mData.at(dataRowIdx).at(dataColumnIdx), val); + return val; + } + + /** + * @brief Get cell by index. + * @param pColumnIdx zero-based column index. + * @param pRowIdx zero-based row index. + * @param pToVal conversion function. + * @returns cell data. 
+ */ + template<typename T> + T GetCell(const size_t pColumnIdx, const size_t pRowIdx, ConvFunc<T> pToVal) const + { + const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); + const size_t dataRowIdx = GetDataRowIndex(pRowIdx); + + T val; + pToVal(mData.at(dataRowIdx).at(dataColumnIdx), val); + return val; + } + + /** + * @brief Get cell by name. + * @param pColumnName column label name. + * @param pRowName row label name. + * @returns cell data. + */ + template<typename T> + T GetCell(const std::string& pColumnName, const std::string& pRowName) const + { + const ssize_t columnIdx = GetColumnIdx(pColumnName); + if (columnIdx < 0) + { + throw std::out_of_range("column not found: " + pColumnName); + } + + const ssize_t rowIdx = GetRowIdx(pRowName); + if (rowIdx < 0) + { + throw std::out_of_range("row not found: " + pRowName); + } + + return GetCell<T>(static_cast<size_t>(columnIdx), static_cast<size_t>(rowIdx)); + } + + /** + * @brief Get cell by name. + * @param pColumnName column label name. + * @param pRowName row label name. + * @param pToVal conversion function. + * @returns cell data. + */ + template<typename T> + T GetCell(const std::string& pColumnName, const std::string& pRowName, ConvFunc<T> pToVal) const + { + const ssize_t columnIdx = GetColumnIdx(pColumnName); + if (columnIdx < 0) + { + throw std::out_of_range("column not found: " + pColumnName); + } + + const ssize_t rowIdx = GetRowIdx(pRowName); + if (rowIdx < 0) + { + throw std::out_of_range("row not found: " + pRowName); + } + + return GetCell<T>(static_cast<size_t>(columnIdx), static_cast<size_t>(rowIdx), pToVal); + } + + /** + * @brief Get cell by column name and row index. + * @param pColumnName column label name. + * @param pRowIdx zero-based row index. + * @returns cell data. 
+ */ + template<typename T> + T GetCell(const std::string& pColumnName, const size_t pRowIdx) const + { + const ssize_t columnIdx = GetColumnIdx(pColumnName); + if (columnIdx < 0) + { + throw std::out_of_range("column not found: " + pColumnName); + } + + return GetCell<T>(static_cast<size_t>(columnIdx), pRowIdx); + } + + /** + * @brief Get cell by column name and row index. + * @param pColumnName column label name. + * @param pRowIdx zero-based row index. + * @param pToVal conversion function. + * @returns cell data. + */ + template<typename T> + T GetCell(const std::string& pColumnName, const size_t pRowIdx, ConvFunc<T> pToVal) const + { + const ssize_t columnIdx = GetColumnIdx(pColumnName); + if (columnIdx < 0) + { + throw std::out_of_range("column not found: " + pColumnName); + } + + return GetCell<T>(static_cast<size_t>(columnIdx), pRowIdx, pToVal); + } + + /** + * @brief Get cell by column index and row name. + * @param pColumnIdx zero-based column index. + * @param pRowName row label name. + * @returns cell data. + */ + template<typename T> + T GetCell(const size_t pColumnIdx, const std::string& pRowName) const + { + const ssize_t rowIdx = GetRowIdx(pRowName); + if (rowIdx < 0) + { + throw std::out_of_range("row not found: " + pRowName); + } + + return GetCell<T>(pColumnIdx, static_cast<size_t>(rowIdx)); + } + + /** + * @brief Get cell by column index and row name. + * @param pColumnIdx zero-based column index. + * @param pRowName row label name. + * @param pToVal conversion function. + * @returns cell data. + */ + template<typename T> + T GetCell(const size_t pColumnIdx, const std::string& pRowName, ConvFunc<T> pToVal) const + { + const ssize_t rowIdx = GetRowIdx(pRowName); + if (rowIdx < 0) + { + throw std::out_of_range("row not found: " + pRowName); + } + + return GetCell<T>(pColumnIdx, static_cast<size_t>(rowIdx), pToVal); + } + + /** + * @brief Set cell by index. + * @param pRowIdx zero-based row index. + * @param pColumnIdx zero-based column index. 
+ * @param pCell cell data. + */ + template<typename T> + void SetCell(const size_t pColumnIdx, const size_t pRowIdx, const T& pCell) + { + const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); + const size_t dataRowIdx = GetDataRowIndex(pRowIdx); + + while ((dataRowIdx + 1) > GetDataRowCount()) + { + std::vector<std::string> row; + row.resize(GetDataColumnCount()); + mData.push_back(row); + } + + if ((dataColumnIdx + 1) > GetDataColumnCount()) + { + for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) + { + itRow->resize(dataColumnIdx + 1); + } + } + + std::string str; + Converter<T> converter(mConverterParams); + converter.ToStr(pCell, str); + mData.at(dataRowIdx).at(dataColumnIdx) = str; + } + + /** + * @brief Set cell by name. + * @param pColumnName column label name. + * @param pRowName row label name. + * @param pCell cell data. + */ + template<typename T> + void SetCell(const std::string& pColumnName, const std::string& pRowName, const T& pCell) + { + const ssize_t columnIdx = GetColumnIdx(pColumnName); + if (columnIdx < 0) + { + throw std::out_of_range("column not found: " + pColumnName); + } + + const ssize_t rowIdx = GetRowIdx(pRowName); + if (rowIdx < 0) + { + throw std::out_of_range("row not found: " + pRowName); + } + + SetCell<T>(static_cast<size_t>(columnIdx), static_cast<size_t>(rowIdx), pCell); + } + + /** + * @brief Get column name + * @param pColumnIdx zero-based column index. + * @returns column name. + */ + std::string GetColumnName(const size_t pColumnIdx) const + { + const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); + if (mLabelParams.mColumnNameIdx < 0) + { + throw std::out_of_range("column name row index < 0: " + std::to_string(mLabelParams.mColumnNameIdx)); + } + + return mData.at(static_cast<size_t>(mLabelParams.mColumnNameIdx)).at(dataColumnIdx); + } + + /** + * @brief Set column name + * @param pColumnIdx zero-based column index. + * @param pColumnName column name. 
+ */ + void SetColumnName(size_t pColumnIdx, const std::string& pColumnName) + { + if (mLabelParams.mColumnNameIdx < 0) + { + throw std::out_of_range("column name row index < 0: " + std::to_string(mLabelParams.mColumnNameIdx)); + } + + const size_t dataColumnIdx = GetDataColumnIndex(pColumnIdx); + mColumnNames[pColumnName] = dataColumnIdx; + + // increase table size if necessary: + const size_t rowIdx = static_cast<size_t>(mLabelParams.mColumnNameIdx); + if (rowIdx >= mData.size()) + { + mData.resize(rowIdx + 1); + } + auto& row = mData[rowIdx]; + if (dataColumnIdx >= row.size()) + { + row.resize(dataColumnIdx + 1); + } + + mData.at(static_cast<size_t>(mLabelParams.mColumnNameIdx)).at(dataColumnIdx) = pColumnName; + } + + /** + * @brief Get column names + * @returns vector of column names. + */ + std::vector<std::string> GetColumnNames() const + { + if (mLabelParams.mColumnNameIdx >= 0) + { + return std::vector<std::string>(mData.at(static_cast<size_t>(mLabelParams.mColumnNameIdx)).begin() + + (mLabelParams.mRowNameIdx + 1), + mData.at(static_cast<size_t>(mLabelParams.mColumnNameIdx)).end()); + } + + return std::vector<std::string>(); + } + + /** + * @brief Get row name + * @param pRowIdx zero-based column index. + * @returns row name. + */ + std::string GetRowName(const size_t pRowIdx) const + { + const size_t dataRowIdx = GetDataRowIndex(pRowIdx); + if (mLabelParams.mRowNameIdx < 0) + { + throw std::out_of_range("row name column index < 0: " + std::to_string(mLabelParams.mRowNameIdx)); + } + + return mData.at(dataRowIdx).at(static_cast<size_t>(mLabelParams.mRowNameIdx)); + } + + /** + * @brief Set row name + * @param pRowIdx zero-based row index. + * @param pRowName row name. 
+ */ + void SetRowName(size_t pRowIdx, const std::string& pRowName) + { + const size_t dataRowIdx = GetDataRowIndex(pRowIdx); + mRowNames[pRowName] = dataRowIdx; + if (mLabelParams.mRowNameIdx < 0) + { + throw std::out_of_range("row name column index < 0: " + std::to_string(mLabelParams.mRowNameIdx)); + } + + // increase table size if necessary: + if (dataRowIdx >= mData.size()) + { + mData.resize(dataRowIdx + 1); + } + auto& row = mData[dataRowIdx]; + if (mLabelParams.mRowNameIdx >= static_cast<ssize_t>(row.size())) + { + row.resize(static_cast<size_t>(mLabelParams.mRowNameIdx) + 1); + } + + mData.at(dataRowIdx).at(static_cast<size_t>(mLabelParams.mRowNameIdx)) = pRowName; + } + + /** + * @brief Get row names + * @returns vector of row names. + */ + std::vector<std::string> GetRowNames() const + { + std::vector<std::string> rownames; + if (mLabelParams.mRowNameIdx >= 0) + { + for (auto itRow = mData.begin(); itRow != mData.end(); ++itRow) + { + if (std::distance(mData.begin(), itRow) > mLabelParams.mColumnNameIdx) + { + rownames.push_back(itRow->at(static_cast<size_t>(mLabelParams.mRowNameIdx))); + } + } + } + return rownames; + } + + private: + void ReadCsv() + { + std::ifstream stream; + stream.exceptions(std::ifstream::failbit | std::ifstream::badbit); + stream.open(mPath, std::ios::binary); + ReadCsv(stream); + } + + void ReadCsv(std::istream& pStream) + { + Clear(); + pStream.seekg(0, std::ios::end); + std::streamsize length = pStream.tellg(); + pStream.seekg(0, std::ios::beg); + +#ifdef HAS_CODECVT + std::vector<char> bom2b(2, '\0'); + if (length >= 2) + { + pStream.read(bom2b.data(), 2); + pStream.seekg(0, std::ios::beg); + } + + static const std::vector<char> bomU16le = { '\xff', '\xfe' }; + static const std::vector<char> bomU16be = { '\xfe', '\xff' }; + if ((bom2b == bomU16le) || (bom2b == bomU16be)) + { + mIsUtf16 = true; + mIsLE = (bom2b == bomU16le); + + std::wifstream wstream; + wstream.exceptions(std::wifstream::failbit | std::wifstream::badbit); + 
wstream.open(mPath, std::ios::binary); + if (mIsLE) + { + wstream.imbue(std::locale(wstream.getloc(), + new std::codecvt_utf16<wchar_t, 0x10ffff, + static_cast<std::codecvt_mode>(std::consume_header | + std::little_endian)>)); + } + else + { + wstream.imbue(std::locale(wstream.getloc(), + new std::codecvt_utf16<wchar_t, 0x10ffff, + std::consume_header>)); + } + std::wstringstream wss; + wss << wstream.rdbuf(); + std::string utf8 = ToString(wss.str()); + std::stringstream ss(utf8); + ParseCsv(ss, static_cast<std::streamsize>(utf8.size())); + } + else +#endif + { + // check for UTF-8 Byte order mark and skip it when found + if (length >= 3) + { + std::vector<char> bom3b(3, '\0'); + pStream.read(bom3b.data(), 3); + static const std::vector<char> bomU8 = { '\xef', '\xbb', '\xbf' }; + if (bom3b != bomU8) + { + // file does not start with a UTF-8 Byte order mark + pStream.seekg(0, std::ios::beg); + } + else + { + // file did start with a UTF-8 Byte order mark, simply skip it + length -= 3; + } + } + + ParseCsv(pStream, length); + } + } + + void ParseCsv(std::istream& pStream, std::streamsize p_FileLength) + { + const std::streamsize bufLength = 64 * 1024; + std::vector<char> buffer(bufLength); + std::vector<std::string> row; + std::string cell; + bool quoted = false; + int cr = 0; + int lf = 0; + + while (p_FileLength > 0) + { + const std::streamsize toReadLength = std::min<std::streamsize>(p_FileLength, bufLength); + pStream.read(buffer.data(), toReadLength); + + // With user-specified istream opened in non-binary mode on windows, we may have a + // data length mismatch, so ensure we don't parse outside actual data length read. 
+ const std::streamsize readLength = pStream.gcount(); + if (readLength <= 0) + { + break; + } + + for (size_t i = 0; i < static_cast<size_t>(readLength); ++i) + { + if (buffer[i] == mSeparatorParams.mQuoteChar) + { + if (cell.empty() || (cell[0] == mSeparatorParams.mQuoteChar)) + { + quoted = !quoted; + } + cell += buffer[i]; + } + else if (buffer[i] == mSeparatorParams.mSeparator) + { + if (!quoted) + { + row.push_back(Unquote(Trim(cell))); + cell.clear(); + } + else + { + cell += buffer[i]; + } + } + else if (buffer[i] == '\r') + { + if (mSeparatorParams.mQuotedLinebreaks && quoted) + { + cell += buffer[i]; + } + else + { + ++cr; + } + } + else if (buffer[i] == '\n') + { + if (mSeparatorParams.mQuotedLinebreaks && quoted) + { + cell += buffer[i]; + } + else + { + ++lf; + if (mLineReaderParams.mSkipEmptyLines && row.empty() && cell.empty()) + { + // skip empty line + } + else + { + row.push_back(Unquote(Trim(cell))); + + if (mLineReaderParams.mSkipCommentLines && !row.at(0).empty() && + (row.at(0)[0] == mLineReaderParams.mCommentPrefix)) + { + // skip comment line + } + else + { + mData.push_back(row); + } + + cell.clear(); + row.clear(); + quoted = false; + } + } + } + else + { + cell += buffer[i]; + } + } + p_FileLength -= readLength; + } + + // Handle last line without linebreak + if (!cell.empty() || !row.empty()) + { + row.push_back(Unquote(Trim(cell))); + cell.clear(); + mData.push_back(row); + row.clear(); + } + + // Assume CR/LF if at least half the linebreaks have CR + mSeparatorParams.mHasCR = (cr > (lf / 2)); + + // Set up column labels + UpdateColumnNames(); + + // Set up row labels + UpdateRowNames(); + } + + void WriteCsv() const + { +#ifdef HAS_CODECVT + if (mIsUtf16) + { + std::stringstream ss; + WriteCsv(ss); + std::string utf8 = ss.str(); + std::wstring wstr = ToWString(utf8); + + std::wofstream wstream; + wstream.exceptions(std::wofstream::failbit | std::wofstream::badbit); + wstream.open(mPath, std::ios::binary | std::ios::trunc); + + if 
(mIsLE) + { + wstream.imbue(std::locale(wstream.getloc(), + new std::codecvt_utf16<wchar_t, 0x10ffff, + static_cast<std::codecvt_mode>(std::little_endian)>)); + } + else + { + wstream.imbue(std::locale(wstream.getloc(), + new std::codecvt_utf16<wchar_t, 0x10ffff>)); + } + + wstream << static_cast<wchar_t>(0xfeff); + wstream << wstr; + } + else +#endif + { + std::ofstream stream; + stream.exceptions(std::ofstream::failbit | std::ofstream::badbit); + stream.open(mPath, std::ios::binary | std::ios::trunc); + WriteCsv(stream); + } + } + + void WriteCsv(std::ostream& pStream) const + { + for (auto itr = mData.begin(); itr != mData.end(); ++itr) + { + for (auto itc = itr->begin(); itc != itr->end(); ++itc) + { + if (mSeparatorParams.mAutoQuote && + ((itc->find(mSeparatorParams.mSeparator) != std::string::npos) || + (itc->find(' ') != std::string::npos))) + { + // escape quotes in string + std::string str = *itc; + const std::string quoteCharStr = std::string(1, mSeparatorParams.mQuoteChar); + ReplaceString(str, quoteCharStr, quoteCharStr + quoteCharStr); + + pStream << quoteCharStr << str << quoteCharStr; + } + else + { + pStream << *itc; + } + + if (std::distance(itc, itr->end()) > 1) + { + pStream << mSeparatorParams.mSeparator; + } + } + pStream << (mSeparatorParams.mHasCR ? "\r\n" : "\n"); + } + } + + size_t GetDataRowCount() const + { + return mData.size(); + } + + size_t GetDataColumnCount() const + { + return (mData.size() > 0) ? 
mData.at(0).size() : 0; + } + + inline size_t GetDataRowIndex(const size_t pRowIdx) const + { + return pRowIdx + static_cast<size_t>(mLabelParams.mColumnNameIdx + 1); + } + + inline size_t GetDataColumnIndex(const size_t pColumnIdx) const + { + return pColumnIdx + static_cast<size_t>(mLabelParams.mRowNameIdx + 1); + } + + std::string Trim(const std::string& pStr) const + { + if (mSeparatorParams.mTrim) + { + std::string str = pStr; + + // ltrim + str.erase(str.begin(), std::find_if(str.begin(), str.end(), [](int ch) { return !isspace(ch); })); + + // rtrim + str.erase(std::find_if(str.rbegin(), str.rend(), [](int ch) { return !isspace(ch); }).base(), str.end()); + + return str; + } + else + { + return pStr; + } + } + + std::string Unquote(const std::string& pStr) const + { + if (mSeparatorParams.mAutoQuote && (pStr.size() >= 2) && + (pStr.front() == mSeparatorParams.mQuoteChar) && + (pStr.back() == mSeparatorParams.mQuoteChar)) + { + // remove start/end quotes + std::string str = pStr.substr(1, pStr.size() - 2); + + // unescape quotes in string + const std::string quoteCharStr = std::string(1, mSeparatorParams.mQuoteChar); + ReplaceString(str, quoteCharStr + quoteCharStr, quoteCharStr); + + return str; + } + else + { + return pStr; + } + } + + void UpdateColumnNames() + { + mColumnNames.clear(); + if ((mLabelParams.mColumnNameIdx >= 0) && + (static_cast<ssize_t>(mData.size()) > mLabelParams.mColumnNameIdx)) + { + size_t i = 0; + for (auto& columnName : mData[static_cast<size_t>(mLabelParams.mColumnNameIdx)]) + { + mColumnNames[columnName] = i++; + } + } + } + + void UpdateRowNames() + { + mRowNames.clear(); + if ((mLabelParams.mRowNameIdx >= 0) && + (static_cast<ssize_t>(mData.size()) > + (mLabelParams.mColumnNameIdx + 1))) + { + size_t i = 0; + for (auto& dataRow : mData) + { + if (static_cast<ssize_t>(dataRow.size()) > mLabelParams.mRowNameIdx) + { + mRowNames[dataRow[static_cast<size_t>(mLabelParams.mRowNameIdx)]] = i++; + } + } + } + } + +#ifdef HAS_CODECVT 
+#if defined(_MSC_VER) +#pragma warning (push) +#pragma warning (disable: 4996) +#endif + static std::string ToString(const std::wstring& pWStr) + { + return std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t>{ }.to_bytes(pWStr); + } + + static std::wstring ToWString(const std::string& pStr) + { + return std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t>{ }.from_bytes(pStr); + } +#if defined(_MSC_VER) +#pragma warning (pop) +#endif +#endif + + static void ReplaceString(std::string& pStr, const std::string& pSearch, const std::string& pReplace) + { + size_t pos = 0; + + while ((pos = pStr.find(pSearch, pos)) != std::string::npos) + { + pStr.replace(pos, pSearch.size(), pReplace); + pos += pReplace.size(); + } + } + + private: + std::string mPath; + LabelParams mLabelParams; + SeparatorParams mSeparatorParams; + ConverterParams mConverterParams; + LineReaderParams mLineReaderParams; + std::vector<std::vector<std::string>> mData; + std::map<std::string, size_t> mColumnNames; + std::map<std::string, size_t> mRowNames; +#ifdef HAS_CODECVT + bool mIsUtf16 = false; + bool mIsLE = false; +#endif + }; +}