diff --git a/Makefile b/Makefile index 7cee082..16f15f8 100644 --- a/Makefile +++ b/Makefile @@ -16,13 +16,13 @@ OBJECT.d := object/ TEST.d := test/ INSTALL.d := /bin/ -SOURCE := main.cpp xml.cpp csml.cpp cli.cpp global.cpp html_special.cpp +SOURCE := main.cpp xml.cpp csml.cpp cli.cpp html_special.cpp OBJECT := $(addprefix ${OBJECT.d}/,${SOURCE}) OBJECT := ${OBJECT:.cpp=.o} OBJECT := ${OBJECT:.c=.o} %.cpp: %.l - ${LEX} --prefix=$(basename $(notdir $<)) ${LFLAGS} -o $@ $< + ${LEX} --prefix=$(basename $(notdir $<))_ ${LFLAGS} -o $@ $< ${OBJECT.d}/%.o: ${SOURCE.d}/%.cpp ${COMPILE.cpp} -o $@ $< @@ -36,10 +36,10 @@ install: ${OUT} uninstall: ${RM} ${INSTALL.d}/${OUT} -vim_install: install +vim_install: cp plugin/contra.vim ~/.vim/plugin/ -code_install: install code +code_install: code code --install-extension plugin/vscode/*.vsix code: @@ -48,9 +48,11 @@ code: test: bat ${TEST.d}/draft.csml - ${WRAP} ./${OUT} -i '$$html' -c ${TEST.d}/draft.csml - bat ${TEST.d}/draft.html - ${WRAP} ./${OUT} -i '$$html' -x ${TEST.d}/draft.html + ${WRAP} ./${OUT} -s 'html' -c ${TEST.d}/draft.csml + bat --paging=never ${TEST.d}/draft.html + ${WRAP} ./${OUT} -s 'html' -x ${TEST.d}/draft.html + bat --paging=never ${TEST.d}/complex.html + ${WRAP} ./${OUT} -s 'html' -x ${TEST.d}/complex.html clean: -rm ${OUT} diff --git a/source/cli.cpp b/source/cli.cpp index 1187b72..eff3c07 100644 --- a/source/cli.cpp +++ b/source/cli.cpp @@ -1,18 +1,38 @@ #include "cli.hpp" -#include "exit_values.hpp" - #include #include +#include +#include +#include "exit_values.hpp" +#include "scanner.hpp" + +#define DECLARE_LEXER(x) \ + extern FILE * x ## _in; \ + extern FILE * x ## _out; \ + extern int x ## _lex(void); \ + +DECLARE_LEXER(csml); +DECLARE_LEXER(xml); + +extern std::stack csml_tag_stack; +extern unsigned xml_tag_stack; + +const char * const csml_extension = ".csml"; + +static const char * const version = # include "version.inc" ; +static const char * const help_message = "contra [options] +\n" " -c : the input is to be force interpeted as CSML\n" " -x : the input is to be force interpeted as XML/HTML\n" +" -s : colon separeted list of option sets\n" +" -S : colon separeted list of special asymetric tags starters\n" " -i : colon separeted list of tags which contents should be ignored\n" " -o : specify output file name for the NEXT file\n" " -q : use for quoting (default: \"'\")\n" @@ -20,9 +40,51 @@ const char * const help_message = " -h : print help and quit\n" ; +const char * output = NULL; +const char * input = NULL; + +enum class input_type_t { + CSML, + XML, + AUTO_DETECT, +} input_type = input_type_t::AUTO_DETECT; + +// ### Local functions ### +static +inline +void try_fopen(FILE * &file, const char * const path, const char * const mode) { + file = fopen(path, mode); + if (!file) { + fprintf(stderr, "Error opening file '%s'.\n", path); + fflush(stderr); + exit(IO_ERROR); + } +} + +static +void yylex(FILE * &yyin, FILE * &yyout, int (*yylex_)(void)) { + if (output) { + try_fopen(yyout, output, "w"); + } else { + yyout = stdout; + } + try_fopen(yyin, input, "r"); + + + yylex_(); + + if (yyin != stdin) { + fclose(yyin); + } + if (yyout != stdout) { + fclose(yyout); + } +} + +// ### global functions ### extern "C" signed parse_round1_arguments(int argc, char * * argv){ - const char * const optstring = "-" "hv" "cxi:o:q:"; + const char * const optstring = "-" "hv" "cxs:S:i:o:q:"; static struct option long_options[] = { {"help", no_argument, 0, 'h'}, @@ -48,3 +110,95 @@ signed parse_round1_arguments(int argc, char * * argv){ return 0; } + +extern "C" +signed parse_round2_arguments(int argc, char * * argv) { + for (int n = 0; n < argc; n++) { + if (!strcmp(argv[n], "-c")) { + input_type = input_type_t::CSML; + } else if (!strcmp(argv[n], "-x")) { + input_type = input_type_t::XML; + } else if (!strcmp(argv[n], "-q")) { + ++n; + quote = argv[n][0]; + } else if (!strcmp(argv[n], "-i")) { + ++n; + parse_colon_list(argv[n], ignore_list); + } else if (!strcmp(argv[n], "-s")) { + ++n; + const int err = parse_sets(argv[n]); + if (err) { + exit(err); + } + } else if (!strcmp(argv[n], "-S")) { + ++n; + parse_colon_list(argv[n], asymmetric_special_list); + } else if (!strcmp(argv[n], "-o")) { + ++n; + output = argv[n]; + } else { + input = argv[n]; + + if (input_type == input_type_t::AUTO_DETECT) { + if (!strcmp(input + strlen(input) - (sizeof(csml_extension)-1), csml_extension)) { + input_type = input_type_t::CSML; + } else { + input_type = input_type_t::XML; + } + } + + switch (input_type) { + case input_type_t::CSML: { + yylex(csml_in, csml_out, csml_lex); + if (not csml_tag_stack.empty()) { + exit(POPULATED_STACK); + } + } break; + case input_type_t::XML: { + yylex(xml_in, xml_out, xml_lex); + if(xml_tag_stack) { + exit(POPULATED_STACK); + } + } break; + default: { + }; + } + + output = NULL; + } + } + + return 0; +} + +extern "C" +signed parse_colon_list(char * const list, std::vector destination) { + const char * delimiter = ":"; + char * data = strtok(list, delimiter); + int i = 0; + do { + destination.emplace_back(data); + ++i; + } while((data = strtok(NULL, delimiter), data)); + + return 0; +} + +extern "C" +signed parse_sets(char * const list) { + const char * delimiter = ":"; + char * data = strtok(list, delimiter); + int i = 0; + do { + if (!strcmp(data, "html")) { + ignore_list.emplace_back("style"); + ignore_list.emplace_back("script"); + asymmetric_special_list.emplace_back("?"); + } else { + return UNKNOWN_SET; + } + ++i; + } while((data = strtok(NULL, delimiter), data)); + + return 0; +} diff --git a/source/cli.hpp b/source/cli.hpp index d92d49a..776a50a 100644 --- a/source/cli.hpp +++ b/source/cli.hpp @@ -1,10 +1,19 @@ #ifndef CLI_H +#include +#include + +extern "C" signed parse_colon_list(char * const list, std::vector destination); +extern "C" signed parse_sets(char * const list); + /* Parse arguments with perminant effects (-h) * Perform validation. */ -extern "C" -signed parse_round1_arguments(int argc, char * * argv); +extern "C" signed parse_round1_arguments(int argc, char * * argv); + +/* Parse context sensitive arguments + */ +extern "C" signed parse_round2_arguments(int argc, char * * argv); #define CLI_H #endif diff --git a/source/csml.l b/source/csml.l index 628bdf8..668f3b2 100644 --- a/source/csml.l +++ b/source/csml.l @@ -9,7 +9,7 @@ #include "html_special.hpp" #include "global.hpp" -std::stack tag_stack; +std::stack csml_tag_stack; static std::string tag_candidate = ""; @@ -37,17 +37,17 @@ void push_tag() { } trim(tag_candidate); - tag_stack.push(tag_candidate); + csml_tag_stack.push(tag_candidate); tag_candidate = ""; } static void pop_tag() { - if (tag_stack.empty()) { + if (csml_tag_stack.empty()) { exit(TAG_NOT_FOUND); } - tag_stack.pop(); + csml_tag_stack.pop(); tag_candidate = ""; } @@ -87,7 +87,7 @@ unicode [\300-\364] } \( { push_tag(); - ECHOS(("<" + tag_stack.top() + " ").c_str()); + ECHOS(("<" + csml_tag_stack.top() + " ").c_str()); BEGIN HEAD; } &#?{identifier}; { @@ -101,15 +101,15 @@ unicode [\300-\364] } \{ { push_tag(); - ECHOS(("<" + tag_stack.top() + ">").c_str()); - if (do_ignore(tag_stack.top())) { + ECHOS(("<" + csml_tag_stack.top() + ">").c_str()); + if (do_ignore(csml_tag_stack.top())) { buffer = std::string(""); BEGIN IGNORE_COUNT_START; } } \} { ECHO_CANDIDATE; - ECHOS(("").c_str()); + ECHOS(("").c_str()); pop_tag(); } \< { @@ -211,7 +211,7 @@ unicode [\300-\364] ignore_count = 1; ECHOS(buffer.c_str()); - ECHOS(("").c_str()); + ECHOS(("").c_str()); pop_tag(); BEGIN BODY; } @@ -235,7 +235,7 @@ unicode [\300-\364] BEGIN IGNORE_COUNT_END; } else { ECHOS(buffer.c_str()); - ECHOS(("").c_str()); + ECHOS(("").c_str()); pop_tag(); BEGIN BODY; } diff --git a/source/exit_values.hpp b/source/exit_values.hpp index bc9eb55..4ccadc9 100644 --- a/source/exit_values.hpp +++ b/source/exit_values.hpp @@ -1,9 +1,10 @@ const static int EXIT_EARLY_SUCCESS = 400; enum { - UNKNOWN_OPTION = 1, - IO_ERROR = 2, - UNKNOWN_SET = 3, - POPULATED_STACK = 4, // most likely signals that more tags were opened than closed, ie the user forgot adding a '}' somewhere - TAG_NOT_NAMED = 5, - TAG_NOT_FOUND = 6, + UNKNOWN_OPTION = 1, + IO_ERROR = 2, + UNKNOWN_SET = 3, + POPULATED_STACK = 4, // most likely signals that more tags were opened than closed, ie the user forgot adding a '}' somewhere + TAG_NOT_NAMED = 5, + TAG_NOT_FOUND = 6, + UNRECOGNIZED_TAG = 7, }; diff --git a/source/global.cpp b/source/global.cpp deleted file mode 100644 index e787df9..0000000 --- a/source/global.cpp +++ /dev/null @@ -1,36 +0,0 @@ -#include "global.hpp" - -#include -#include -#include - -std::vector ignore_list; -int ignore_count = 1; -int ignore_i = 1; -std::string buffer; - -void trim(char * const s) { - int bp = 0; - int len = strlen(s); - bool do_break = false; - int i = 0; - for (;i < len; i++) { - if ((s[i] >= 'A' && s[i] <= 'Z') - || (s[i] >= 'a' && s[i] <= 'z') - || (s[i] >= '0' && s[i] <= '9') - || (s[i] == '_')) { - s[bp++] = s[i]; - do_break = true; - } else if (do_break) { - break; - } - } - s[bp] = '\0'; -} - -void trim(std::string &s) { - char * dup = strdup(s.c_str()); - trim(dup); - s = std::string(dup); - free(dup); -} diff --git a/source/main.cpp b/source/main.cpp index c000647..f31c3c0 100644 --- a/source/main.cpp +++ b/source/main.cpp @@ -5,38 +5,18 @@ #include #include #include -#include -#include #include "cli.hpp" #include "scanner.hpp" #include "exit_values.hpp" -extern std::stack tag_stack; +std::vector ignore_list; +std::vector asymmetric_special_list; -#define DECLARE_LEXER(x) \ - extern FILE * x ## in; \ - extern FILE * x ## out; \ - extern int x ## lex(void); \ +int ignore_count = 1; +int ignore_i = 1; -DECLARE_LEXER(csml); -DECLARE_LEXER(xml); - -char * output = NULL; -char * input = NULL; -const char * const csml_extension = ".csml"; - -const std::map> sets = { - {"$html", {"style", "script"}}, -}; - -enum class input_type_t { - CSML, - XML, - AUTO_DETECT, -} input_type = input_type_t::AUTO_DETECT; - -const char * const auto_output_extensions[] = {csml_extension, ".html"}; +std::string buffer; char * output_name_from_input_name(const char * const input, const char * const extension) { char * input_duplicate = strdup(input); @@ -63,107 +43,40 @@ char * output_name_from_input_name(const char * const input, const char * const return r; } -static -inline -void try_fopen(FILE * &file, const char * const path, const char * const mode) { - file = fopen(path, mode); - if (!file) { - fprintf(stderr, "Error opening file '%s'.\n", path); - fflush(stderr); - exit(IO_ERROR); - } +void trim(char * const s) { + int bp = 0; + int len = strlen(s); + bool do_break = false; + for (int i = 0; i < len; i++) { + if ((s[i] >= 'A' && s[i] <= 'Z') + || (s[i] >= 'a' && s[i] <= 'z') + || (s[i] >= '0' && s[i] <= '9') + || (s[i] == '_')) { + s[bp++] = s[i]; + do_break = true; + } else if (do_break) { + break; + } + } + s[bp] = '\0'; } -static -void yylex(FILE * &yyin, FILE * &yyout, int (*yylex_)(void)) { - /* --- Preparation --- */ - if (output) { - try_fopen(yyout, output, "w"); - } else { - yyout = stdout; - } - try_fopen(yyin, input, "r"); - - - /* --- Meat --- */ - yylex_(); - - /* --- Clean up --- */ - if (yyin != stdin) { - fclose(yyin); - } - if (yyout != stdout) { - fclose(yyout); - } +void trim(std::string &s) { + char * dup = strdup(s.c_str()); + trim(dup); + s = std::string(dup); + free(dup); } signed main(int argc, char * * argv) { - { - const int b = parse_round1_arguments(argc - 1, argv + 1); - switch (b) { - case 0: break; - case EXIT_EARLY_SUCCESS: exit(EXIT_SUCCESS); - default: exit(b); - } + const int b = parse_round1_arguments(argc - 1, argv + 1); + switch (b) { + case 0: break; + case EXIT_EARLY_SUCCESS: exit(EXIT_SUCCESS); + default: exit(b); } + + parse_round2_arguments(argc - 1, argv + 1); - for (int n = 1; n < argc; n++) { - if (!strcmp(argv[n], "-c")) { - input_type = input_type_t::CSML; - } else if (!strcmp(argv[n], "-x")) { - input_type = input_type_t::XML; - } else if (!strcmp(argv[n], "-q")) { - ++n; - quote = argv[n][0]; - } else if (!strcmp(argv[n], "-i")) { - ++n; - const char * delimiter = ":"; - char * data = strtok(argv[n], delimiter); - int i = 0; - do { - if (data[0] == '$') { - const auto &&set = sets.find("$html"); - [[ likely ]] if (set != sets.end()) { - ignore_list.insert(ignore_list.begin(), set->second.begin(), set->second.end()); - } else { - exit(UNKNOWN_SET); - } - } else [[ likely ]] { - ignore_list.emplace_back(data); - } - ++i; - } while((data = strtok(NULL, delimiter), data)); - } else if (!strcmp(argv[n], "-o")) { - ++n; - output = argv[n]; - } else { - input = argv[n]; - - if (input_type == input_type_t::AUTO_DETECT) { - if (!strcmp(input + strlen(input) - (sizeof(csml_extension)-1), csml_extension)) { - input_type = input_type_t::CSML; - } else { - input_type = input_type_t::XML; - } - } - - switch (input_type) { - case input_type_t::CSML: { - yylex(csmlin, csmlout, csmllex); - if (not tag_stack.empty()) { - exit(POPULATED_STACK); - } - } break; - case input_type_t::XML: { - yylex(xmlin, xmlout, xmllex); - } break; - default: { - }; - } - - output = NULL; - } - } - return EXIT_SUCCESS; } diff --git a/source/scanner.hpp b/source/scanner.hpp index 17c22b0..1a62fe8 100644 --- a/source/scanner.hpp +++ b/source/scanner.hpp @@ -12,6 +12,7 @@ #define BUFFER(s) buffer += s extern std::vector ignore_list; +extern std::vector asymmetric_special_list; inline bool do_ignore(const std::string ¤t_tag) { @@ -21,12 +22,21 @@ bool do_ignore(const std::string ¤t_tag) { != ignore_list.end(); } -extern int ignore_count; // number of '{' / '}'s to be placed around the current ignored block -extern int ignore_i; // number of '}'s so far - -extern std::string buffer; - extern char quote; +/* number of '{' / '}'s to be placed around the current ignored block + */ +extern int ignore_count; + +/* number of '}'s so far + */ +extern int ignore_i; + +/* used for saving sections whichs starting projection + * cannot be determined before reading the while + * (e.g. comments (single- or multiline?)) + */ +extern std::string buffer; + #define SCANNER_H #endif diff --git a/source/xml.l b/source/xml.l index 149f8a2..504492f 100644 --- a/source/xml.l +++ b/source/xml.l @@ -5,10 +5,12 @@ #include "global.hpp" #include "html_special.hpp" +#include "exit_values.hpp" bool is_comment_multiline; std::string current_tag; -long ignore_start; +unsigned xml_tag_stack = 0; +bool is_asymmetric; unsigned long long cursor_position = 0; #define YY_USER_ACTION cursor_position += yyleng; @@ -19,13 +21,14 @@ unsigned long long cursor_position = 0; %option noyylineno %x TAG_START TAG_MAYBE TAG +%x TAG_ASYMETRIC_SPECIAL %x COMMENT %x STRING -%x IGNORE IGNORE_SEEK IGNORE_COUNT_START IGNORE_COUNT_END +%x IGNORE IGNORE_COUNT_START IGNORE_COUNT_END ws [ \t\r\v\f] wsnl [ \t\r\v\f\n] -identifier [A-z][A-z0-9]* +identifier [A-z!][A-z0-9]* %% { @@ -69,6 +72,7 @@ identifier [A-z][A-z0-9]* { \/{identifier}+{wsnl}*\> { + --xml_tag_stack; ECHOC('}'); BEGIN INITIAL; } @@ -77,14 +81,28 @@ identifier [A-z][A-z0-9]* current_tag = yytext; BEGIN TAG_MAYBE; } +. { + is_asymmetric = std::find(asymmetric_special_list.begin(), + asymmetric_special_list.end(), + yytext) + != asymmetric_special_list.end(); + if (is_asymmetric) { + ECHOC('<'); + ECHO; + BEGIN TAG_ASYMETRIC_SPECIAL; + } else { + exit(UNRECOGNIZED_TAG); + } +} } { \> { + ++xml_tag_stack; ECHOS(" {"); if (do_ignore(current_tag)) { - ignore_start = cursor_position; - BEGIN IGNORE_SEEK; + buffer = std::string(""); + BEGIN IGNORE; } else { BEGIN INITIAL; } @@ -111,18 +129,38 @@ identifier [A-z][A-z0-9]* ECHOS(": "); } \> { + ++xml_tag_stack; ECHOS(") {"); if (do_ignore(current_tag)) { - ignore_start = cursor_position; - BEGIN IGNORE_SEEK; + buffer = std::string(""); + BEGIN IGNORE; } else { BEGIN INITIAL; } } \/\> { - ECHOC(';'); + ECHOS(");"); BEGIN INITIAL; } +{ws} { + ECHOS(", "); +} +.|\n { + ECHO; +} +} + +{ +.\> { + ECHO; + is_asymmetric = std::find(asymmetric_special_list.begin(), + asymmetric_special_list.end(), + (std::string("") + yytext[0])) + != asymmetric_special_list.end(); + if (is_asymmetric) { + BEGIN INITIAL; + } +} .|\n { ECHO; } @@ -130,6 +168,7 @@ identifier [A-z][A-z0-9]* { [^\\]\"|\' { + ECHOC(yytext[0]); BEGIN TAG; } , { @@ -140,79 +179,64 @@ identifier [A-z][A-z0-9]* } } -{ +{ \<\/{identifier}+\> { + --xml_tag_stack; char * dup; dup = strdup(yytext); trim(dup); - if (!strcmp(dup, current_tag.c_str())) { + const int eq = !strcmp(dup, current_tag.c_str()); + free(dup); + if (eq) { for (int i = 0; i < ignore_count; i++) { ECHOC('{'); } - fseek(yyin, ignore_start, SEEK_SET); - YY_FLUSH_BUFFER; - BEGIN IGNORE; - } else { - ECHO; - } - free(dup); -} -\{ { - BEGIN IGNORE_COUNT_START; -} -\} { - BEGIN IGNORE_COUNT_END; -} -.|\n { -} -} - -{ -\{ { - ++ignore_i; -} -} - -{ -\} { - ++ignore_i; -} -} - -{ -.|\n { - if (ignore_i > ignore_count) { - ignore_count = ignore_i; - } - ignore_i = 0; - BEGIN IGNORE_SEEK; -} -} - -{ -\<\/{identifier}+\> { - char * dup; - dup = strdup(yytext); - trim(dup); - if (!strcmp(dup, current_tag.c_str())) { + ECHOS(buffer.c_str()); for (int i = -1; i < ignore_count; i++) { ECHOC('}'); } ignore_count = 1; BEGIN INITIAL; } else { + BUFFER(yytext); ECHO; } - free(dup); } - /* -[{|}] { - ECHOC('\\'); - ECHOC(yytext[0]); +\{ { + BUFFER(yytext); + BEGIN IGNORE_COUNT_START; +} +\} { + BUFFER(yytext); + BEGIN IGNORE_COUNT_END; } - */ .|\n { - ECHO; + BUFFER(yytext); +} +} + +{ +\{ { + BUFFER(yytext); + ++ignore_i; +} +} + +{ +\} { + BUFFER(yytext); + ++ignore_i; +} +} + +{ +.|\n { + BUFFER(yytext); + if (ignore_i > ignore_count) { + ignore_count = ignore_i; + } + ignore_i = 0; + BEGIN IGNORE; } } %%