%{
/* Flex scanner that rewrites XML-style markup into a brace-delimited form.
 *
 * Visible behaviour, per construct:
 *   - an opening tag echoes its name, wraps attributes in "( ... )" with
 *     "=" rewritten to ": " and blanks to ", ", then opens a "{" body;
 *   - a self-closing tag is terminated with ';';
 *   - "<!-- ... -->" becomes a "//" comment, or a block comment when the
 *     comment text contains a newline;
 *   - "<! ... >" becomes "#! ... ;".
 *
 * ECHOS, ECHOC, BUFFER, trim(), `buffer`, `is_asymmetric`, `ignore_count`,
 * `ignore_i` and `asymmetric_special_list` are not defined in this file;
 * presumably they come from the project headers included below.
 */
#include "scanner.hpp"

#include <algorithm>

#include "global.hpp"
#include "html_special.hpp"
#include "exit_values.hpp"

/* Number of currently open (pushed and not yet popped) tags. */
unsigned xml_tag_stack = 0;

/* Set when the comment being buffered contains at least one newline. */
static bool is_comment_multiline;
/* Name of the most recently opened tag; the IGNORE state compares closing
 * tags against it. */
static std::string current_tag;
/* Quote character (' or ") that opened the string being scanned. */
static char current_string_quote;
/* Start condition to return to once the current string is closed. */
int state_buffer;

/* Running total of matched input bytes, updated before every action. */
unsigned long long cursor_position = 0;
#define YY_USER_ACTION cursor_position += yyleng;

/* Record that one more tag has been opened. */
inline static
void xml_tag_stack_push() {
    ++xml_tag_stack;
}

/* Record that a tag has been closed; a close without a matching open
 * terminates the program with POLUTED_STACK. */
inline static
void xml_tag_stack_pop() {
    if (!xml_tag_stack) {
        exit(POLUTED_STACK);
    }
    --xml_tag_stack;
}
%}

%option noyywrap
%option nodefault
%option noyylineno

%x TAG_START TAG_MAYBE TAG
%x TAG_ASYMETRIC_SPECIAL
%x COMMENT
%x DECLARATION
%x STRING
%x IGNORE IGNORE_COUNT_START IGNORE_COUNT_END

ws          [ \t\r\v\f]
wsnl        [ \t\r\v\f\n]
    /* Explicit letter ranges: the range A-z would also match the ASCII
     * characters between 'Z' and 'a' ('[', '\', ']', '^', '_', '`').
     * The underscore is listed on purpose so names containing it still scan. */
identifier  [A-Za-z_][A-Za-z0-9_]*

%%

    /* Plain text between tags: dispatch on '<', translate entities and
     * backslash-escape the characters that are syntax in the output. */
<INITIAL>{
\<          {
                BEGIN TAG_START;
            }
\<\!--      {
                is_comment_multiline = false;
                buffer = std::string("");
                BUFFER("//");
                BEGIN COMMENT;
            }
\<\!        {
                ECHOS("#!");
                BEGIN DECLARATION;
            }
&[A-Za-z]+; {
                ECHOS(html_special_to_utf8(html_special_table_lookup(yytext)));
            }
    /* NOTE(review): numeric character references are normally written
     * "&#NNN;"; this pattern has no '#'. Confirm what
     * html_special_to_utf8() expects before changing it. */
&[0-9]+;    {
                ECHOS(html_special_to_utf8(yytext));
            }
[(){};]     {
                ECHOC('\\');
                ECHOC(yytext[0]);
            }
.|\n        {
                ECHO;
            }
}

    /* Inside "<!-- ... -->": collect the text, then emit it as a line
     * comment or, if a newline was seen, as a block comment. */
<COMMENT>{
.           {
                BUFFER(yytext);
            }
\n          {
                BUFFER(yytext);
                is_comment_multiline = true;
            }
--\>        {
                if (is_comment_multiline) {
                    /* Turn the leading "//" into an opening block-comment
                     * marker and append the closing one. */
                    buffer[1] = '*';
                    buffer += "*/";
                }
                ECHOS(buffer.c_str());
                BEGIN INITIAL;
            }
}

    /* Inside "<! ... >": echoed verbatim, terminated with ';'. */
<DECLARATION>{
\>          {
                ECHOC(';');
                BEGIN INITIAL;
            }
\'|\"       {
                ECHO;
                current_string_quote = yytext[0];
                state_buffer = DECLARATION;
                BEGIN STRING;
            }
.|\n        {
                ECHO;
            }
}

    /* Directly after '<': closing tag, opening tag name, or a special
     * single-character tag introducer. */
<TAG_START>{
\/{identifier}+{wsnl}*\>    {
                xml_tag_stack_pop();
                ECHOC('}');
                BEGIN INITIAL;
            }
{identifier}+   {
                ECHO;
                current_tag = yytext;
                BEGIN TAG_MAYBE;
            }
    /* '\n' is matched as well: with %option nodefault an unmatched newline
     * would jam the scanner instead of reporting UNRECOGNIZED_TAG. */
.|\n        {
                is_asymmetric = std::find(asymmetric_special_list.begin(),
                                          asymmetric_special_list.end(),
                                          yytext)
                                != asymmetric_special_list.end();
                if (is_asymmetric) {
                    ECHOC('<');
                    ECHO;
                    BEGIN TAG_ASYMETRIC_SPECIAL;
                } else {
                    exit(UNRECOGNIZED_TAG);
                }
            }
}

    /* After the tag name, before it is known whether attributes follow. */
<TAG_MAYBE>{
\>          {
                xml_tag_stack_push();
                ECHOS(" {");
                /* Start the body from an empty buffer; otherwise text left
                 * over from an earlier comment or body is emitted again. */
                buffer = std::string("");
                BEGIN IGNORE;
            }
\/\>        {
                ECHOC(';');
                BEGIN INITIAL;
            }
{wsnl}      {
                ECHO;
            }
.           {
                /* First attribute character: push it back so the TAG state
                 * scans it, and open the attribute list. */
                yyless(0);
                ECHOC('(');
                BEGIN TAG;
            }
}

    /* Attribute list of an opening tag. */
<TAG>{
\"|\'       {
                current_string_quote = yytext[0];
                state_buffer = TAG;
                BEGIN STRING;
            }
=           {
                ECHOS(": ");
            }
\>          {
                xml_tag_stack_push();
                ECHOS(") {");
                /* Start the body from an empty buffer; otherwise text left
                 * over from an earlier comment or body is emitted again. */
                buffer = std::string("");
                BEGIN IGNORE;
            }
\/\>        {
                ECHOS(");");
                BEGIN INITIAL;
            }
{ws}        {
                ECHOS(", ");
            }
.|\n        {
                ECHO;
            }
}

    /* Special tag: echoed verbatim until a character from
     * asymmetric_special_list is immediately followed by '>'. */
<TAG_ASYMETRIC_SPECIAL>{
.\>         {
                ECHO;
                is_asymmetric = std::find(asymmetric_special_list.begin(),
                                          asymmetric_special_list.end(),
                                          (std::string("") + yytext[0]))
                                != asymmetric_special_list.end();
                if (is_asymmetric) {
                    BEGIN INITIAL;
                }
            }
.|\n        {
                ECHO;
            }
}

    /* Quoted string entered from TAG or DECLARATION. A quote preceded by a
     * backslash does not close the string.
     * NOTE(review): the closing pattern needs one character before the
     * quote, so an empty string ("") does not appear to be closed by it. */
<STRING>{
[^\\](\"|\')    {
                if (current_string_quote == yytext[1]) {
                    {   // XXX: this is such a hack
                        /* The character before the closing quote was
                         * consumed by this match, so it has to be escaped
                         * here rather than by the rule below. */
                        switch (yytext[0]) {
                            case ',':
                            case '(':
                            case ')':
                                ECHOC('\\');
                        }
                        ECHOC(yytext[0]);
                    }
                    /* Only declarations keep their quotes in the output. */
                    if (state_buffer == DECLARATION) {
                        ECHOC(yytext[1]);
                    }
                    BEGIN state_buffer;
                } else {
                    ECHO;
                }
            }
[,)(]       {
                ECHOC('\\');
                ECHOC(yytext[0]);
            }
.|\n        {
                ECHO;
            }
}

    /* Tag body: buffered until the closing tag that matches current_tag,
     * then emitted wrapped in ignore_count brace pairs plus one '}' that
     * closes the block opened together with the tag. */
<IGNORE>{
\<\/{identifier}+\> {
                char * dup;

                dup = strdup(yytext);
                trim(dup);
                const int eq = !strcmp(dup, current_tag.c_str());
                free(dup);

                if (eq) {
                    for (int i = 0; i < ignore_count; i++) {
                        ECHOC('{');
                    }
                    ECHOS(buffer.c_str());
                    /* Starts at -1: one more '}' than '{' is written. */
                    for (int i = -1; i < ignore_count; i++) {
                        ECHOC('}');
                    }
                    ignore_count = 1;
                    xml_tag_stack_pop();
                    BEGIN INITIAL;
                } else {
                    BUFFER(yytext);
                    ECHO;
                }
            }
\{          {
                BUFFER(yytext);
                BEGIN IGNORE_COUNT_START;
            }
\}          {
                BUFFER(yytext);
                BEGIN IGNORE_COUNT_END;
            }
.|\n        {
                BUFFER(yytext);
            }
}

    /* Runs of consecutive braces inside a body are counted; the largest
     * count seen is kept in ignore_count. */
<IGNORE_COUNT_START>{
\{          {
                BUFFER(yytext);
                ++ignore_i;
            }
}

<IGNORE_COUNT_END>{
\}          {
                BUFFER(yytext);
                ++ignore_i;
            }
}

<IGNORE_COUNT_START,IGNORE_COUNT_END>{
.|\n        {
                BUFFER(yytext);
                if (ignore_i > ignore_count) {
                    ignore_count = ignore_i;
                }
                ignore_i = 0;
                BEGIN IGNORE;
            }
}

%%