/* Flex scanner specification: rewrites HTML-like markup into a brace-delimited text form. */
%{
#include "scanner.hpp"

#include <stdio.h>

#include "global.hpp"
#include "html_special.hpp"

/* Set to true as soon as a newline is buffered inside the current HTML
 * comment; reset to false whenever a new comment starts. */
bool is_comment_multiline = false;

/* Text of the most recently scanned opening tag name. */
std::string current_tag;

/* Value of cursor_position captured right after the '>' of an opening tag
 * whose body is to be ignored; later handed to fseek() to rewind the input. */
long ignore_start = 0;

/* Running total of yyleng over every matched rule.  YY_USER_ACTION runs
 * before each rule's action, so inside an action the counter already
 * includes the current match. */
unsigned long long cursor_position = 0;
#define YY_USER_ACTION cursor_position += yyleng;
%}
|
|
|
|
%option noyywrap
%option nodefault
%option noyylineno

/* Exclusive start conditions.
 *   TAG_START  - just consumed '<'
 *   TAG_MAYBE  - consumed the tag name, attributes may follow
 *   TAG        - inside the attribute list
 *   COMMENT    - inside <!-- ... -->
 *   STRING     - inside a quoted attribute value
 *   IGNORE_SEEK, IGNORE_COUNT_START, IGNORE_COUNT_END, IGNORE
 *              - two-pass handling of elements selected by do_ignore()
 */
%x TAG_START TAG_MAYBE TAG
%x COMMENT
%x STRING
%x IGNORE IGNORE_SEEK IGNORE_COUNT_START IGNORE_COUNT_END

/* Horizontal whitespace, and whitespace including newline. */
ws          [ \t\r\v\f]
wsnl        [ \t\r\v\f\n]

/* A letter followed by letters or digits.  The range is spelled A-Za-z on
 * purpose: the range A-z also covers the six ASCII characters between 'Z'
 * and 'a' ('[', '\', ']', '^', '_' and '`'), which are not letters. */
identifier  [A-Za-z][A-Za-z0-9]*
|
|
|
|
%%
|
|
<INITIAL>{
    /* Document text outside of any tag or comment.  Tags and comments
     * switch to their own start conditions, character entities are
     * translated, and everything else is echoed unchanged. */

    \< {
        BEGIN TAG_START;
    }

    \<\!-- {
        /* Start buffering a comment.  It is provisionally prefixed with
         * "//"; the COMMENT rules rewrite the prefix if a newline shows up. */
        is_comment_multiline = false;
        buffer = std::string("");
        BUFFER("//");
        BEGIN COMMENT;
    }

    &[A-Za-z]+; {
        /* Named entity.  The class is A-Za-z rather than A-z so that
         * '[', '\', ']', '^', '_' and '`' are not accepted as name letters. */
        ECHOS(html_special_to_utf8(html_special_table_lookup(yytext)));
    }

    &[0-9]+; {
        /* NOTE(review): HTML numeric references are written "&#NNN;", but
         * this pattern has no '#'.  Left unchanged because the input format
         * expected by html_special_to_utf8() is not visible here - confirm
         * before changing the pattern. */
        ECHOS(html_special_to_utf8(yytext));
    }

    .|\n {
        ECHO;
    }
}
|
|
|
|
<COMMENT>{
    /* Body of an HTML comment.  Characters are accumulated through BUFFER
     * until the terminator is seen, then the buffered text is emitted. */

    "-->" {
        if (is_comment_multiline) {
            /* Turn the provisional "//" prefix into a block comment and
             * close it. */
            buffer[1] = '*';
            buffer += "*/";
        }
        ECHOS(buffer.c_str());
        BEGIN INITIAL;
    }

    .|\n {
        BUFFER(yytext);
        if (yytext[0] == '\n') {
            /* A line break means "//" cannot hold the whole comment. */
            is_comment_multiline = true;
        }
    }
}
|
|
|
|
<TAG_START>{
    /* Entered right after '<' was consumed in INITIAL. */

    \/{identifier}+{wsnl}*\> {
        /* Closing tag: emit the brace that ends the element's block. */
        ECHOC('}');
        BEGIN INITIAL;
    }

    {identifier}+ {
        /* Opening tag name: echo it and remember it for do_ignore() and for
         * matching the closing tag of an ignored element. */
        ECHO;
        current_tag = yytext;
        BEGIN TAG_MAYBE;
    }

    .|\n {
        /* The '<' was not the start of a tag (for example "<!DOCTYPE" or
         * "a < b").  This state is exclusive and the scanner is built with
         * %option nodefault, so without this rule such input would stop the
         * scanner with a fatal "scanner jammed" error.  Emit the '<' that
         * INITIAL swallowed and rescan the current character as plain text.
         * YY_USER_ACTION has already counted the character being pushed
         * back, so undo that to keep cursor_position accurate. */
        cursor_position -= yyleng;
        yyless(0);
        ECHOC('<');
        BEGIN INITIAL;
    }
}
|
|
|
|
<TAG_MAYBE>{
    /* Entered after the tag name was echoed.  Decides whether the tag ends
     * here or whether an attribute list follows. */

    \> {
        /* Tag without attributes: open the element's block. */
        ECHOS(" {");
        if (do_ignore(current_tag)) {
            /* Remember where the body starts so it can be replayed. */
            ignore_start = cursor_position;
            BEGIN IGNORE_SEEK;
        } else {
            BEGIN INITIAL;
        }
    }

    \/\> {
        /* Self-closing tag without attributes. */
        ECHOC(';');
        BEGIN INITIAL;
    }

    {wsnl} {
        ECHO;
    }

    . {
        /* First character of an attribute list.  Push it back so the TAG
         * rules see it.  YY_USER_ACTION has already added this character to
         * cursor_position and will add it again when it is rescanned, so
         * subtract it here; otherwise the counter runs one byte ahead per
         * tag with attributes and ignore_start points past the first byte
         * of an ignored element's body. */
        cursor_position -= yyleng;
        yyless(0);
        ECHOC('(');
        BEGIN TAG;
    }
}
|
|
|
|
<TAG>{
    /* Attribute list of an opening tag; the '(' that introduces it has
     * already been emitted. */

    "/>" {
        /* Self-closing tag. */
        ECHOC(';');
        BEGIN INITIAL;
    }

    ">" {
        /* End of the attribute list: close the parenthesis and open the
         * element's block. */
        ECHOS(") {");
        if (!do_ignore(current_tag)) {
            BEGIN INITIAL;
        } else {
            /* Remember where the body starts so it can be replayed. */
            ignore_start = cursor_position;
            BEGIN IGNORE_SEEK;
        }
    }

    ["'] {
        /* Opening quote of an attribute value; the quote is not echoed. */
        BEGIN STRING;
    }

    "=" {
        ECHOS(": ");
    }

    .|\n {
        ECHO;
    }
}
|
|
|
|
<STRING>{
    /* Quoted attribute value.  The surrounding quotes are not echoed and
     * commas are escaped with a backslash.
     *
     * The previous terminator pattern  [^\\]\"|\'  parsed as
     * ([^\\]\") | (\'), so the character in front of a closing '"' was
     * consumed together with the quote and never echoed ("foo" came out as
     * "fo"), and an empty value "" did not terminate the string at all.
     * A backslash-escaped quote is now handled by its own rule and a bare
     * quote ends the string without swallowing the preceding character.
     *
     * NOTE(review): either quote character still ends the string no matter
     * which one opened it; telling them apart would need a separate start
     * condition per quote type. */

    \\["'] {
        /* Escaped quote: part of the value, echoed as written. */
        ECHO;
    }

    ["'] {
        BEGIN TAG;
    }

    , {
        ECHOS("\\,");
    }

    .|\n {
        ECHO;
    }
}
|
|
|
|
<IGNORE_SEEK>{
    /* First pass over the body of an element selected by do_ignore().
     * Nothing from the body is emitted in this pass: it only measures runs
     * of braces (through the IGNORE_COUNT_* states) and looks for the
     * matching closing tag.  Once that tag is found the input is rewound to
     * ignore_start and the body is replayed by the IGNORE rules. */

    \<\/{identifier}+\> {
        char *closing = strdup(yytext);
        if (closing == NULL) {
            YY_FATAL_ERROR("out of memory while copying a closing tag");
        }
        /* trim() is defined elsewhere; presumably it reduces "</name>" to
         * "name" so it can be compared with current_tag - TODO confirm. */
        trim(closing);
        const bool closes_current = (strcmp(closing, current_tag.c_str()) == 0);
        free(closing);

        if (closes_current) {
            /* Emit the extra opening braces in front of the replayed body. */
            for (int i = 0; i < ignore_count; i++) {
                ECHOC('{');
            }
            /* Rewind to the start of the body.  A failed seek (for example
             * when yyin is a pipe) would otherwise silently drop the body,
             * because the buffered input is discarded right below. */
            if (fseek(yyin, ignore_start, SEEK_SET) != 0) {
                YY_FATAL_ERROR("cannot rewind input to replay an ignored element");
            }
            YY_FLUSH_BUFFER;
            /* Scanning resumes at ignore_start, so the byte counter has to
             * go back as well; otherwise the body is counted twice and the
             * offsets recorded for later ignored elements are wrong. */
            cursor_position = ignore_start;
            BEGIN IGNORE;
        }
        /* A closing tag for some other element is skipped like any other
         * body text: the IGNORE pass echoes it when the body is replayed,
         * so echoing it here as well would duplicate it in the output. */
    }

    \{ {
        BEGIN IGNORE_COUNT_START;
    }

    \} {
        BEGIN IGNORE_COUNT_END;
    }

    .|\n {
        /* Body text is dropped in this pass. */
    }
}
|
|
|
|
<IGNORE_COUNT_START>{
    /* Entered from IGNORE_SEEK after one '{'.  Each further '{' of the same
     * run increments ignore_i. */
    \{ {
        ++ignore_i;
    }
}

<IGNORE_COUNT_END>{
    /* Entered from IGNORE_SEEK after one '}'.  Each further '}' of the same
     * run increments ignore_i. */
    \} {
        ++ignore_i;
    }
}

<IGNORE_COUNT_START,IGNORE_COUNT_END>{
    .|\n {
        /* The run of braces is over: keep the largest count seen so far and
         * reset the per-run counter. */
        if (ignore_i > ignore_count) {
            ignore_count = ignore_i;
        }
        ignore_i = 0;
        /* Hand the terminating character back to IGNORE_SEEK instead of
         * swallowing it.  It may be the '<' of the closing tag (in "}}</x>"
         * the tag would otherwise never be recognised) or a brace of the
         * opposite kind that starts a new run.  YY_USER_ACTION has already
         * counted the character, so undo that before pushing it back. */
        cursor_position -= yyleng;
        yyless(0);
        BEGIN IGNORE_SEEK;
    }
}
|
|
|
|
<IGNORE>{
    /* Second pass over the body of an ignored element, entered after
     * IGNORE_SEEK rewound the input: the body is echoed verbatim until the
     * closing tag that matches current_tag. */

    "</"{identifier}+">" {
        char *closing = strdup(yytext);
        /* trim() is defined elsewhere; presumably it reduces "</name>" to
         * "name" so it can be compared with current_tag - TODO confirm. */
        trim(closing);
        const bool ends_region = (strcmp(closing, current_tag.c_str()) == 0);
        free(closing);

        if (!ends_region) {
            /* Some other closing tag: it is part of the body. */
            ECHO;
        } else {
            /* The loop starts at -1, so it emits ignore_count + 1 closing
             * braces. */
            for (int brace = -1; brace < ignore_count; ++brace) {
                ECHOC('}');
            }
            ignore_count = 1;
            BEGIN INITIAL;
        }
    }

    /* Disabled rule, kept for reference: */
    /*
    [{|}] {
        ECHOC('\\');
        ECHOC(yytext[0]);
    }
    */

    .|\n {
        ECHO;
    }
}
|
|
%%
|