This commit is contained in:
anon 2023-11-19 22:13:04 +01:00
parent 342be3b4d1
commit b2db7c0e14
9 changed files with 329 additions and 252 deletions

View File

@ -16,13 +16,13 @@ OBJECT.d := object/
TEST.d := test/
INSTALL.d := /bin/
SOURCE := main.cpp xml.cpp csml.cpp cli.cpp global.cpp html_special.cpp
SOURCE := main.cpp xml.cpp csml.cpp cli.cpp html_special.cpp
OBJECT := $(addprefix ${OBJECT.d}/,${SOURCE})
OBJECT := ${OBJECT:.cpp=.o}
OBJECT := ${OBJECT:.c=.o}
%.cpp: %.l
${LEX} --prefix=$(basename $(notdir $<)) ${LFLAGS} -o $@ $<
${LEX} --prefix=$(basename $(notdir $<))_ ${LFLAGS} -o $@ $<
${OBJECT.d}/%.o: ${SOURCE.d}/%.cpp
${COMPILE.cpp} -o $@ $<
@ -36,10 +36,10 @@ install: ${OUT}
uninstall:
${RM} ${INSTALL.d}/${OUT}
vim_install: install
vim_install:
cp plugin/contra.vim ~/.vim/plugin/
code_install: install code
code_install: code
code --install-extension plugin/vscode/*.vsix
code:
@ -48,9 +48,11 @@ code:
test:
bat ${TEST.d}/draft.csml
${WRAP} ./${OUT} -i '$$html' -c ${TEST.d}/draft.csml
bat ${TEST.d}/draft.html
${WRAP} ./${OUT} -i '$$html' -x ${TEST.d}/draft.html
${WRAP} ./${OUT} -s 'html' -c ${TEST.d}/draft.csml
bat --paging=never ${TEST.d}/draft.html
${WRAP} ./${OUT} -s 'html' -x ${TEST.d}/draft.html
bat --paging=never ${TEST.d}/complex.html
${WRAP} ./${OUT} -s 'html' -x ${TEST.d}/complex.html
clean:
-rm ${OUT}

View File

@ -1,18 +1,38 @@
#include "cli.hpp"
#include "exit_values.hpp"
#include <getopt.h>
#include <stdio.h>
#include <string.h>
#include <stack>
#include "exit_values.hpp"
#include "scanner.hpp"
#define DECLARE_LEXER(x) \
extern FILE * x ## _in; \
extern FILE * x ## _out; \
extern int x ## _lex(void); \
DECLARE_LEXER(csml);
DECLARE_LEXER(xml);
extern std::stack<std::string> csml_tag_stack;
extern unsigned xml_tag_stack;
const char * const csml_extension = ".csml";
static
const char * const version =
# include "version.inc"
;
static
const char * const help_message =
"contra [options] <file>+\n"
" -c : the input is to be force interpeted as CSML\n"
" -x : the input is to be force interpeted as XML/HTML\n"
" -s <string> : colon separeted list of option sets\n"
" -S <string> : colon separeted list of special asymetric tags starters\n"
" -i <string> : colon separeted list of tags which contents should be ignored\n"
" -o <file> : specify output file name for the NEXT file\n"
" -q <char> : use <char> for quoting (default: \"'\")\n"
@ -20,9 +40,51 @@ const char * const help_message =
" -h : print help and quit\n"
;
const char * output = NULL;
const char * input = NULL;
enum class input_type_t {
CSML,
XML,
AUTO_DETECT,
} input_type = input_type_t::AUTO_DETECT;
// ### Local functions ###
/* Open `path` with the given fopen() `mode` and store the stream in `file`.
 * If the file cannot be opened, an error naming the path is written to
 * stderr and the process exits with IO_ERROR; the function only returns
 * on success.
 */
static
inline
void try_fopen(FILE * &file, const char * const path, const char * const mode) {
    file = fopen(path, mode);
    if (file != NULL) {
        return;
    }

    // Failure path: report, make sure the message is out, then quit
    fprintf(stderr, "Error opening file '%s'.\n", path);
    fflush(stderr);
    exit(IO_ERROR);
}
/* Run one lexer over the current `input` file.
 *  yyin   - the lexer's input stream variable; pointed at `input`
 *  yyout  - the lexer's output stream variable; pointed at `output`,
 *           or at stdout when no output file was requested
 *  yylex_ - the lexer's scanning function
 * Both streams are closed again after scanning (stdin/stdout excepted).
 * Failing to open either file terminates the process (see try_fopen()).
 */
static
void yylex(FILE * &yyin, FILE * &yyout, int (*yylex_)(void)) {
    /* The input is opened first on purpose: opening the output with "w"
     * truncates it, which must not happen when the input turns out to be
     * unreadable.
     */
    try_fopen(yyin, input, "r");

    if (output) {
        try_fopen(yyout, output, "w");
    } else {
        yyout = stdout;
    }

    yylex_();

    if (yyin != stdin) {
        fclose(yyin);
    }
    if (yyout != stdout) {
        fclose(yyout);
    }
}
// ### global functions ###
extern "C"
signed parse_round1_arguments(int argc, char * * argv){
const char * const optstring = "-" "hv" "cxi:o:q:";
const char * const optstring = "-" "hv" "cxs:S:i:o:q:";
static struct option long_options[] = {
{"help", no_argument, 0, 'h'},
@ -48,3 +110,95 @@ signed parse_round1_arguments(int argc, char * * argv){
return 0;
}
extern "C"
signed parse_round2_arguments(int argc, char * * argv) {
for (int n = 0; n < argc; n++) {
if (!strcmp(argv[n], "-c")) {
input_type = input_type_t::CSML;
} else if (!strcmp(argv[n], "-x")) {
input_type = input_type_t::XML;
} else if (!strcmp(argv[n], "-q")) {
++n;
quote = argv[n][0];
} else if (!strcmp(argv[n], "-i")) {
++n;
parse_colon_list(argv[n], ignore_list);
} else if (!strcmp(argv[n], "-s")) {
++n;
const int err = parse_sets(argv[n]);
if (err) {
exit(err);
}
} else if (!strcmp(argv[n], "-S")) {
++n;
parse_colon_list(argv[n], asymmetric_special_list);
} else if (!strcmp(argv[n], "-o")) {
++n;
output = argv[n];
} else {
input = argv[n];
if (input_type == input_type_t::AUTO_DETECT) {
if (!strcmp(input + strlen(input) - (sizeof(csml_extension)-1), csml_extension)) {
input_type = input_type_t::CSML;
} else {
input_type = input_type_t::XML;
}
}
switch (input_type) {
case input_type_t::CSML: {
yylex(csml_in, csml_out, csml_lex);
if (not csml_tag_stack.empty()) {
exit(POPULATED_STACK);
}
} break;
case input_type_t::XML: {
yylex(xml_in, xml_out, xml_lex);
if(xml_tag_stack) {
exit(POPULATED_STACK);
}
} break;
default: {
};
}
output = NULL;
}
}
return 0;
}
/* Split `list` on ':' and append each non-empty field to `destination`.
 * `list` is modified in place by strtok(). A NULL, empty or delimiter-only
 * list appends nothing (strtok() yields NULL at once in that case, which
 * must not be handed to the std::string constructor).
 * Always returns 0.
 * NOTE(review): `destination` is received by value, so the fields are added
 *  to a copy and never reach the caller's vector. Turning it into a
 *  reference requires changing the declaration in cli.hpp in the same step.
 */
extern "C"
signed parse_colon_list(char * const list, std::vector<std::string> destination) {
    if (!list) {
        return 0;
    }

    const char * const delimiter = ":";
    for (char * field = strtok(list, delimiter);
         field;
         field = strtok(NULL, delimiter)) {
        destination.emplace_back(field);
    }

    return 0;
}
/* Apply every option set named in the colon separated `list`.
 * `list` is modified in place by strtok(). A NULL, empty or delimiter-only
 * list names no set and is accepted (strtok() yields NULL at once in that
 * case, which must not be handed to strcmp()).
 * Known sets:
 *  html - ignore the contents of <style> and <script>,
 *         treat '?' as an asymmetric special tag starter
 * Returns 0 on success and UNKNOWN_SET at the first unrecognized name;
 * sets listed before the unrecognized one stay applied.
 */
extern "C"
signed parse_sets(char * const list) {
    if (!list) {
        return 0;
    }

    const char * const delimiter = ":";
    for (char * set_name = strtok(list, delimiter);
         set_name;
         set_name = strtok(NULL, delimiter)) {
        if (strcmp(set_name, "html")) {
            return UNKNOWN_SET;
        }
        ignore_list.emplace_back("style");
        ignore_list.emplace_back("script");
        asymmetric_special_list.emplace_back("?");
    }

    return 0;
}

View File

@ -1,10 +1,19 @@
#ifndef CLI_H
#include <vector>
#include <string>
extern "C" signed parse_colon_list(char * const list, std::vector<std::string> destination);
extern "C" signed parse_sets(char * const list);
/* Parse arguments with permanent effects (-h)
* Perform validation.
*/
extern "C"
signed parse_round1_arguments(int argc, char * * argv);
extern "C" signed parse_round1_arguments(int argc, char * * argv);
/* Parse context sensitive arguments
*/
extern "C" signed parse_round2_arguments(int argc, char * * argv);
#define CLI_H
#endif

View File

@ -9,7 +9,7 @@
#include "html_special.hpp"
#include "global.hpp"
std::stack<std::string> tag_stack;
std::stack<std::string> csml_tag_stack;
static std::string tag_candidate = "";
@ -37,17 +37,17 @@ void push_tag() {
}
trim(tag_candidate);
tag_stack.push(tag_candidate);
csml_tag_stack.push(tag_candidate);
tag_candidate = "";
}
static
void pop_tag() {
if (tag_stack.empty()) {
if (csml_tag_stack.empty()) {
exit(TAG_NOT_FOUND);
}
tag_stack.pop();
csml_tag_stack.pop();
tag_candidate = "";
}
@ -87,7 +87,7 @@ unicode [\300-\364]
}
\( {
push_tag();
ECHOS(("<" + tag_stack.top() + " ").c_str());
ECHOS(("<" + csml_tag_stack.top() + " ").c_str());
BEGIN HEAD;
}
&#?{identifier}; {
@ -101,15 +101,15 @@ unicode [\300-\364]
}
\{ {
push_tag();
ECHOS(("<" + tag_stack.top() + ">").c_str());
if (do_ignore(tag_stack.top())) {
ECHOS(("<" + csml_tag_stack.top() + ">").c_str());
if (do_ignore(csml_tag_stack.top())) {
buffer = std::string("");
BEGIN IGNORE_COUNT_START;
}
}
\} {
ECHO_CANDIDATE;
ECHOS(("</" + tag_stack.top() + ">").c_str());
ECHOS(("</" + csml_tag_stack.top() + ">").c_str());
pop_tag();
}
\< {
@ -211,7 +211,7 @@ unicode [\300-\364]
ignore_count = 1;
ECHOS(buffer.c_str());
ECHOS(("</" + tag_stack.top() + ">").c_str());
ECHOS(("</" + csml_tag_stack.top() + ">").c_str());
pop_tag();
BEGIN BODY;
}
@ -235,7 +235,7 @@ unicode [\300-\364]
BEGIN IGNORE_COUNT_END;
} else {
ECHOS(buffer.c_str());
ECHOS(("</" + tag_stack.top() + ">").c_str());
ECHOS(("</" + csml_tag_stack.top() + ">").c_str());
pop_tag();
BEGIN BODY;
}

View File

@ -1,9 +1,10 @@
const static int EXIT_EARLY_SUCCESS = 400;
enum {
UNKNOWN_OPTION = 1,
IO_ERROR = 2,
UNKNOWN_SET = 3,
POPULATED_STACK = 4, // most likely signals that more tags were opened than closed, ie the user forgot adding a '}' somewhere
TAG_NOT_NAMED = 5,
TAG_NOT_FOUND = 6,
UNKNOWN_OPTION = 1,
IO_ERROR = 2,
UNKNOWN_SET = 3,
POPULATED_STACK = 4, // most likely signals that more tags were opened than closed, ie the user forgot adding a '}' somewhere
TAG_NOT_NAMED = 5,
TAG_NOT_FOUND = 6,
UNRECOGNIZED_TAG = 7,
};

View File

@ -1,36 +0,0 @@
#include "global.hpp"
#include <string.h>
#include <string>
#include <vector>
std::vector<std::string> ignore_list;
int ignore_count = 1;
int ignore_i = 1;
std::string buffer;
/* Reduce `s`, in place, to the first run of identifier characters
 * ([A-Za-z0-9_]) it contains. Everything in front of that run is skipped
 * and the result is terminated right after the run. A string without any
 * identifier character becomes the empty string.
 */
void trim(char * const s) {
    const auto is_word = [](const char c) {
        return (c >= 'a' && c <= 'z')
            || (c >= 'A' && c <= 'Z')
            || (c >= '0' && c <= '9')
            || (c == '_');
    };

    // Skip whatever precedes the first identifier character
    const char * source = s;
    while (*source != '\0' && !is_word(*source)) {
        ++source;
    }

    // Move the run to the front; the terminating NUL is not a word character
    char * destination = s;
    while (is_word(*source)) {
        *destination++ = *source++;
    }
    *destination = '\0';
}
/* Reduce `s` to the first run of identifier characters ([A-Za-z0-9_]) it
 * contains; a string without any becomes empty.
 * Works on the string directly: no strdup()/free() round trip, hence no
 * unchecked allocation and nothing to leak if the assignment throws.
 */
void trim(std::string &s) {
    const auto is_word = [](const char c) {
        return (c >= 'A' && c <= 'Z')
            || (c >= 'a' && c <= 'z')
            || (c >= '0' && c <= '9')
            || (c == '_');
    };

    // Like the C string overload, only look at what precedes the first NUL
    const std::size_t limit = strlen(s.c_str());

    std::size_t first = 0;
    while (first < limit && !is_word(s[first])) {
        ++first;
    }

    std::size_t last = first;
    while (last < limit && is_word(s[last])) {
        ++last;
    }

    s = s.substr(first, last - first);
}

View File

@ -5,38 +5,18 @@
#include <libgen.h>
#include <vector>
#include <string>
#include <stack>
#include <map>
#include "cli.hpp"
#include "scanner.hpp"
#include "exit_values.hpp"
extern std::stack<std::string> tag_stack;
std::vector<std::string> ignore_list;
std::vector<std::string> asymmetric_special_list;
#define DECLARE_LEXER(x) \
extern FILE * x ## in; \
extern FILE * x ## out; \
extern int x ## lex(void); \
int ignore_count = 1;
int ignore_i = 1;
DECLARE_LEXER(csml);
DECLARE_LEXER(xml);
char * output = NULL;
char * input = NULL;
const char * const csml_extension = ".csml";
const std::map<const char * const, std::vector<std::string>> sets = {
{"$html", {"style", "script"}},
};
enum class input_type_t {
CSML,
XML,
AUTO_DETECT,
} input_type = input_type_t::AUTO_DETECT;
const char * const auto_output_extensions[] = {csml_extension, ".html"};
std::string buffer;
char * output_name_from_input_name(const char * const input, const char * const extension) {
char * input_duplicate = strdup(input);
@ -63,107 +43,40 @@ char * output_name_from_input_name(const char * const input, const char * const
return r;
}
static
inline
void try_fopen(FILE * &file, const char * const path, const char * const mode) {
file = fopen(path, mode);
if (!file) {
fprintf(stderr, "Error opening file '%s'.\n", path);
fflush(stderr);
exit(IO_ERROR);
}
/* Cut `s` down, in place, to its first run of identifier characters
 * ([A-Za-z0-9_]): leading characters outside that set are dropped and the
 * string ends where the run ends. If there is no such character at all the
 * result is the empty string.
 */
void trim(char * const s) {
    const auto is_word_char = [](const char c) -> bool {
        return (c == '_')
            || (c >= '0' && c <= '9')
            || (c >= 'a' && c <= 'z')
            || (c >= 'A' && c <= 'Z');
    };

    // Find where the run starts
    size_t read = 0;
    while (s[read] != '\0' && !is_word_char(s[read])) {
        ++read;
    }

    // Shift the run to the beginning of the buffer
    size_t write = 0;
    for (; is_word_char(s[read]); ++read) {
        s[write++] = s[read];
    }
    s[write] = '\0';
}
static
void yylex(FILE * &yyin, FILE * &yyout, int (*yylex_)(void)) {
/* --- Preparation --- */
if (output) {
try_fopen(yyout, output, "w");
} else {
yyout = stdout;
}
try_fopen(yyin, input, "r");
/* --- Meat --- */
yylex_();
/* --- Clean up --- */
if (yyin != stdin) {
fclose(yyin);
}
if (yyout != stdout) {
fclose(yyout);
}
/* Reduce `s` to the first run of identifier characters ([A-Za-z0-9_]) it
 * contains; a string without any becomes empty.
 * The string is edited directly, which avoids the strdup()/free() pair:
 * strdup() may return NULL and its result would leak if the assignment
 * threw.
 */
void trim(std::string &s) {
    const auto is_word = [](const char c) {
        return (c >= 'A' && c <= 'Z')
            || (c >= 'a' && c <= 'z')
            || (c >= '0' && c <= '9')
            || (c == '_');
    };

    // Like the C string overload, only look at what precedes the first NUL
    const std::size_t limit = strlen(s.c_str());

    std::size_t begin = 0;
    while (begin < limit && !is_word(s[begin])) {
        ++begin;
    }

    std::size_t end = begin;
    while (end < limit && is_word(s[end])) {
        ++end;
    }

    s.erase(end);       // drop everything behind the run
    s.erase(0, begin);  // drop everything in front of it
}
signed main(int argc, char * * argv) {
{
const int b = parse_round1_arguments(argc - 1, argv + 1);
switch (b) {
case 0: break;
case EXIT_EARLY_SUCCESS: exit(EXIT_SUCCESS);
default: exit(b);
}
const int b = parse_round1_arguments(argc - 1, argv + 1);
switch (b) {
case 0: break;
case EXIT_EARLY_SUCCESS: exit(EXIT_SUCCESS);
default: exit(b);
}
parse_round2_arguments(argc - 1, argv + 1);
for (int n = 1; n < argc; n++) {
if (!strcmp(argv[n], "-c")) {
input_type = input_type_t::CSML;
} else if (!strcmp(argv[n], "-x")) {
input_type = input_type_t::XML;
} else if (!strcmp(argv[n], "-q")) {
++n;
quote = argv[n][0];
} else if (!strcmp(argv[n], "-i")) {
++n;
const char * delimiter = ":";
char * data = strtok(argv[n], delimiter);
int i = 0;
do {
if (data[0] == '$') {
const auto &&set = sets.find("$html");
[[ likely ]] if (set != sets.end()) {
ignore_list.insert(ignore_list.begin(), set->second.begin(), set->second.end());
} else {
exit(UNKNOWN_SET);
}
} else [[ likely ]] {
ignore_list.emplace_back(data);
}
++i;
} while((data = strtok(NULL, delimiter), data));
} else if (!strcmp(argv[n], "-o")) {
++n;
output = argv[n];
} else {
input = argv[n];
if (input_type == input_type_t::AUTO_DETECT) {
if (!strcmp(input + strlen(input) - (sizeof(csml_extension)-1), csml_extension)) {
input_type = input_type_t::CSML;
} else {
input_type = input_type_t::XML;
}
}
switch (input_type) {
case input_type_t::CSML: {
yylex(csmlin, csmlout, csmllex);
if (not tag_stack.empty()) {
exit(POPULATED_STACK);
}
} break;
case input_type_t::XML: {
yylex(xmlin, xmlout, xmllex);
} break;
default: {
};
}
output = NULL;
}
}
return EXIT_SUCCESS;
}

View File

@ -12,6 +12,7 @@
#define BUFFER(s) buffer += s
extern std::vector<std::string> ignore_list;
extern std::vector<std::string> asymmetric_special_list;
inline
bool do_ignore(const std::string &current_tag) {
@ -21,12 +22,21 @@ bool do_ignore(const std::string &current_tag) {
!= ignore_list.end();
}
extern int ignore_count; // number of '{' / '}'s to be placed around the current ignored block
extern int ignore_i; // number of '}'s so far
extern std::string buffer;
extern char quote;
/* number of '{' / '}'s to be placed around the current ignored block
*/
extern int ignore_count;
/* number of '}'s so far
*/
extern int ignore_i;
/* used for saving sections whose starting projection
 * cannot be determined before reading the whole
 * (e.g. comments (single- or multiline?))
 */
extern std::string buffer;
#define SCANNER_H
#endif

View File

@ -5,10 +5,12 @@
#include "global.hpp"
#include "html_special.hpp"
#include "exit_values.hpp"
bool is_comment_multiline;
std::string current_tag;
long ignore_start;
unsigned xml_tag_stack = 0;
bool is_asymmetric;
unsigned long long cursor_position = 0;
#define YY_USER_ACTION cursor_position += yyleng;
@ -19,13 +21,14 @@ unsigned long long cursor_position = 0;
%option noyylineno
%x TAG_START TAG_MAYBE TAG
%x TAG_ASYMETRIC_SPECIAL
%x COMMENT
%x STRING
%x IGNORE IGNORE_SEEK IGNORE_COUNT_START IGNORE_COUNT_END
%x IGNORE IGNORE_COUNT_START IGNORE_COUNT_END
ws [ \t\r\v\f]
wsnl [ \t\r\v\f\n]
identifier [A-z][A-z0-9]*
identifier [A-z!][A-z0-9]*
%%
<INITIAL>{
@ -69,6 +72,7 @@ identifier [A-z][A-z0-9]*
<TAG_START>{
\/{identifier}+{wsnl}*\> {
--xml_tag_stack;
ECHOC('}');
BEGIN INITIAL;
}
@ -77,14 +81,28 @@ identifier [A-z][A-z0-9]*
current_tag = yytext;
BEGIN TAG_MAYBE;
}
. {
is_asymmetric = std::find(asymmetric_special_list.begin(),
asymmetric_special_list.end(),
yytext)
!= asymmetric_special_list.end();
if (is_asymmetric) {
ECHOC('<');
ECHO;
BEGIN TAG_ASYMETRIC_SPECIAL;
} else {
exit(UNRECOGNIZED_TAG);
}
}
}
<TAG_MAYBE>{
\> {
++xml_tag_stack;
ECHOS(" {");
if (do_ignore(current_tag)) {
ignore_start = cursor_position;
BEGIN IGNORE_SEEK;
buffer = std::string("");
BEGIN IGNORE;
} else {
BEGIN INITIAL;
}
@ -111,18 +129,38 @@ identifier [A-z][A-z0-9]*
ECHOS(": ");
}
\> {
++xml_tag_stack;
ECHOS(") {");
if (do_ignore(current_tag)) {
ignore_start = cursor_position;
BEGIN IGNORE_SEEK;
buffer = std::string("");
BEGIN IGNORE;
} else {
BEGIN INITIAL;
}
}
\/\> {
ECHOC(';');
ECHOS(");");
BEGIN INITIAL;
}
{ws} {
ECHOS(", ");
}
.|\n {
ECHO;
}
}
<TAG_ASYMETRIC_SPECIAL>{
.\> {
ECHO;
is_asymmetric = std::find(asymmetric_special_list.begin(),
asymmetric_special_list.end(),
(std::string("") + yytext[0]))
!= asymmetric_special_list.end();
if (is_asymmetric) {
BEGIN INITIAL;
}
}
.|\n {
ECHO;
}
@ -130,6 +168,7 @@ identifier [A-z][A-z0-9]*
<STRING>{
[^\\]\"|\' {
ECHOC(yytext[0]);
BEGIN TAG;
}
, {
@ -140,79 +179,64 @@ identifier [A-z][A-z0-9]*
}
}
<IGNORE_SEEK>{
<IGNORE>{
\<\/{identifier}+\> {
--xml_tag_stack;
char * dup;
dup = strdup(yytext);
trim(dup);
if (!strcmp(dup, current_tag.c_str())) {
const int eq = !strcmp(dup, current_tag.c_str());
free(dup);
if (eq) {
for (int i = 0; i < ignore_count; i++) {
ECHOC('{');
}
fseek(yyin, ignore_start, SEEK_SET);
YY_FLUSH_BUFFER;
BEGIN IGNORE;
} else {
ECHO;
}
free(dup);
}
\{ {
BEGIN IGNORE_COUNT_START;
}
\} {
BEGIN IGNORE_COUNT_END;
}
.|\n {
}
}
<IGNORE_COUNT_START>{
\{ {
++ignore_i;
}
}
<IGNORE_COUNT_END>{
\} {
++ignore_i;
}
}
<IGNORE_COUNT_START,IGNORE_COUNT_END>{
.|\n {
if (ignore_i > ignore_count) {
ignore_count = ignore_i;
}
ignore_i = 0;
BEGIN IGNORE_SEEK;
}
}
<IGNORE>{
\<\/{identifier}+\> {
char * dup;
dup = strdup(yytext);
trim(dup);
if (!strcmp(dup, current_tag.c_str())) {
ECHOS(buffer.c_str());
for (int i = -1; i < ignore_count; i++) {
ECHOC('}');
}
ignore_count = 1;
BEGIN INITIAL;
} else {
BUFFER(yytext);
ECHO;
}
free(dup);
}
/*
[{|}] {
ECHOC('\\');
ECHOC(yytext[0]);
\{ {
BUFFER(yytext);
BEGIN IGNORE_COUNT_START;
}
\} {
BUFFER(yytext);
BEGIN IGNORE_COUNT_END;
}
*/
.|\n {
ECHO;
BUFFER(yytext);
}
}
<IGNORE_COUNT_START>{
\{ {
BUFFER(yytext);
++ignore_i;
}
}
<IGNORE_COUNT_END>{
\} {
BUFFER(yytext);
++ignore_i;
}
}
<IGNORE_COUNT_START,IGNORE_COUNT_END>{
.|\n {
BUFFER(yytext);
if (ignore_i > ignore_count) {
ignore_count = ignore_i;
}
ignore_i = 0;
BEGIN IGNORE;
}
}
%%