works for the most part

This commit is contained in:
anon 2023-11-13 20:24:59 +01:00
parent a4247754a5
commit 9fba401f78
13 changed files with 344 additions and 19 deletions

1
.gitignore vendored
View File

@ -1,3 +1,4 @@
*.out
*.o
*.yy.*
.gdb_history

View File

@ -1,17 +1,35 @@
#LFLAGS := $(if $DEBUG, --debug --trace)
LDLIBS := -lboost_string_algo
.PHONY: clean test run
ifeq ($(DEBUG), 1)
LFLAGS += --debug --trace
CXXFLAGS += -Wall -Wextra -Wpedantic
CXXFLAGS += -DDEBUG -O0 -ggdb -pg -fno-inline
endif
OUT := a.out
SOURCE.d := source/
OBJECT.d := object/
TEST.d := test/
main:
${LEX} ${LFLAGS} -o ${SOURCE.d}/main.yy.cpp ${SOURCE.d}/main.l
${CXX} ${SOURCE.d}/main.yy.cpp -o ${OUT}
SOURCE := main.cpp xml.cpp cli.cpp
OBJECT := $(addprefix ${OBJECT.d}/,${SOURCE})
OBJECT := ${OBJECT:.cpp=.o}
OBJECT := ${OBJECT:.c=.o}
%.cpp: %.l
${LEX} ${LFLAGS} -o $@ $<
${OBJECT.d}/%.o: ${SOURCE.d}/%.cpp
${COMPILE.cpp} -o $@ $<
${OUT}: ${OBJECT}
${LINK.cpp} -o $@ ${OBJECT} ${LDLIBS}
run:
./${OUT} ${TEST.d}/draft.csml
# ./${OUT} ${TEST.d}/draft.csml
./${OUT} ${TEST.d}/draft.html
clean:
-rm ${OUT}
-rm ${OBJECT}

View File

@ -44,19 +44,28 @@ and seemlessly translate it in the end.
```
<tag> [(<head>)] {<body>}
```
```
<tag>;
```
Example:
```
div (?! find a stand alone attribute) { lorem ipsum }
textarea (readonly) { lorem ipsum }
```
```
br;
```
The last __identifier__,
defined by this regular expression: `[A-z][A-z0-9]*`,
before a (optional) _head_ or _body_
before a (optional) _head_, _body_ or semi-colon
is considered to be a __tag__.
CSML by itself does not enforce any (sub)set of words to be "valid"
(however, related tools might).
Each _tag_ is pushed into a stack and later popped by the end of a body being found.
If the _tag_ is followed by a semi-colon (';'),
it's a self-closing tag.
The __head__ holds _attributes_.
A missing _head_ signals that there are no attributes to be translated.
Any text may be a valid __attribute__.
@ -78,6 +87,7 @@ List of escaped special characters:
+ \}
+ \,
+ \:
+ \;
Note that escaping them is not always required,
but it is highly advised.
@ -118,7 +128,7 @@ are different sides of the same coin.
```
?! [options] <file>+
-c : the input is to be force interpreted as CSML
-? : the input is to be force interpreted as XML/HTML
-x : the input is to be force interpreted as XML/HTML
-o <file> : specify output file name for the NEXT file
-q <char> : use <char> for quoting (default: "'")
-v : print version and quit

0
object/.gitkeep Normal file
View File

46
source/cli.cpp Normal file
View File

@ -0,0 +1,46 @@
#include "cli.h"
#include <getopt.h>
#include <stdio.h>
/* Version string; the literal itself is pulled in from version.inc.
 * parse_r1_arguments() writes it to stdout for -v / --version.
 */
const char * const version =
# include "version.inc"
;
/* Usage text; parse_r1_arguments() writes it to stdout for -h / --help. */
const char * help_message =
"?! [options] <file>+\n"
" -c : the input is to be force interpeted as CSML\n"
" -x : the input is to be force interpeted as XML/HTML\n"
" -o <file> : specify output file name for the NEXT file\n"
" -q <char> : use <char> for quoting (default: \"'\")\n"
" -v : print version and quit\n"
" -h : print help and quit\n"
;
/* First pass over the command line: handles the options that end the
 * program right away (-h/--help, -v/--version) and rejects anything
 * getopt_long() does not accept.  -c, -x, -o and -q are only recognised
 * here so that they validate; acting on them is left to the caller.
 *
 * Returns:
 *   0 - nothing terminal was requested, the caller should carry on
 *   1 - the help or version text was written to stdout
 *   2 - getopt_long() reported an unknown option or a missing argument
 */
signed parse_r1_arguments(int argc, char * * argv){
    const char * const optstring = "hv" "cxo:q:";
    static const struct option long_options[] = {
        {"help",    no_argument, 0, 'h'},
        {"version", no_argument, 0, 'v'},
        {0, 0, 0, 0}
    };

    /* getopt_long() returns an int.  Storing it in a plain char breaks the
     * comparison with -1 wherever char is unsigned (the loop would never
     * end), so the result is kept at its full width.
     */
    int opt;
    while ((opt = getopt_long(argc, argv, optstring, long_options, NULL)) != -1) {
        switch (opt) {
            case 'h': {
                fputs(help_message, stdout);
            } return 1;
            case 'v': {
                fputs(version, stdout);
            } return 1;
            case '?': {
                /* getopt_long() has already printed a diagnostic */
            } return 2;
            default: {
                /* -c, -x, -o, -q: valid, but handled by the caller */
            } break;
        }
    }

    return 0;
}

9
source/cli.h Normal file
View File

@ -0,0 +1,9 @@
#ifndef CLI_H
#define CLI_H

/* Handles the command line arguments that have a permanent effect
 * (such as -h) and validates the rest.
 *
 * Yields 1 after printing the help or version text, 2 when an invalid
 * option was met and 0 when the caller is expected to continue.
 */
signed parse_r1_arguments(int argc, char * * argv);

#endif

View File

@ -1,12 +1,11 @@
%{
#include "scanner.h"
#include <stdio.h>
#include <stack>
#include <string>
#include <boost/algorithm/string/trim.hpp>
#define ECHOS(s) fwrite(s, strlen(s), sizeof(char), yyout)
#define ECHO_CANDIDATE _ECHO_CANDIDATE()
std::stack<std::string> tag_stack;
std::string tag_candidate = "";
@ -77,6 +76,10 @@ identifier [A-z][A-z0-9]*
ECHOS(("<" + tag_stack.top() + " ").c_str());
BEGIN HEAD;
}
; {
ECHOS(("<" + tag_candidate + "/>").c_str());
tag_candidate = "";
}
\{ {
push_tag();
ECHOS(("<" + tag_stack.top() + ">").c_str());
@ -156,8 +159,3 @@ identifier [A-z][A-z0-9]*
}
%%
signed main(int argc, char * * argv) {
yyin = fopen(argv[1], "r");
yylex();
}

108
source/main.cpp Normal file
View File

@ -0,0 +1,108 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>
#include <libgen.h>
#include "cli.h"
/* Scanner interface.  These symbols are only declared here; presumably they
 * are provided by the flex-generated scanner that gets linked in.
 */
extern FILE * yyin;
extern FILE * yyout;
extern int yylex(void);
/* Quote character; main() replaces it with the argument of -q. */
const char DEFAULT_QUOTE = '\'';
char quote = DEFAULT_QUOTE;
/* Output file name taken from -o (NULL when none was given) and the name of
 * the input file currently being processed; both are assigned in main().
 */
char * output = NULL;
char * input = NULL;
/* File name suffix that main() uses to recognise CSML input. */
const char * const csml_extension = ".csml";
/* Kind of input.  AUTO_DETECT means neither -c nor -x forced a choice and
 * main() decides from the file name.
 */
enum input_type_t {
CSML,
XML,
AUTO_DETECT,
} input_type = AUTO_DETECT;
/* NOTE(review): not referenced by the code visible in this file; presumably
 * meant to be paired with output_name_from_input_name() - confirm the intended
 * index/order before using it.
 */
const char * const auto_output_extensions[] = {csml_extension, ".html"};
/* Derive an output file name from `input` by swapping its extension.
 *
 * The directory part of `input` is kept as is, everything from the last '.'
 * of the file name part onwards is dropped, and `extension` (which brings its
 * own leading dot, e.g. ".html") is appended:
 *     "test/draft.csml" + ".html"  ->  "test/draft.html"
 * A file name without a dot, or one whose only dot is its first character
 * (a hidden file), simply gets `extension` appended.
 *
 * Returns a malloc()ed string that the caller must free(), or NULL when the
 * allocation fails.
 */
char * output_name_from_input_name(const char * const input, const char * const extension) {
    /* Only a dot inside the last path component separates an extension;
     * a dot in a directory name ("a.b/c") must not be touched.
     */
    const char * const last_slash = strrchr(input, '/');
    const char * const name       = last_slash ? last_slash + 1 : input;
    const char * const last_dot   = strrchr(name, '.');

    const size_t stem_length = (last_dot && last_dot != name)
                             ? (size_t)(last_dot - input)
                             : strlen(input);
    const size_t extension_length = strlen(extension);

    /* +1: room for the terminating NUL */
    char * r = (char *)malloc(stem_length + extension_length + 1);
    if (!r) {
        return NULL;
    }

    memcpy(r, input, stem_length);
    /* copies the extension together with its terminating NUL */
    memcpy(r + stem_length, extension, extension_length + 1);

    return r;
}
signed main(int argc, char * * argv) {
switch (parse_r1_arguments(argc - 1, argv + 1)) {
case 1: {
} return 0;
case 2: {
} return 1;
default: {
} break;
}
for (int n = argc - 1; n; n--) {
if (!strcmp(argv[n], "-c")) {
input_type = input_type_t::CSML;
} else if (!strcmp(argv[n], "-x")) {
input_type = input_type_t::XML;
} else if (!strcmp(argv[n], "-q")) {
++n;
quote = argv[n][0];
} else if (!strcmp(argv[n], "-o")) {
++n;
output = argv[n];
} else {
/* --- Preparation --- */
input = argv[n];
if (input_type == AUTO_DETECT) {
if (!strcmp(input + strlen(input) - (sizeof(csml_extension)-1), csml_extension)) {
input_type = CSML;
} else {
input_type = XML;
}
}
if (output) {
yyout = fopen(output, "w");
} else {
yyout = stdout;
}
yyin = fopen(input, "r");
yyout = fopen(output, "w");
/* --- Meat --- */
yylex();
/* --- Clean up --- */
if (yyin != stdin) {
fclose(yyin);
}
if (yyout != stdout) {
fclose(yyout);
}
free(output);
output = NULL;
}
}
}

6
source/scanner.h Normal file
View File

@ -0,0 +1,6 @@
#ifndef SCANNER_H
#define SCANNER_H

/* ECHOS(s): write the NUL-terminated string `s` to yyout.
 * `s` is expanded twice, and the including file has to bring strlen(),
 * fwrite() and yyout into scope itself.
 */
#define ECHOS(s) fwrite(s, strlen(s), sizeof(char), yyout)

#endif

1
source/version.inc Normal file
View File

@ -0,0 +1 @@
"v0.4"

115
source/xml.l Normal file
View File

@ -0,0 +1,115 @@
%{
/* Scanner for XML/HTML input; the rules below write their translation of
 * the markup to yyout.
 */
#include "scanner.h"
#include <stdio.h>
/* Set to true once a newline is matched inside a comment (COMMENT state). */
bool is_comment_multiline = false;
/* Receives ftell(yyin) when a comment opener is matched. */
unsigned long long comment_begining;
%}
%option noyywrap
%option nodefault
%x TAG_START TAG_MAYBE TAG COMMENT STRING
/* NOTE(review): the range A-z used by identifier also covers the six ASCII
 * punctuation characters that sit between Z and a - confirm this is intended.
 */
ws [ \t\r\v\f]
wsnl [ \t\r\v\f\n]
identifier [A-z][A-z0-9]*
%%
<INITIAL>{
\< {
/* Start of a tag: nothing is written for the bracket itself. */
BEGIN TAG_START;
}
\<\!-- {
/* Comment opener (wins over the rule above by being the longer match):
 * the input position is recorded and // is written in its place.
 */
comment_begining = ftell(yyin);
ECHOS("//");
BEGIN COMMENT;
}
.|\n {
/* Everything else is copied through unchanged. */
ECHO;
}
}
<COMMENT>{
. {
    /* Comment text is copied through after the // written by the opener. */
    ECHO;
}
\n {
    /* Keep the line break and reopen the comment on the next line, so a
     * comment spanning several lines becomes a run of // line comments.
     */
    is_comment_multiline = true;
    ECHO;
    ECHOS("//");
}
--\> {
    /* End of the comment.  Nothing is patched after the fact any more:
     * the earlier attempt seeked and wrote on yyin, which is the input
     * stream opened for reading, so it could never alter the output and
     * left the stream in an error state.
     */
    BEGIN INITIAL;
}
}
<TAG_START>{
\/{identifier}+{wsnl}*\> {
/* Closing tag: written as a closing brace, whatever the tag name is. */
ECHOS("}");
BEGIN INITIAL;
}
{identifier}+ {
/* Tag name of an opening tag: copied through; what follows decides
 * whether a head is needed.
 */
ECHO;
BEGIN TAG_MAYBE;
}
/* NOTE(review): no rule covers any other input in this state (for example
 * an exclamation mark that does not start a comment).  With %option nodefault
 * the scanner presumably aborts on it - confirm and add a fallback if needed.
 */
}
<TAG_MAYBE>{
\> {
/* Tag without attributes: open the body. */
ECHOS(" {");
BEGIN INITIAL;
}
\/\> {
/* Self-closing tag without attributes. */
ECHOS(";");
BEGIN INITIAL;
}
{wsnl} {
/* Whitespace after the tag name is copied through. */
ECHO;
}
. {
/* Anything else starts the attributes: the character is pushed back with
 * yyless(0), an opening parenthesis is written and TAG takes over.
 */
yyless(0);
ECHOS("(");
BEGIN TAG;
}
}
<TAG>{
\"|\' {
/* Opening quote of an attribute value; the quote itself is not written. */
BEGIN STRING;
}
= {
/* The equals sign between name and value is written as a colon and a space. */
ECHOS(": ");
}
\> {
/* End of the tag: close the head and open the body. */
ECHOS(") {");
BEGIN INITIAL;
}
\/\> {
/* Self-closing tag with attributes.
 * NOTE(review): the parenthesis written on entering this state is not
 * closed on this path - confirm that this is the intended output.
 */
ECHOS(";");
BEGIN INITIAL;
}
.|\n {
/* Attribute names and whitespace are copied through. */
ECHO;
}
}
<STRING>{
\\\" {
    /* A double quote escaped with a backslash belongs to the value and is
     * copied through together with its backslash.
     */
    ECHO;
}
\"|\' {
    /* Closing quote; it is not written.  The escaped case has its own rule
     * above, so this pattern no longer has to match the character in
     * front of the quote.  The former pattern consumed and dropped that
     * character and could not close an empty value.
     */
    BEGIN TAG;
}
, {
    /* Commas are written with a backslash in front of them. */
    ECHOS("\\,");
}
.|\n {
    ECHO;
}
}
%%

View File

@ -1,9 +1,10 @@
// DOCTYPE HTML
// DOCTYPE HTML
html {
head {
}
body {
div(class: myclass) {
hr;
div (class: myclass) {
lorem ipsum
}
}

12
test/draft.html Normal file
View File

@ -0,0 +1,12 @@
./a.out test//draft.csml
<!-- DOCTYPE HTML -->
<html>
<head>
</head>
<body>
<hr/>
<div class='myclass'>
lorem ipsum
</div>
</body>
</html>