--- /dev/null
+## Bugs
+ + segfaults under -O2
+ + I cannot decipher the valgrind warnings
+ + a single character right before keywords is always highlighted; the bug is understood, but the design of its resolution is still under consideration
+ + newlines are not yet given special treatment in regex_match()
include chad.mk
DEBUG:=1
-CFLAGS:=-std=c99 -O2 -Wvla -Wshadow -Wundef $(if ${DEBUG}, ${CHAD_DEBUG},'')
+CFLAGS:=-std=c99 -O2 $(if ${DEBUG}, ${CHAD_DEBUG},'')
CPPFLAGS:=-D_FORTIFY_SOURCE=2
SRC.dir:=source/
# Programs to check warnings for as defined by the Chad standard
GCC:=gcc
-GCC.warnings:=-Wall -Wextra -Wpedantic
+GCC.warnings:=-Wall -Wextra -Wpedantic -Wvla -Wshadow -Wundef
CLANG:=clang
CLANG.warnings:=-Weverything
VALGRIND:=valgrind
+VALGRIND.flags:=--track-origins=yes --leak-check=full --show-leak-kinds=all
+# Build with each compiler under its own warning set, then run the result under valgrind.
chad_test:
${GCC} ${GCC.warnings} ${SRC} -o ${OUT}
- ${CLANG} ${GCC.warnings} ${SRC} -o ${OUT}
+ ${CLANG} ${CLANG.warnings} ${SRC} -o ${OUT}
- ${VALGRIND} ${OUT} ${OUTARGS}
+ ${VALGRIND} ${VALGRIND.flags} ${OUT} ${OUTARGS}
.DEFAULT_GOAL:=main
#include <ctype.h>
#include <string.h>
#include "chad.h"
+#include "regex.h"
typedef void (*attribute_callback_t)(const char * const string,
const int length,
token_t * token_table[1000];
int token_table_top = 0;
-token_t * new_token(const char * const syntax,
- const token_type_t t,
- const hl_group_t * const g) {
+/* Store `token` in the next free slot of token_table.
+ *
+ * Returns 0 on success and 1 when the table is already full, in which case
+ * the token is NOT stored and token_table_top is left unchanged.
+ */
+int append_token(token_t * token){
+    const int capacity = (int)(sizeof(token_table) / sizeof(token_table[0]));
+
+    // Refuse to write past the end of the fixed-size table.
+    if (token_table_top >= capacity) {
+        return 1;
+    }
+
+    token_table[token_table_top++] = token;
+    return 0;
+}
+
+token_t * new_symbol_token(const char * const word, /* Creates a KEYSYMBOL token for `word`, registers it with append_token() and returns it. */
+ hl_group_t * const g) { // `g` is stored as the token's highlight group
+
+ char * new_word = strdup(word); // the token keeps its own copy of `word`; NOTE(review): the strdup() result is not checked
+
token_t * mt = (token_t*)malloc(sizeof(token_t));
mt->hl = g;
- mt->t = t;
- mt->syntax = syntax;
- token_table[token_table_top++] = mt;
+ mt->t = KEYSYMBOL; // NOTE(review): `mt` comes from a malloc() whose result is not checked
+ mt->syntax = new_word;
+ append_token(mt); // the return value of append_token() is ignored here
return mt;
+
}
-void new_keyword_tokens(const char * const * words,
- hl_group_t * const g) {
+int new_symbol_tokens(const char * const * symbols, /* Creates one symbol token per entry of `symbols`; returns how many calls to new_symbol_token() yielded a token. */
+ hl_group_t * const g) { // `g` is handed to every new_symbol_token() call
+
int i = 0;
- while (*words) {
- if(new_token(*words, KEYWORD, g)){
+ while (*symbols) { // iteration stops at the first NULL entry of the array
+ if(new_symbol_token(*symbols, g)){
++i;
}
- ++words;
+ ++symbols;
}
return i;
}
-int token_fits(const char* const pattern,
- const char* const to) {
- if (pattern == NULL) {
- return true;
- }
- for (int i = 0;; i++) {
- if (pattern[i] == '\00') {
- return i;
+int new_char_tokens(const char * characters, /* Creates a one-character symbol token for every character of the string `characters`. */
+ hl_group_t * const g) { // `g` is handed to every new_symbol_token() call
+ int i = 0; // counts the tokens that were created; this is the return value
+ char buffer[2]; // scratch one-character string passed to new_symbol_token()
+ buffer[1] = '\00';
+ for(const char * s = characters; *s != '\00'; s++){
+ buffer[0] = *s;
+ if(new_symbol_token(buffer, g)){ // new_symbol_token() strdup()s its argument, so reusing `buffer` is fine
+ ++i;
}
- if (to[i] == '\00'
- || pattern[i] != to[i]) {
- return false;
+ }
+ return i;
+}
+
+/* Create a KEYWORD token for `word` and register it with append_token().
+ *
+ * The stored pattern is `word` wrapped in the word-boundary escapes, i.e.
+ * the string "\<" + word + "\>", held in a freshly allocated buffer that is
+ * owned by the token.
+ *
+ * `g` is stored as the token's highlight group.
+ *
+ * Returns the new token, or NULL when an allocation fails or the token
+ * could not be registered; nothing is leaked in that case.
+ */
+token_t * new_keyword_token(const char * const word,
+                            hl_group_t * const g) {
+
+    const size_t word_length = strlen(word);
+
+    // 2 bytes for "\<", the word itself, 2 bytes for "\>", 1 for the NUL.
+    char * new_word = (char*)malloc(2 + word_length + 2 + 1);
+    if (new_word == NULL) {
+        return NULL;
+    }
+    memcpy(new_word, "\\<", 2);
+    memcpy(new_word + 2, word, word_length);
+    strcpy(new_word + 2 + word_length, "\\>");
+
+    token_t * mt = (token_t*)malloc(sizeof(token_t));
+    if (mt == NULL) {
+        free(new_word);
+        return NULL;
+    }
+    mt->hl     = g;
+    mt->t      = KEYWORD;
+    mt->syntax = new_word;
+
+    // A token that cannot be registered would never be matched; drop it.
+    if (append_token(mt) != 0) {
+        free(new_word);
+        free(mt);
+        return NULL;
+    }
+    return mt;
+}
+
+/* Create and register a token of type `t` for `word`, highlighted with `g`.
+ *
+ * KEYSYMBOL and KEYWORD are forwarded to new_symbol_token() and
+ * new_keyword_token() respectively. MATCH and REGION are not implemented
+ * yet; for those (and any other value) NULL is returned instead of falling
+ * off the end of the function, which would leave the result undefined.
+ */
+token_t * new_token(const char * const word,
+                    const token_type_t t,
+                    hl_group_t * const g) {
+    switch(t){
+        case KEYSYMBOL:
+            return new_symbol_token(word, g);
+        case KEYWORD:
+            return new_keyword_token(word, g);
+        case MATCH:
+        case REGION:
+            // XXX: implement the rest
+            break;
+    }
+
+    return NULL;
+}
+
+int new_keyword_tokens(const char * const * words, /* Creates one keyword token per entry of `words`; returns how many calls to new_keyword_token() yielded a token. */
+ hl_group_t * const g) { // `g` is handed to every new_keyword_token() call
+ int i = 0; // number of tokens created so far
+ while (*words) { // iteration stops at the first NULL entry of the array
+ if(new_keyword_token(*words, g)){
+ ++i;
}
+ ++words;
}
+
+ return i;
}
-bool is_word_separator(const char character) {
- if (( isascii(character))
- && (!isalnum(character))
- && ( character != '_')) {
- return 1;
- } else {
- return 0;
+int token_fits(const token_t* const token, /* Tests the pattern stored in `token` against the text starting at `to`. */
+ const char* const to) {
+
+ const char * const pattern = token->syntax; // `token` itself is dereferenced unconditionally
+
+ if (pattern == NULL) { // a token without a pattern is reported as fitting (true)
+ return true;
}
+
+ return regex_match(pattern, to); // otherwise the result of regex_match() is passed through unchanged
}
void render_string(const char * const string,
int f;
int i = 0;
for (; i < token_table_top; i++) {
- f = token_fits(token_table[i]->syntax, s);
+ f = token_fits(token_table[i], s);
if(f){ break; }
}
//
+//register
+//putchar()
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
static size_t buffer_size = 0;
typedef struct {
- int attribute;
- int foreground_color;
- int background_color;
+ char * attribute;
+ char * foreground_color;
+ char * background_color;
} terminal_hl_t;
void cterm_render_callback(const char * const string,
const int length,
void * const attributes) {
if(!length){
+ fputs(TERMINAL_STYLE_BOLD, stdout);
putchar(*string);
+ fputs(TERMINAL_RESET, stdout);
return;
}
- UNUSED(attributes);
- fputs(TERMINAL_STYLE_BOLD, stdout);
+ terminal_hl_t * term_hl = (terminal_hl_t*)attributes;
+ fputs(term_hl->attribute, stdout);
+ fputs(term_hl->foreground_color, stdout);
for (int i = 0; i < length; i++) {
putchar(*(string+i));
}
NULL
};
- terminal_hl_t my_hl = (terminal_hl_t) {
- .attribute = 1
- };
-
+ //
display_t * cterm = &(display_t) {
.key = "cterm",
.callback = cterm_render_callback
};
- hl_group_t mygroup = (hl_group_t) {
- .link = NULL
+ //
+ terminal_hl_t terminal_keyword_hl = (terminal_hl_t) {
+ .attribute = TERMINAL_STYLE_BOLD,
+ .foreground_color = TERMINAL_COLOR_FG_GREEN,
+ .background_color = NULL
+ };
+ hl_group_t keyword_hl = (hl_group_t) {
+ .link = NULL,
+ .attributes = (void*)&terminal_keyword_hl
+ };
+ //
+ terminal_hl_t terminal_preprocessor_hl = (terminal_hl_t) {
+ .attribute = TERMINAL_STYLE_BOLD,
+ .foreground_color = TERMINAL_COLOR_FG_BLUE,
+ .background_color = NULL
};
+ hl_group_t preprocessor_hl = (hl_group_t) {
+ .link = NULL,
+ .attributes = (void*)&terminal_preprocessor_hl
+ };
+ //
+ terminal_hl_t terminal_symbol_hl = (terminal_hl_t) {
+ .attribute = TERMINAL_STYLE_BOLD,
+ .foreground_color = TERMINAL_COLOR_FG_YELLOW,
+ .background_color = NULL
+ };
+ hl_group_t symbol_hl = (hl_group_t) {
+ .link = NULL,
+ .attributes = (void*)&terminal_symbol_hl
+ };
+ //
new_display_mode(cterm);
- new_keyword_tokens(c_keywords, &mygroup);
- new_keyword_tokens(preprocessor_keywords, &mygroup);
-
+ new_keyword_tokens(c_keywords, &keyword_hl);
+ new_keyword_tokens(preprocessor_keywords, &preprocessor_hl);
+ new_char_tokens("&|()[]{}*,", &symbol_hl);
//
render_string(buffer, "cterm");
putchar('\n');
- free (buffer);
+ free(buffer);
return 0;
}
--- /dev/null
+#include "regex.h"
+
+bool is_case_on = true; /* NOTE(review): nothing in this file reads the flag yet; presumably a case-sensitivity switch for regex_match() - confirm */
+
+/* True when the character right after *s is not the string terminator. */
+static bool is_next_valid(const char * const s) {
+    return s[1] != '\0';
+}
+
+/* Report whether `character` lies in the INCLUSIVE range [start, end].
+ *
+ * Both bounds belong to the range, so char_in_range('0', '9', '9') is true
+ * and a one-element range (start == end) matches exactly that character.
+ * An inverted range (start > end) contains nothing.
+ */
+static bool char_in_range(const char start,
+                          const char end,
+                          const char character) {
+    // With start > end no value can satisfy both comparisons, so the
+    // inverted-range case needs no separate branch.
+    return (start <= character) && (character <= end);
+}
+
+/* A word separator is an ASCII character that is neither alphanumeric
+ * nor an underscore. Non-ASCII characters are never separators. */
+static bool is_word_separator(const char character) {
+    if (!isascii(character)) {
+        return false;
+    }
+    if (character == '_') {
+        return false;
+    }
+    return !isalnum(character);
+}
+
+/* Building blocks for magic(). Every range test is delegated to
+ * char_in_range(), so class membership is exactly what that helper reports. */
+static bool magic_blank(const char c) { return (c == ' ') || (c == '\t'); }
+static bool magic_digit(const char c) { return char_in_range('0', '9', c); }
+static bool magic_octal(const char c) { return char_in_range('0', '7', c); }
+static bool magic_upper(const char c) { return char_in_range('A', 'Z', c); }
+static bool magic_lower(const char c) { return char_in_range('a', 'z', c); }
+static bool magic_alpha(const char c) { return magic_upper(c) || magic_lower(c); }
+static bool magic_head(const char c)  { return magic_alpha(c) || (c == '_'); }
+static bool magic_word(const char c)  { return magic_digit(c) || magic_head(c); }
+static bool magic_hex(const char c) {
+    return magic_digit(c)
+        || char_in_range('A', 'F', c)
+        || char_in_range('a', 'f', c);
+}
+
+/* Test `to_enchant` against the character class selected by `magic_char`
+ * (the character that follows a backslash in a pattern).
+ *
+ * Each upper-case selector is the negation of its lower-case counterpart.
+ * A selector that is not listed below never matches (returns false).
+ *
+ * Not implemented yet:
+ *   \i  identifier character (see 'isident' option)
+ *   \I  like "\i", but excluding digits
+ *   \k  keyword character (see 'iskeyword' option)
+ *   \K  like "\k", but excluding digits
+ *   \f  file name character (see 'isfname' option)
+ *   \F  like "\f", but excluding digits
+ *   \p  printable character (see 'isprint' option)
+ *   \P  like "\p", but excluding digits
+ */
+static bool magic(const char magic_char, const char to_enchant) {
+    switch (magic_char) {
+        case 's': return  magic_blank(to_enchant);  // space or tab
+        case 'S': return !magic_blank(to_enchant);
+        case 'd': return  magic_digit(to_enchant);  // [0-9]
+        case 'D': return !magic_digit(to_enchant);  // [^0-9]
+        case 'x': return  magic_hex(to_enchant);    // [0-9A-Fa-f]
+        case 'X': return !magic_hex(to_enchant);    // [^0-9A-Fa-f]
+        case 'o': return  magic_octal(to_enchant);  // [0-7]
+        case 'O': return !magic_octal(to_enchant);  // [^0-7]
+        case 'w': return  magic_word(to_enchant);   // [0-9A-Za-z_]
+        case 'W': return !magic_word(to_enchant);   // [^0-9A-Za-z_]
+        case 'h': return  magic_head(to_enchant);   // [A-Za-z_]
+        case 'H': return !magic_head(to_enchant);   // [^A-Za-z_]
+        case 'a': return  magic_alpha(to_enchant);  // [A-Za-z]
+        case 'A': return !magic_alpha(to_enchant);  // [^A-Za-z]
+        case 'l': return  magic_lower(to_enchant);  // [a-z]
+        case 'L': return !magic_lower(to_enchant);  // [^a-z]
+        case 'u': return  magic_upper(to_enchant);  // [A-Z]
+        case 'U': return !magic_upper(to_enchant);  // [^A-Z]
+        default:  break;
+    }
+
+    return false;
+}
+
+/* Map a literal escape selector (the `t` of "\t", etc.) to the character it
+ * stands for. Returns '\0' when `escape` is not one of \t, \r, \e, \b. */
+static char escaped_literal(const char escape) {
+    switch (escape) {
+        case 't': return '\t';
+        case 'r': return '\r';
+        case 'e': return '\033';
+        case 'b': return '\010';
+        default:  return '\0';
+    }
+}
+
+/* Match `pattern` against the beginning of `string`.
+ *
+ * Supported pattern elements:
+ *   - any character other than a backslash matches itself;
+ *   - \t \r \e \b match tab, carriage return, escape and backspace;
+ *   - \\ matches a single backslash;
+ *   - \< matches (and CONSUMES) one word-separator character - known
+ *     limitation: the separator in front of a word becomes part of the match;
+ *   - \> is zero-width and succeeds when the next unconsumed character is a
+ *     word separator or the end of `string`;
+ *   - every other escape is treated as a character class and decided by
+ *     magic().
+ *
+ * Newlines receive no special treatment and is_case_on is not consulted.
+ *
+ * Returns the number of characters of `string` consumed by the match, or
+ * 0 (false) when the pattern does not match, ends in a dangling backslash,
+ * or is empty.
+ */
+int regex_match(const char * const pattern,
+                const char * const string) {
+    const char * pattern_pointer = pattern;
+    const char * string_pointer  = string;
+
+    while (*pattern_pointer) {
+        // Literal
+        if (*pattern_pointer != '\\') {
+            if (!(*string_pointer)
+            ||  *pattern_pointer != *string_pointer) {
+                return false;
+            }
+            ++pattern_pointer;
+            ++string_pointer;
+            continue;
+        }
+
+        // Escape character
+        if (!is_next_valid(pattern_pointer)) {
+            return false;
+        }
+        const char escape = *(pattern_pointer + 1);
+
+        // End of word: looks at the next unconsumed character and consumes
+        // nothing, so it must be handled before the end-of-input check.
+        if (escape == '>') {
+            if (*string_pointer != '\00'
+            &&  !is_word_separator(*string_pointer)) {
+                return false;
+            }
+            pattern_pointer += 2;
+            continue;
+        }
+
+        // Every remaining escape consumes exactly one input character.
+        if (!(*string_pointer)) {
+            return false;
+        }
+
+        const char literal = escaped_literal(escape);
+        bool is_matched;
+        if (literal != '\00') {
+            is_matched = (*string_pointer == literal);
+        } else if (escape == '\\') {
+            is_matched = (*string_pointer == '\\');
+        } else if (escape == '<') {
+            is_matched = is_word_separator(*string_pointer);
+        } else {
+            is_matched = magic(escape, *string_pointer);
+        }
+
+        if (!is_matched) {
+            return false;
+        }
+        pattern_pointer += 2;
+        string_pointer  += 1;
+    }
+
+    return (int)(string_pointer - string);
+}
--- /dev/null
+#ifndef REGEX_H
+#define REGEX_H
+
+#include "chad.h"
+#include <ctype.h>
+
+/* Global flag defined alongside regex_match(); initialised to true.
+ * NOTE(review): presumably selects case-sensitive matching - confirm. */
+extern bool is_case_on;
+
+/* Match `pattern` against the beginning of `string`.
+ * Returns the number of characters of `string` consumed by the match, or
+ * 0 (false) when the pattern does not match. */
+int regex_match(const char * const pattern, const char * const string);
+
+#endif /* REGEX_H */