anon prototype is (kinda) pretty now

author anon <anon@anon.anon>

Sat, 19 Aug 2023 22:49:10 +0000 (00:49 +0200)

committer anon <anon@anon.anon>

Sat, 19 Aug 2023 22:49:10 +0000 (00:49 +0200)
author anon <anon@anon.anon>
Sat, 19 Aug 2023 22:49:10 +0000 (00:49 +0200)
committer anon <anon@anon.anon>
Sat, 19 Aug 2023 22:49:10 +0000 (00:49 +0200)
diff --git a/BUGS.md b/BUGS.md

new file mode 100644 (file)

index 0000000..9736a8e
--- /dev/null
+++ b/BUGS.md
@@ -0,0 +1,5 @@
+## Bugs
+ + segfaults under -O2
+ + i cannot decipher the valgrind warnings
+ + a single character right before keywords is always highlighted; the bug is understood, the resolution design is under contemplation
+ + newlines are not yet given special treatment in regex_match()
diff --git a/Makefile b/Makefile

index b6b347341595de9522aa3b83ad74adb1377381d9..b364514698f8e0219218e3f464a9738e08be61df 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
  include chad.mk
  DEBUG:=1
-CFLAGS:=-std=c99 -O2 -Wvla -Wshadow -Wundef $(if ${DEBUG}, ${CHAD_DEBUG},'')
+CFLAGS:=-std=c99 -O2 $(if ${DEBUG}, ${CHAD_DEBUG},'')
  CPPFLAGS:=-D_FORTIFY_SOURCE=2
  
  SRC.dir:=source/
diff --git a/chad.mk b/chad.mk

index 161bf9058babe2c3d9430843a2c181411efd4a51..0b3067385dc2e2733a981dd30a722a0cb12fda3d 100644 (file)
--- a/chad.mk
+++ b/chad.mk
@@ -8,14 +8,15 @@ CHAD_DEBUG:=-Og -ggdb -pg -fno-inline
  
  # Programs to check warnings for as defined by the Chad standard
  GCC:=gcc
-GCC.warnings:=-Wall -Wextra -Wpedantic
+GCC.warnings:=-Wall -Wextra -Wpedantic -Wvla -Wshadow -Wundef 
  CLANG:=clang
  CLANG.warnings:=-Weverything
  VALGRIND:=valgrind
+VALGRIND.flags:=--track-origins=yes --leak-check=full --show-leak-kinds=all
  
  chad_test:
         ${GCC} ${GCC.warnings} ${SRC} -o ${OUT}
         ${CLANG} ${GCC.warnings} ${SRC} -o ${OUT}
-       ${VALGRIND} ${OUT} ${OUTARGS}
+       ${VALGRIND} ${VALGRIND.flags} ${OUT} ${OUTARGS}
  
  .DEFAULT_GOAL:=main
diff --git a/source/hl.h b/source/hl.h

index 693ee73312564338eca3912923d1d6ee4ffb609d..a831a477dbe46d8c7d0f4d67976c230f9e9efe80 100644 (file)
--- a/source/hl.h
+++ b/source/hl.h
@@ -3,6 +3,7 @@
  #include <ctype.h>
  #include <string.h>
  #include "chad.h"
+#include "regex.h"
  
  typedef void (*attribute_callback_t)(const char * const string,
                                       const int          length,
@@ -39,54 +40,111 @@ typedef struct {
  token_t * token_table[1000];
  int token_table_top = 0;
  
-token_t * new_token(const char       * const syntax,
-                  const token_type_t            t,
-                  const hl_group_t * const      g) {
+/* Appends `token` to the global token_table.
+ *
+ * Returns 0 on success and 1 when the table is already full; in the latter
+ * case nothing is stored and ownership of `token` stays with the caller.
+ */
+int append_token(token_t * token){
+       // Refuse to write past the last slot of the fixed-size table.
+       if (token_table_top >= (int)(sizeof(token_table) / sizeof(token_table[0]))) {
+               return 1;
+       }
+       token_table[token_table_top++] = token;
+       return 0;
+}
+
+token_t * new_symbol_token(const char         * const word,
+                                 hl_group_t   * const    g) {
+
+       char * new_word = strdup(word);
+
         token_t * mt = (token_t*)malloc(sizeof(token_t));
         mt->hl = g;
-       mt->t = t;
-       mt->syntax = syntax;
-       token_table[token_table_top++] = mt;
+       mt->t = KEYSYMBOL;
+       mt->syntax = new_word;
+       append_token(mt);
         return mt;
+
  }
  
-void new_keyword_tokens(const char       * const *       words,
-                              hl_group_t * const             g) {
+int new_symbol_tokens(const char       * const *     symbols,
+                            hl_group_t * const             g) {
+
         int i = 0;
-       while (*words) {
-               if(new_token(*words, KEYWORD, g)){
+       while (*symbols) {
+               if(new_symbol_token(*symbols, g)){
                         ++i;
                 }
-               ++words;
+               ++symbols;
         }
  
         return i;
  }
  
-int token_fits(const char* const pattern,
-               const char* const      to) {
-       if (pattern == NULL) {
-               return true;
-       }
-       for (int i = 0;; i++) {
-               if (pattern[i] == '\00') {
-                       return i;
+int new_char_tokens(const char       *         characters,
+                          hl_group_t * const            g) {
+       int i = 0;
+       char buffer[2];
+       buffer[1] = '\00';
+       for(const char * s = characters; *s != '\00'; s++){
+               buffer[0] = *s;
+               if(new_symbol_token(buffer, g)){
+                       ++i;
                 }
-               if (to[i] == '\00'
-               ||  pattern[i] != to[i]) {
-                       return false;
+       }
+       return i;
+}
+
+/* Creates a KEYWORD token for `word` and registers it with append_token().
+ *
+ * The stored pattern is a freshly allocated copy of `word` wrapped in the
+ * word-boundary escapes, i.e. "\<word\>". The token keeps `g` as its
+ * highlight group.
+ *
+ * Returns the new token, or NULL when an allocation fails or append_token()
+ * reports a non-zero status; nothing is leaked on the failure paths.
+ */
+token_t * new_keyword_token(const char         * const word,
+                                  hl_group_t   * const    g) {
+
+       const size_t word_length = strlen(word);
+       // 2 bytes for "\<", the word itself, 2 bytes for "\>", 1 for the NUL.
+       char * new_word = (char*)malloc(2 + word_length + 2 + 1);
+       if (!new_word) {
+               return NULL;
+       }
+       memcpy(new_word, "\\<", 2);
+       memcpy(new_word + 2, word, word_length);
+       strcpy(new_word + 2 + word_length, "\\>");
+
+       token_t * mt = (token_t*)malloc(sizeof(token_t));
+       if (!mt) {
+               free(new_word);
+               return NULL;
+       }
+       mt->hl = g;
+       mt->t = KEYWORD;
+       mt->syntax = new_word;
+       if (append_token(mt)) {
+               // The token was not stored anywhere, so release it here.
+               free(mt);
+               free(new_word);
+               return NULL;
+       }
+       return mt;
+}
+
+/* Creates and registers a token of type `t` for `word` with highlight
+ * group `g`.
+ *
+ * KEYSYMBOL is forwarded to new_symbol_token() and KEYWORD to
+ * new_keyword_token(). MATCH and REGION are not implemented yet and yield
+ * NULL, as does any value of `t` not listed here.
+ */
+token_t * new_token(const char         * const word,
+                    const token_type_t            t,
+                          hl_group_t   * const    g) {
+       switch(t){
+               case KEYSYMBOL:
+                       return new_symbol_token(word, g);
+               case KEYWORD:
+                       return new_keyword_token(word, g);
+               case MATCH:
+               case REGION:
+                       // XXX: implement the rest
+                       break;
+       }
+       // Always return a defined value instead of falling off the end.
+       return NULL;
+}
+
+int new_keyword_tokens(const char       * const *       words,
+                             hl_group_t * const             g) {
+       int i = 0;
+       while (*words) {
+               if(new_keyword_token(*words, g)){
+                       ++i;
                 }
+               ++words;
         }
+
+       return i;
  }
  
-bool is_word_separator(const char character) {
-       if (( isascii(character))
-       &&  (!isalnum(character))
-       &&  ( character != '_')) {
-               return 1;
-       } else {
-               return 0;
+int token_fits(const token_t* const token,
+               const char*    const    to) {
+
+       const char * const pattern = token->syntax;
+
+       if (pattern == NULL) {
+               return true;
         }
+
+       return regex_match(pattern, to);
  }
  
  void render_string(const char * const string,
@@ -95,7 +153,7 @@ void render_string(const char * const string,
                 int f;
                 int i = 0;
                 for (; i < token_table_top; i++) {
-                       f = token_fits(token_table[i]->syntax, s);
+                       f = token_fits(token_table[i], s);
                         if(f){ break; }
                 }
                 //
diff --git a/source/main.c b/source/main.c

index f924641f7232250d966df0f2c50d6b697f8eb1a1..a43c8cf7e1d73e8371387d1def35a7f3b36c7b69 100644 (file)
--- a/source/main.c
+++ b/source/main.c
@@ -1,3 +1,5 @@
+//register
+//putchar()
  #include <stdio.h>
  #include <stdlib.h>
  #include <unistd.h>
@@ -10,21 +12,24 @@ static char * buffer      = NULL;
  static size_t buffer_size = 0;
  
  typedef struct {
-       int attribute;
-       int foreground_color;
-       int background_color;
+       char * attribute;
+       char * foreground_color;
+       char * background_color;
  } terminal_hl_t;
  
  void cterm_render_callback(const char * const string,
                             const int          length,
                             void       * const attributes) {
         if(!length){
+               fputs(TERMINAL_STYLE_BOLD, stdout);
                 putchar(*string);
+               fputs(TERMINAL_RESET, stdout);
                 return;
         }
  
-       UNUSED(attributes);
-       fputs(TERMINAL_STYLE_BOLD, stdout);
+       terminal_hl_t * term_hl = (terminal_hl_t*)attributes;
+       fputs(term_hl->attribute, stdout);
+       fputs(term_hl->foreground_color, stdout);
         for (int i = 0; i < length; i++) {
                 putchar(*(string+i));
         }
@@ -68,25 +73,50 @@ int main(int      argc,
           NULL
         };
  
-       terminal_hl_t my_hl = (terminal_hl_t) {
-               .attribute = 1
-       };
-
+       //
         display_t * cterm = &(display_t) {
                 .key = "cterm",
                 .callback = cterm_render_callback
         };
-       hl_group_t mygroup = (hl_group_t) {
-               .link = NULL
+       //
+       terminal_hl_t terminal_keyword_hl = (terminal_hl_t) {
+               .attribute = TERMINAL_STYLE_BOLD,
+               .foreground_color = TERMINAL_COLOR_FG_GREEN,
+               .background_color = NULL
+       };
+       hl_group_t keyword_hl = (hl_group_t) {
+               .link = NULL,
+               .attributes = (void*)&terminal_keyword_hl
+       };
+       //
+       terminal_hl_t terminal_preprocessor_hl = (terminal_hl_t) {
+               .attribute = TERMINAL_STYLE_BOLD,
+               .foreground_color = TERMINAL_COLOR_FG_BLUE,
+               .background_color = NULL
         };
+       hl_group_t preprocessor_hl = (hl_group_t) {
+               .link = NULL,
+               .attributes = (void*)&terminal_preprocessor_hl
+       };
+       //
+       terminal_hl_t terminal_symbol_hl = (terminal_hl_t) {
+               .attribute = TERMINAL_STYLE_BOLD,
+               .foreground_color = TERMINAL_COLOR_FG_YELLOW,
+               .background_color = NULL
+       };
+       hl_group_t symbol_hl = (hl_group_t) {
+               .link = NULL,
+               .attributes = (void*)&terminal_symbol_hl
+       };
+       //
         new_display_mode(cterm);
-       new_keyword_tokens(c_keywords, &mygroup);
-       new_keyword_tokens(preprocessor_keywords, &mygroup);
-
+       new_keyword_tokens(c_keywords, &keyword_hl);
+       new_keyword_tokens(preprocessor_keywords, &preprocessor_hl);
+       new_char_tokens("&|()[]{}*,", &symbol_hl);
         //
         render_string(buffer, "cterm");
         putchar('\n');
-       free (buffer);
+       free(buffer);
  
         return 0;
  }
diff --git a/source/regex.c b/source/regex.c

new file mode 100644 (file)

index 0000000..e9e2787
--- /dev/null
+++ b/source/regex.c
@@ -0,0 +1,199 @@
+#include "regex.h"
+
+bool is_case_on = true;
+
+/* Reports whether the character right after *s is anything other than the
+ * terminating NUL. The caller must guarantee that s[1] is readable.
+ */
+static bool is_next_valid(const char * const s) {
+       return s[1] != '\0';
+}
+
+/* Tests whether `character` lies in the inclusive range [start, end].
+ *
+ * Both bounds belong to the range, so char_in_range('0', '9', '9') is true.
+ * An inverted range (start > end) contains nothing and yields false.
+ */
+static bool char_in_range(const char     start,
+                          const char       end,
+                          const char character) {
+       // Two comparisons replace the former loop, which stopped one short of
+       // `end` and therefore never matched the upper bound.
+       return (start <= character) && (character <= end);
+}
+
+/* Tells whether `character` delimits a word: it must be a 7-bit ASCII
+ * character that is neither alphanumeric nor an underscore.
+ *
+ * Bytes outside the ASCII range are never separators. The NUL character
+ * counts as one.
+ */
+static bool is_word_separator(const char character) {
+       // <ctype.h> functions require a value representable as unsigned char;
+       // the explicit range test also replaces the non-ISO isascii().
+       const unsigned char byte = (unsigned char)character;
+
+       return (byte < 0x80)
+           && (!isalnum(byte))
+           && (byte != '_');
+}
+
+/* Evaluates a one-letter character-class escape against a single character.
+ *
+ * `magic_char` is the letter that followed the backslash in the pattern and
+ * `to_enchant` is the input character being tested. Upper-case letters are
+ * the negation of their lower-case counterpart. Range membership is
+ * delegated to char_in_range(). Unknown letters yield false.
+ *
+ * Not implemented (they depend on Vim options):
+ *      \i      identifier character (see 'isident' option)
+ *      \I      like "\i", but excluding digits
+ *      \k      keyword character (see 'iskeyword' option)
+ *      \K      like "\k", but excluding digits
+ *      \f      file name character (see 'isfname' option)
+ *      \F      like "\f", but excluding digits
+ *      \p      printable character (see 'isprint' option)
+ *      \P      like "\p", but excluding digits
+ */
+static bool magic(const char magic_char, const char to_enchant) {
+       // Building blocks shared by several classes.
+       const bool blank      = (to_enchant == ' ') || (to_enchant == '\t');
+       const bool digit      = char_in_range('0', '9', to_enchant);
+       const bool octal      = char_in_range('0', '7', to_enchant);
+       const bool upper      = char_in_range('A', 'Z', to_enchant);
+       const bool lower      = char_in_range('a', 'z', to_enchant);
+       const bool hex_letter = char_in_range('A', 'F', to_enchant)
+                            || char_in_range('a', 'f', to_enchant);
+       const bool alpha      = upper || lower;
+       const bool head       = alpha || (to_enchant == '_');
+       const bool word       = digit || head;
+       const bool hex        = digit || hex_letter;
+
+       switch(magic_char){
+               case 's': return  blank;        // space or tab
+               case 'S': return !blank;
+               case 'd': return  digit;        // [0-9]
+               case 'D': return !digit;        // [^0-9]
+               case 'x': return  hex;          // [0-9A-Fa-f]
+               case 'X': return !hex;          // [^0-9A-Fa-f]
+               case 'o': return  octal;        // [0-7]
+               case 'O': return !octal;        // [^0-7]
+               case 'w': return  word;         // [0-9A-Za-z_]
+               case 'W': return !word;         // [^0-9A-Za-z_]
+               case 'h': return  head;         // [A-Za-z_]
+               case 'H': return !head;         // [^A-Za-z_]
+               case 'a': return  alpha;        // [A-Za-z]
+               case 'A': return !alpha;        // [^A-Za-z]
+               case 'l': return  lower;        // [a-z]
+               case 'L': return !lower;        // [^a-z]
+               case 'u': return  upper;        // [A-Z]
+               case 'U': return !upper;        // [^A-Z]
+               default:  return false;         // not a known class letter
+       }
+}
+
+/* Matches `pattern` against the beginning of `string`.
+ *
+ * Supported pattern syntax:
+ *      literal characters      match themselves
+ *      \t \r \e \b             tab, carriage return, escape, backspace
+ *      \\                      a literal backslash
+ *      \<                      consumes one word-separator character
+ *      \>                      zero-width: the next unconsumed character must
+ *                              be a word separator or the end of the string
+ *      \s \d \w ...            character classes, see magic()
+ *
+ * Returns the number of characters of `string` consumed by the match, or 0
+ * (false) when the pattern does not match. A match that consumes nothing,
+ * such as an empty pattern, is therefore indistinguishable from a failure.
+ *
+ * NOTE(review): because "\<" consumes the separator, it cannot match at the
+ * very first character of a buffer and the separator is part of the reported
+ * length; this is the behaviour listed in BUGS.md and is left unchanged.
+ */
+int regex_match(const char * const pattern,
+                   const char * const  string) {
+       const char * pattern_pointer = pattern;
+       const char * string_pointer = string;
+
+       while (*pattern_pointer) {
+               // Literal. A pattern character is never NUL here, so this
+               // comparison also fails when the string has run out.
+               if (*pattern_pointer != '\\') {
+                       if (*pattern_pointer != *string_pointer) {
+                               return false;
+                       }
+                       ++pattern_pointer;
+                       ++string_pointer;
+                       continue;
+               }
+
+               // Escape character: a trailing lone backslash never matches.
+               if (!is_next_valid(pattern_pointer)) {
+                       return false;
+               }
+               const char escape = *(pattern_pointer + 1);
+
+               // End of word. It consumes no input, so it has to be tried
+               // before the end-of-string check below and must look at the
+               // current character, not the one after it.
+               if (escape == '>') {
+                       if (*string_pointer != '\00'
+                       && !is_word_separator(*string_pointer)) {
+                               return false;
+                       }
+                       pattern_pointer += 2;
+                       continue;
+               }
+
+               // Every remaining escape consumes exactly one character.
+               if (!(*string_pointer)) {
+                       return false;
+               }
+
+               bool is_match;
+               switch(escape){
+                       case 't': {
+                               is_match = (*string_pointer == '\t');
+                       } break;
+                       case 'r': {
+                               is_match = (*string_pointer == '\r');
+                       } break;
+                       case 'e': {
+                               is_match = (*string_pointer == '\033');
+                       } break;
+                       case 'b': {
+                               is_match = (*string_pointer == '\010');
+                       } break;
+                       case '\\': {
+                               is_match = (*string_pointer == '\\');
+                       } break;
+                       case '<': {
+                               is_match = is_word_separator(*string_pointer);
+                       } break;
+                       default: {
+                               is_match = magic(escape, *string_pointer);
+                       } break;
+               }
+
+               if (!is_match) {
+                       return false;
+               }
+
+               // Advance exactly once per escape, so the pattern is never
+               // inspected beyond its terminating NUL.
+               pattern_pointer += 2;
+               string_pointer += 1;
+       }
+
+       return (int)(string_pointer - string);
+}
diff --git a/source/regex.h b/source/regex.h

new file mode 100644 (file)

index 0000000..daea895
--- /dev/null
+++ b/source/regex.h
@@ -0,0 +1,6 @@
+#include "chad.h"
+#include <ctype.h>
+
+extern bool is_case_on;
+
+int regex_match(const char * const pattern, const char * const  string);
author	anon <anon@anon.anon>
	Sat, 19 Aug 2023 22:49:10 +0000 (00:49 +0200)
committer	anon <anon@anon.anon>
	Sat, 19 Aug 2023 22:49:10 +0000 (00:49 +0200)
BUGS.md	[new file with mode: 0644]	patch \| blob
Makefile		patch \| blob \| history
chad.mk		patch \| blob \| history
source/hl.h		patch \| blob \| history
source/main.c		patch \| blob \| history
source/regex.c	[new file with mode: 0644]	patch \| blob
source/regex.h	[new file with mode: 0644]	patch \| blob