the implemented parts of regex work

author anon <anon@anon.anon>

Mon, 21 Aug 2023 18:07:39 +0000 (20:07 +0200)

committer anon <anon@anon.anon>

Mon, 21 Aug 2023 18:07:39 +0000 (20:07 +0200)
author anon <anon@anon.anon>
Mon, 21 Aug 2023 18:07:39 +0000 (20:07 +0200)
committer anon <anon@anon.anon>
Mon, 21 Aug 2023 18:07:39 +0000 (20:07 +0200)
diff --git a/chad.mk b/chad.mk

index 21d7dd9f52cd4ef68bec939f66fd7e4351322781..dc8779343657479df20cad98af95c3eb086a6401 100644 (file)
--- a/chad.mk
+++ b/chad.mk
@@ -5,6 +5,7 @@ ARGS:=${TARGET} < source/main.c
  GCC:=gcc
  D.versions:=-D_XOPEN_SOURCE=700
  GCC.warnings:=-Wall -Wextra -Wpedantic -Wvla -Wshadow -Wundef
+GCC.debug:=-Og -ggdb -pg -fno-inline
  
  CLANG:=clang
  CLANG.warnings:=-Weverything
@@ -13,6 +14,6 @@ VALGRIND:=valgrind
  VALGRIND.flags:=--track-origins=yes --leak-check=full --show-leak-kinds=all
  
  chad_test:
-       ${GCC} ${D.versions} ${GCC.warnings} ${SRC} -o ${TARGET}
         ${CLANG} ${D.versions} ${GCC.warnings} ${SRC} -o ${TARGET}
+       ${GCC} ${D.versions} ${GCC.debug} ${GCC.warnings} ${SRC} -o ${TARGET}
         ${VALGRIND} ${VALGRIND.flags} $(shell pwd)/${TARGET} ${ARGS}
diff --git a/source/hl.h b/source/hl.h

index 985b84966e60ab5f8dcbade24d9d7c35b5a10053..dcf2894a2f871e14e6c043e765398fbf85bec864 100644 (file)
--- a/source/hl.h
+++ b/source/hl.h
@@ -56,6 +56,10 @@ void new_display_mode(display_t * mode) {
  }
  
  int free_token(token_t * token){
+       /* XXX: hl may be shared between several tokens, so calling
+        * this on every token in a loop can free the same hl object
+        * more than once (double free)
+        */
         free(token->hl);
         free(token->syntax);
         return 0;
@@ -94,8 +98,8 @@ int new_symbol_tokens(const char       * const *     symbols,
         return i;
  }
  
-int new_char_tokens(const char       *         characters,
-                          hl_group_t * const            g) {
+int new_char_tokens(const char       *       characters,
+                          hl_group_t * const          g) {
         int i = 0;
         char buffer[2];
         buffer[1] = '\00';
@@ -163,8 +167,10 @@ token_t * new_token(const char         * const word,
  // ### Highlighting ###
  // --------------------
  
-int token_fits(const token_t* const token,
-               const char*    const    to) {
+int token_fits(const token_t *   const         token,
+               const char    *   const            to,
+               const int               string_offset,
+                                int     *          match_offset) {
  
         const char * const pattern = token->syntax;
  
@@ -172,16 +178,17 @@ int token_fits(const token_t* const token,
                 return true;
         }
  
-       return regex_match(pattern, to);
+       return regex_match(pattern, to, string_offset, match_offset);
  }
  
  void render_string(const char * const string,
-                   const char * const mode) {
+                   const char * const   mode) {
         for (const char * s = string; *s != '\00';) {
                 int f;
-               int i = 0;
-               for (; i < token_table_top; i++) {
-                       f = token_fits(token_table[i], s);
+               int token_index = 0;
+               int offset;
+               for (; token_index < token_table_top; token_index++) {
+                       f = token_fits(token_table[token_index], string, s - string, &offset);
                         if(f){ break; }
                 }
                 //
@@ -191,10 +198,15 @@ void render_string(const char * const string,
                               display);
                 //
                 if (f) {
-                       display->callback(s,
+                       for(int i = 0; i < offset; i++){
+                               display->callback(s + i,
+                                                                 0,
+                                                                 token_table[token_index]->hl->attributes);
+                       }
+                       display->callback(s + offset,
                                           f,
-                                         token_table[i]->hl->attributes);
-                       s += f;
+                                         token_table[token_index]->hl->attributes);
+                       s += f + offset;
                 } else {
                         display->callback(s,
                                           0,
diff --git a/source/main.c b/source/main.c

index 6dda8c711ea3bc46447c2cb8a8a2c448f305a435..3a0ff0fa469044d4443196a9da45af893747bcd4 100644 (file)
--- a/source/main.c
+++ b/source/main.c
@@ -1,5 +1,4 @@
-//register
-//putchar()
+const int ew;
  #include <assert.h>
  #include <stdio.h>
  #include <stdlib.h>
@@ -114,13 +113,14 @@ int main(int      argc,
         };
         //
         new_display_mode(cterm);
+       new_char_tokens("&|()[]{}*,", &symbol_hl);
         new_keyword_tokens(c_keywords, &keyword_hl);
         new_keyword_tokens(preprocessor_keywords, &preprocessor_hl);
-       new_char_tokens("&|()[]{}*,", &symbol_hl);
         //
         render_string(buffer, "cterm");
         putchar('\n');
-       hl_deinit();
+       fflush(stdout);
+       //hl_deinit();
         free(buffer);
  
         return 0;
diff --git a/source/regex.c b/source/regex.c

index e9e37ae4fe6ef59c147922229f124f680ae704da..3dcb16a87d19e6a1238f977cfada5537e12a3c43 100644 (file)
--- a/source/regex.c
+++ b/source/regex.c
@@ -97,10 +97,14 @@ static bool magic(const char magic_char, const char to_enchant) {
         return false;
  }
  
-int regex_match(const char * const pattern,
-                   const char * const  string) {
+int regex_match(const char * const       pattern,
+                const char * const  string_start,
+                               const int          string_offset,
+                                     int  *        match_offset_) {
         const char * pattern_pointer = pattern;
-       const char * string_pointer = string;
+       const char * string_pointer = string_start + string_offset;
+       const char * const match_base = string_pointer;
+       int match_offset = 0;
  
         while (1488) {
                 // End of one of the arguments
@@ -160,15 +164,20 @@ int regex_match(const char * const pattern,
                                 }
                         }
  
-                       if (*(pattern_pointer + 1) == '<'
-                       && (is_word_separator(*string_pointer))) {
+                       if (*(pattern_pointer + 1) == '<') {
+                               if (is_word_separator(*string_pointer)) {
                                         pattern_pointer += 2;
                                         string_pointer += 1;
+                                       match_offset += 1;
                                         continue;
+                               } else if (string_pointer == string_start) {
+                                       pattern_pointer += 2;
+                                       continue;
+                               }
                         }
  
                         if (*(pattern_pointer + 1) == '>') {
-                               if (is_word_separator(*(string_pointer + 1))) {
+                               if (is_word_separator(*string_pointer)) {
                                                 pattern_pointer += 2;
                                                 continue;
                                 }
@@ -195,5 +204,8 @@ int regex_match(const char * const pattern,
                 }
         }
  
-       return (string_pointer - string);
+       if (match_offset_) {
+               *match_offset_ = match_offset;
+       }
+       return (string_pointer - match_base) - match_offset;
  }
diff --git a/source/regex.h b/source/regex.h

index daea8957158eb7e2169b615c475776b394aee6e7..11706f4e99bda0d614da081450cb119772fc2a6f 100644 (file)
--- a/source/regex.h
+++ b/source/regex.h
@@ -3,4 +3,4 @@
  
  extern bool is_case_on;
  
-int regex_match(const char * const pattern, const char * const  string);
+extern int regex_match(const char * const pattern, const char * const  string, const int string_offset, int * match_offset_);
author	anon <anon@anon.anon>
	Mon, 21 Aug 2023 18:07:39 +0000 (20:07 +0200)
committer	anon <anon@anon.anon>
	Mon, 21 Aug 2023 18:07:39 +0000 (20:07 +0200)
chad.mk		patch \| blob \| history
source/hl.h		patch \| blob \| history
source/main.c		patch \| blob \| history
source/regex.c		patch \| blob \| history
source/regex.h		patch \| blob \| history