From 385a8f9818f8cc9ad61c9e5147460685c4ad2f61 Mon Sep 17 00:00:00 2001
From: anon <anon@anon.anon>
Date: Mon, 21 Aug 2023 20:07:39 +0200
Subject: [PATCH] the implemented parts of regex work

---
 chad.mk        |  3 ++-
 source/hl.h    | 36 ++++++++++++++++++++++++------------
 source/main.c  |  8 ++++----
 source/regex.c | 26 +++++++++++++++++++-------
 source/regex.h |  2 +-
 5 files changed, 50 insertions(+), 25 deletions(-)

diff --git a/chad.mk b/chad.mk
index 21d7dd9..dc87793 100644
--- a/chad.mk
+++ b/chad.mk
@@ -5,6 +5,7 @@ ARGS:=${TARGET} < source/main.c
 GCC:=gcc
 D.versions:=-D_XOPEN_SOURCE=700
 GCC.warnings:=-Wall -Wextra -Wpedantic -Wvla -Wshadow -Wundef
+GCC.debug:=-Og -ggdb -pg -fno-inline
 
 CLANG:=clang
 CLANG.warnings:=-Weverything
@@ -13,6 +14,6 @@ VALGRIND:=valgrind
 VALGRIND.flags:=--track-origins=yes --leak-check=full --show-leak-kinds=all
 
 chad_test:
-	${GCC} ${D.versions} ${GCC.warnings} ${SRC} -o ${TARGET}
 	${CLANG} ${D.versions} ${GCC.warnings} ${SRC} -o ${TARGET}
+	${GCC} ${D.versions} ${GCC.debug} ${GCC.warnings} ${SRC} -o ${TARGET}
 	${VALGRIND} ${VALGRIND.flags} $(shell pwd)/${TARGET} ${ARGS}
diff --git a/source/hl.h b/source/hl.h
index 985b849..dcf2894 100644
--- a/source/hl.h
+++ b/source/hl.h
@@ -56,6 +56,10 @@ void new_display_mode(display_t * mode) {
 }
 
 int free_token(token_t * token){
+	/* XXX: since token->hl could be shared by several tokens,
+	 * this might free an already freed object (double free)
+	 * when free_token() is invoked from a loop
+	 */
 	free(token->hl);
 	free(token->syntax);
 	return 0;
@@ -94,8 +98,8 @@ int new_symbol_tokens(const char       * const *     symbols,
 	return i;
 }
 
-int new_char_tokens(const char       *         characters,
-                          hl_group_t * const            g) {
+int new_char_tokens(const char       *       characters,
+                          hl_group_t * const          g) {
 	int i = 0;
 	char buffer[2];
 	buffer[1] = '\00';
@@ -163,8 +167,10 @@ token_t * new_token(const char         * const word,
 // ### Highlighting ###
 // --------------------
 
-int token_fits(const token_t* const token,
-               const char*    const    to) {
+int token_fits(const token_t *   const         token,
+               const char    *   const            to,
+               const int               string_offset,
+			         int     *          match_offset) {
 
 	const char * const pattern = token->syntax;
 
@@ -172,16 +178,17 @@ int token_fits(const token_t* const token,
 		return true;
 	}
 
-	return regex_match(pattern, to);
+	return regex_match(pattern, to, string_offset, match_offset);
 }
 
 void render_string(const char * const string,
-                   const char * const mode) {
+                   const char * const   mode) {
 	for (const char * s = string; *s != '\00';) {
 		int f;
-		int i = 0;
-		for (; i < token_table_top; i++) {
-			f = token_fits(token_table[i], s);
+		int token_index = 0;
+		int offset;
+		for (; token_index < token_table_top; token_index++) {
+			f = token_fits(token_table[token_index], string, s - string, &offset);
 			if(f){ break; }
 		}
 		//
@@ -191,10 +198,15 @@ void render_string(const char * const string,
 		              display);
 		//
 		if (f) {
-			display->callback(s,
+			for(int i = 0; i < offset; i++){
+				display->callback(s + i,
+								  0,
+								  token_table[token_index]->hl->attributes);
+			}
+			display->callback(s + offset,
 			                  f,
-			                  token_table[i]->hl->attributes);
-			s += f;
+			                  token_table[token_index]->hl->attributes);
+			s += f + offset;
 		} else {
 			display->callback(s,
 			                  0,
diff --git a/source/main.c b/source/main.c
index 6dda8c7..3a0ff0f 100644
--- a/source/main.c
+++ b/source/main.c
@@ -1,5 +1,4 @@
-//register
-//putchar()
+const int ew;
 #include <assert.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -114,13 +113,14 @@ int main(int      argc,
 	};
 	//
 	new_display_mode(cterm);
+	new_char_tokens("&|()[]{}*,", &symbol_hl);
 	new_keyword_tokens(c_keywords, &keyword_hl);
 	new_keyword_tokens(preprocessor_keywords, &preprocessor_hl);
-	new_char_tokens("&|()[]{}*,", &symbol_hl);
 	//
 	render_string(buffer, "cterm");
 	putchar('\n');
-	hl_deinit();
+	fflush(stdout);
+	//hl_deinit();
 	free(buffer);
 
 	return 0;
diff --git a/source/regex.c b/source/regex.c
index e9e37ae..3dcb16a 100644
--- a/source/regex.c
+++ b/source/regex.c
@@ -97,10 +97,14 @@ static bool magic(const char magic_char, const char to_enchant) {
 	return false;
 }
 
-int regex_match(const char * const pattern,
-                   const char * const  string) {
+int regex_match(const char * const       pattern,
+                const char * const  string_start,
+				const int          string_offset,
+				      int  *        match_offset_) {
 	const char * pattern_pointer = pattern;
-	const char * string_pointer = string;
+	const char * string_pointer = string_start + string_offset;
+	const char * const match_base = string_pointer;
+	int match_offset = 0;
 
 	while (1488) {
 		// End of one of the arguments
@@ -160,15 +164,20 @@ int regex_match(const char * const pattern,
 				}
 			}
 
-			if (*(pattern_pointer + 1) == '<'
-			&& (is_word_separator(*string_pointer))) {
+			if (*(pattern_pointer + 1) == '<') {
+				if (is_word_separator(*string_pointer)) {
 					pattern_pointer += 2;
 					string_pointer += 1;
+					match_offset += 1;
 					continue;
+				} else if (string_pointer == string_start) {
+					pattern_pointer += 2;
+					continue;
+				}
 			}
 
 			if (*(pattern_pointer + 1) == '>') {
-				if (is_word_separator(*(string_pointer + 1))) {
+				if (is_word_separator(*string_pointer)) {
 						pattern_pointer += 2;
 						continue;
 				}
@@ -195,5 +204,8 @@ int regex_match(const char * const pattern,
 		}
 	}
 
-	return (string_pointer - string);
+	if (match_offset_) {
+		*match_offset_ = match_offset;
+	}
+	return (string_pointer - match_base) - match_offset;
 }
diff --git a/source/regex.h b/source/regex.h
index daea895..11706f4 100644
--- a/source/regex.h
+++ b/source/regex.h
@@ -3,4 +3,4 @@
 
 extern bool is_case_on;
 
-int regex_match(const char * const pattern, const char * const  string);
+extern int regex_match(const char * const pattern, const char * const  string, const int string_offset, int * match_offset_);