From: anon <anon@anon.anon>
Date: Tue, 29 Aug 2023 14:23:39 +0000 (+0200)
Subject: Merge branch 'master' of https://git.lain.church/emil/hl
X-Git-Url: https://git.xolatile.top/?a=commitdiff_plain;h=2c44086b843f06adaca1619cfa6c86558fca76ee;p=public-libhl.git

Merge branch 'master' of https://git.lain.church/emil/hl
---

2c44086b843f06adaca1619cfa6c86558fca76ee
diff --cc include/hl.h
index 0000000,91ced6a..c338f0f
mode 000000,100644..100644
--- a/include/hl.h
+++ b/include/hl.h
@@@ -1,0 -1,328 +1,336 @@@
+ /* hl.h
+  * Copyright 2023 Anon Anonson, Ognjen 'xolatile' Milan Robovic, Emil Williams
+  * SPDX Identifier: GPL-3.0-only / NO WARRANTY / NO GUARANTEE */
+ 
+ #include <stdio.h>
+ #include <uthash.h>
+ #include <ctype.h>
+ #include <string.h>
+ #include <stdbool.h>
+ #include "chad.h"
+ #include "regex.h"
+ 
+ // -------------------
+ // ### Definitions ###
+ // -------------------
+ 
+ typedef enum { // Kinds of token; recorded in token_t.t and switched on by new_token()
+ 	KEYSYMBOL, // set by new_symbol_token() and, as written, by new_region_token()
+ 	KEYWORD, // set by new_keyword_token()
+ 	MATCH, // set by the MATCH branch of new_token()
+ 	REGION // new_token() has an empty branch for this kind
+ } token_type_t;
+ 
+ typedef void (*attribute_callback_t) (const char * string, // render_string() passes a pointer into the text it is rendering
+                                       const int    length, // render_string() passes 0, or the token_fits() result for the matched token
+                                             void * attributes); // the matched token's hl->attributes, or NULL when no token fits
+ 
+ typedef struct { // One display mode; entries live in the display_table hash
+ 	char                 * key; // hash key: new_display_mode() adds by it, render_string() looks a mode up by it
+ 	attribute_callback_t   callback; // called by render_string() for every piece of text it emits
+ 	UT_hash_handle         hh; // handle field used by the uthash macros
+ } display_t;
+ 
+ typedef struct { // Highlight group: the attributes pointer is what render_string() hands to the display callback
+ 	void              * attributes; // opaque to this file; passed through to the callback untouched
+ 	struct hl_group_t * link; // NOTE(review): this struct has no tag, so `struct hl_group_t` is a separate incomplete type, not hl_group_t; `typedef struct hl_group_t {` would make link self-referential
+ } hl_group_t;
+ 
+ typedef struct { // One highlightable pattern; token_table stores pointers to these
+ 	hl_group_t   * hl; // group pointer stored as given by the caller; free_token() calls free() on it
+ 	regex_t      * syntax; // result of regex_compile(); released with regex_free() in free_token()
+ 	token_type_t   t; // kind recorded by the constructor that built the token
+ 	char           _pad[4]; // never read or written in this file; presumably explicit tail padding - TODO confirm
+ } token_t;
+ 
+ extern vector_t    token_table; // pointers to every token created by the constructors
+ extern display_t * display_table; // uthash head of the registered display modes
+ 
+ extern hl_group_t * keyword_hl; // defined as NULL further down in this header
+ extern hl_group_t * preprocessor_hl; // NOTE(review): no definition of this is visible in this header
+ extern hl_group_t * symbol_hl; // NOTE(review): no definition of this is visible in this header
+ 
+ extern void new_display_mode(display_t * mode); // add `mode` to display_table under mode->key
+ extern int free_token(token_t * token); // free the token's hl and regex; always returns 0
+ extern int append_token(token_t * token); // push the token pointer onto token_table; always returns 0
+ 
+ // TODO: ALIGN PROPERLY...
+ 
+ extern token_t * new_symbol_token(const char         * const c, // compiles `c` as a regex into a KEYSYMBOL token, appends it to token_table, returns it
+                                         hl_group_t   * const g); // stored in the token as-is
+ 
+ extern int       new_symbol_tokens(const char       * const *     symbols, // NULL-terminated array; one symbol token per entry; returns the number created
+                                          hl_group_t * const             g); // same group pointer goes into every token
+ 
+ extern int       new_char_tokens(const char       *              str, // one symbol token per character of `str`; returns the number created
+                                        hl_group_t * const          g); // same group pointer goes into every token
+ 
+ extern token_t * new_keyword_token(const char         * const word, // compiles `word` as a regex into a KEYWORD token, appends it, returns it
+                                          hl_group_t   * const    g); // stored in the token as-is
+ 
+ extern int       new_keyword_tokens(const char       * const * words, // NULL-terminated array; one keyword token per entry; returns the number created
+                                           hl_group_t * const   g); // same group pointer goes into every token
+ 
+ extern token_t * new_token(const char         * const word, // pattern text handed to the constructor chosen by `t`
+                            const token_type_t            t, // only KEYSYMBOL and KEYWORD currently yield a non-NULL return
+                                  hl_group_t   * const    g); // stored in the token as-is
+ 
+ // TODO: ALIGN PROPERLY...
+ 
 -extern int token_fits(const token_t * const  token,
 -                      const char    * const  to,
++extern int token_fits(const token_t * const          token, // token whose compiled regex is tried
++                      const char    * const             to, // the whole string being scanned
+                       const int              string_offset, // forwarded to regex_match() as its string_offset argument
 -                            int     *        match_offset);
++                      const bool            is_start_of_line, // forwarded to regex_match() as its start_of_string argument
++                            int     *         match_offset); // not used by the current definition
+ 
+ extern void render_string(const char * const string, // NUL-terminated text to walk
+                           const char * const mode); // key of the display mode to look up in display_table
+ 
+ extern int hl_init(void); // currently does nothing; returns 0
+ extern int hl_deinit(void); // runs free_token() on every entry of token_table; returns 0
+ 
+ // GLOBALS
+ 
+ vector_t token_table = { // starts empty; append_token() pushes into it. NOTE(review): this is a definition inside a header with no include guard, so two including translation units would each define it
+ 	.data          = NULL,
+ 	.element_size  = sizeof(token_t *), // elements are pointers to tokens, not token_t values
+ 	.element_count = 0UL
+ };
+ 
+ display_t * display_table = NULL; // uthash head; stays NULL until new_display_mode() adds an entry
+ 
+ // --------------------------------
+ // ### Constructors/Destructors ###
+ // --------------------------------
+ 
+ void new_display_mode(display_t * mode) { // Register `mode` in display_table, keyed by the string mode->key points at
+ 	HASH_ADD_STR(display_table,
+ 	             key,
+ 	             mode); // uthash links the caller's struct into the table rather than copying it
+ }
+ 
+ int free_token(token_t * token) { // Release the group and the compiled regex held by `token`; always returns 0
+ 	free(token->hl); // NOTE(review): the constructors store the caller's group pointer unchanged, so tokens built with one shared group would free it repeatedly - confirm ownership
+ 	regex_free(token->syntax); // the token_t allocation itself is not freed in this function
+ 
+ 	return 0;
+ }
+ 
+ int append_token(token_t * token) { // Append `token` to the global token_table; always returns 0
+ 	vector_push(&token_table, &token); // passes the address of the pointer because the table's element_size is sizeof(token_t *)
+ 
+ 	return 0;
+ }
+ 
+ token_t * new_symbol_token(const char         * const c, // Build a KEYSYMBOL token from regex text `c`, register it, and return it
+                                  hl_group_t   * const g) { // group pointer stored as-is, not copied
+ 
+ 	token_t * mt = (token_t*)malloc(sizeof(token_t)); // NOTE(review): dereferenced below without a NULL check
+ 
+ 	mt->hl     = g;
+ 	mt->t      = KEYSYMBOL;
+ 	mt->syntax = regex_compile(c); // stored without checking the result
+ 
+ 	append_token(mt); // registered in token_table before being handed back
+ 
+ 	return mt;
+ }
+ 
+ int new_symbol_tokens(const char       * const *     symbols, // Create one symbol token per entry of the NULL-terminated array `symbols`
+                             hl_group_t * const             g) { // every token receives this same group pointer
+ 	int i = 0; // count of tokens created; this is the return value
+ 
+ 	while (*symbols) { // a NULL entry ends the list
+ 		if(new_symbol_token(*symbols, g)) {
+ 			++i;
+ 		} else {
+ 			assert(!(bool)"Kinda failed to new symbol token thing."); // a string literal converts to true, so this asserts false when reached
+ 		}
+ 		++symbols;
+ 	}
+ 
+ 	return i;
+ }
+ 
+ int new_char_tokens(const char       *              str, // Create one symbol token for each character of `str`; returns how many were created
+                           hl_group_t * const          g) { // every token receives this same group pointer
+ 	int i = 0;
+ 
+ 	char buffer[3]; // layout: backslash, the current character, NUL
+ 	buffer[0] = '\\';
+ 	buffer[2] = '\0';
+ 
+ 	for(const char * s = str; *s != '\0'; s++) {
+ 		buffer[1] = *s;
+ 		if(new_symbol_token(is_magic(*s) ? buffer : buffer + 1, g)) { // characters is_magic() accepts get the backslash prefix; the rest are passed bare
+ 			++i;
+ 		} else {
+ 			assert(!(bool)"Kinda failed to new char token thing."); // a string literal converts to true, so this asserts false when reached
+ 		}
+ 	}
+ 
+ 	return i;
+ }
+ 
+ token_t * new_keyword_token(const char         * const word, // Build a KEYWORD token from regex text `word`, register it, and return it
+                                   hl_group_t   * const    g) { // group pointer stored as-is, not copied
+ 	//char   * new_word = strdup(word);
+ 	//size_t   word_length = strlen(word);
+ 	//char   * new_word    = (char*)malloc(word_length + 4 + 1);
+ 
+ 	//memcpy(new_word, "\\<", 2);
+ 	//memcpy(new_word + 2, word, word_length);
+ 	//strcpy(new_word + 2 + word_length, "\\>");
+ 
+ 	token_t * mt = (token_t*)malloc(sizeof(token_t)); // NOTE(review): dereferenced below without a NULL check
+ 
+ 	mt->hl     = g;
+ 	mt->t      = KEYWORD;
+ 	//mt->syntax = regex_compile(new_word);
+ 	mt->syntax = regex_compile(word); // `word` is compiled unchanged; the wrapping code above is commented out
+ 
+ 	append_token(mt); // registered in token_table before being handed back
+ 
+ 	return mt;
+ }
+ 
+ int new_keyword_tokens(const char       * const * words, // Create one keyword token per entry of the NULL-terminated array `words`
+                              hl_group_t * const   g) { // every token receives this same group pointer
+ 	int i = 0; // count of tokens created; this is the return value
+ 
+ 	while (*words) { // a NULL entry ends the list
+ 		if(new_keyword_token(*words, g)) { // unlike new_symbol_tokens(), a NULL result is skipped silently
+ 			++i;
+ 		}
+ 		++words;
+ 	}
+ 
+ 	return i;
+ }
+ 
+ token_t * new_region_token(const char       * start, // Build one token whose regex is `start`, then [\d\D]*, then `end`; register and return it
+                            const char       *   end, // text placed at the tail of the combined pattern
+                                  hl_group_t *       g) { // group pointer stored as-is, not copied
+ 	char buffer[100]; // NOTE(review): fixed size, and the strcat calls below are unbounded - long start/end text overflows it; a length-checked snprintf would avoid that
+ 	buffer[0] = '\0';
+ 	strcat(buffer, start);
+ 	strcat(buffer, "[\\d\\D]*");
+ 	strcat(buffer, end);
+ 
+ 	token_t * mt = (token_t*)malloc(sizeof(token_t)); // NOTE(review): dereferenced below without a NULL check
+ 
+ 	mt->hl     = g;
+ 	mt->t      = KEYSYMBOL; // NOTE(review): tagged KEYSYMBOL although token_type_t has REGION - confirm this is intended
+ 	mt->syntax = regex_compile(buffer);
+ 
+ 	append_token(mt); // registered in token_table before being handed back
+ 
+ 	return mt;
+ }
+ 
+ token_t * new_token(const char         * const word, // Create a token of kind `t` from pattern text `word`
+                     const token_type_t            t, // selects the branch below
+                           hl_group_t   * const    g) { // group pointer stored as-is, not copied
+ 	switch (t) {
+ 		case KEYSYMBOL: {
+ 			return new_symbol_token(word, g);
+ 		}
+ 		case KEYWORD: {
+ 			return new_keyword_token(word, g);
+ 		}
+ 		case MATCH: { // built inline with the same steps the symbol/keyword constructors use
++			token_t * mt = (token_t*)malloc(sizeof(token_t)); // NOTE(review): dereferenced below without a NULL check
++				mt->hl     = g;
++				mt->t      = MATCH;
++				mt->syntax = regex_compile(word);
++			append_token(mt);
+ 		} break; // NOTE(review): control reaches `return NULL` below although a token was created and appended - this branch probably should return mt
+ 		case REGION: { // nothing is created for REGION here
+ 		} break;
+ 	}
+ 
+ 	return NULL; // reached for MATCH, REGION, and any value outside the enum
+ }
+ 
+ // --------------------
+ // ### Highlighting ###
+ // --------------------
+ 
 -int token_fits(const token_t * const token,
 -               const char    * const to,
 -               const int             string_offset,
++int token_fits(const token_t * const            token, // Try `token`'s compiled regex on `to`; returns regex_match()'s result unchanged
++               const char    * const               to, // the whole string, not a pointer to the current position
++               const int                string_offset, // forwarded to regex_match() as its string_offset argument
++               const bool            is_start_of_line, // forwarded to regex_match() as its start_of_string argument
+                      int     *       match_offset) { // ignored: only passed to UNUSED() below
+   UNUSED(match_offset);
+ 	//return regex_match(pattern, to, string_offset, match_offset);
 -	return regex_match(token->syntax, to + string_offset);
++	return regex_match(token->syntax, to, is_start_of_line, string_offset); // render_string() treats non-zero as a match and advances by this value
+ }
+ 
+ void render_string(const char * const string, // Walk `string` and hand each piece to the callback of the display mode named `mode`
+                    const char * const mode) { // key looked up in display_table
+ 	for (const char * s = string; *s != '\00';) { // s is advanced inside the body, by a match or by one character
+ 		int    f           = 0; // result of the first token_fits() call that is non-zero
+ 		size_t token_index = 0; // index of that token when f is non-zero
+ 		int    offset      = 0; // token_fits() never writes through &offset, so this stays 0
+ 
+ 		for (; token_index < token_table.element_count; token_index++) { // tokens are tried in table order; the first fit wins
+ 			token_t * t = *(token_t**)vector_get(&token_table,
+ 			                                     token_index);
 -			f = token_fits(t, string, (int) (s - string), &offset);
++			const bool is_start_of_line = (s == string) || (*s == '\n'); // NOTE(review): tests the current character for a newline, not the one before it - confirm
++			f = token_fits(t, string, (int)(s - string), is_start_of_line, &offset);
+ 			if (f) {
+ 				break;
+ 			}
+ 		}
+ 		// display-mode lookup; it does not depend on s but is repeated on every iteration
+ 		display_t * display;
+ 		HASH_FIND_STR(display_table,
+ 		              mode,
+ 		              display); // uthash sets display to NULL when the key is absent; NOTE(review): it is dereferenced below without a check
+ 		//
+ 		if (f) {
+ 			for (int i = 0; i < offset; i++) { // runs zero times while offset stays 0
+ 				token_t * t = *(token_t**)vector_get(&token_table,
+ 				                                     token_index);
+ 				display->callback(s + i,
+ 				                  0,
+ 				                  t->hl->attributes);
+ 			}
+ 			token_t * t = *(token_t**)vector_get(&token_table,
+ 			                                     token_index);
+ 			display->callback(s + offset,
+ 			                  f,
+ 			                  t->hl->attributes); // one call for the match, with f passed as the callback's length
+ 			s += f + offset; // continue after what was just emitted
+ 		} else {
+ 			display->callback(s,
+ 			                  0,
+ 			                  NULL); // no token fits here: length 0 and no attributes
+ 			++s; // advance by one character
+ 		}
+ 	}
+ }
+ 
+ // --------------------------
+ // ### Library Management ###
+ // --------------------------
+ hl_group_t * special_hl          = NULL; // all eight groups start as NULL; nothing in this header assigns them
+ hl_group_t * control_hl          = NULL;
+ hl_group_t * keyword_hl          = NULL; // the only one of these that also has an extern declaration earlier in this header
+ hl_group_t * block_hl            = NULL;
+ hl_group_t * separator_hl        = NULL;
+ hl_group_t * operator_hl         = NULL;
+ hl_group_t * comment_hl          = NULL;
+ hl_group_t * string_literal_hl   = NULL;
+ 
+ int hl_init(void) { // Library start-up hook; currently performs no work
+ 	return 0; // always 0
+ }
+ 
+ int hl_deinit(void) { // Run free_token() on every entry of token_table; always returns 0
+ 	for (size_t i = 0; i < token_table.element_count; i++) {
+ 		free_token(*(token_t**)vector_get(&token_table, i)); // NOTE(review): neither the token_t allocations nor the vector's own storage are released in this function
+ 	}
+ 
+ 	return 0;
+ }
diff --cc include/regex.h
index 0000000,2628255..a93d5ae
mode 000000,100644..100644
--- a/include/regex.h
+++ b/include/regex.h
@@@ -1,0 -1,24 +1,24 @@@
+ #ifndef REGEX_H
+ #define REGEX_H
+ 
+ #include <stdbool.h>
+ 
+ #include "vector.h"
+ 
+ extern bool is_case_on; // defined outside this header; presumably toggles case-sensitive matching - TODO confirm
+ 
+ typedef struct { // Compiled-pattern handle: returned by regex_compile(), consumed by regex_match()/regex_search()/regex_free()
+ 	int accepting_state; // presumably the state that marks a successful match - TODO confirm in the implementation
+ 	char * str; // presumably the pattern text - TODO confirm in the implementation
+ 	vector_t delta_table;	// elements: <delta_t>
+ 	vector_t catch_table;	// elements: <offshoot_t>
+ } regex_t;
+ 
+ extern regex_t * regex_compile(const char * const pattern); // produce a regex_t from pattern text
+ extern int       regex_free(regex_t * const regex); // counterpart of regex_compile()
+ extern bool      regex_search(regex_t * regex, const char * const string); // not called from hl.h
 -extern int       regex_match(regex_t * regex, const char * const string);
++extern int       regex_match(regex_t * regex, const char * const string, const bool start_of_string, const int string_offset); // hl.h passes the whole string plus an offset, and uses a non-zero result as the amount to advance
+ 
+ extern bool is_magic(const char c); // hl.h prefixes a character with a backslash when this returns true
+ 
+ #endif