From ca3b5279b7c83f26409ab301f3207349b654f026 Mon Sep 17 00:00:00 2001 From: xolatile Date: Mon, 2 Jun 2025 22:55:13 +0200 Subject: [PATCH] Testing bad documentation... --- xyntax.h | 92 +++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 71 insertions(+), 21 deletions(-) diff --git a/xyntax.h b/xyntax.h index 36dbe06..4495105 100755 --- a/xyntax.h +++ b/xyntax.h @@ -16,19 +16,37 @@ /// warranty of merchantability or fitness for a particular purpose, because it is pointless. Please see the GNU (Geenoo) General Public License /// for more details, if you dare, it is a lot of text that nobody wants to read... +/// Xyntax, the most minimal text parser that can deal with syntax highlighting that I could've come up with, and the most generic name for it +/// also, it's rather slow, but if you're creating a heavy duty program, you'd use a heavy duty library for it. This library has only one header, so +/// it's easy to incorporate it into existing projects. If you want to see how it is used, check out simple examples below, if you want more +/// robust example, check out my other programs, Xarbon and Xighlight. +/// +/// To start, you want to include this header file, there's no macro for including implementation (like stb libraries), this is for projects +/// that have only one C source file, and one or more C header files. After that, simply make a global or local variable 'syntax_structure * x' +/// defined below, initialize it, define the rules, then in main loop select rule, do your thing with it, then deinitialize the structure. It's +/// simple, I'll provide minimal examples below. + +/// Structure for single syntax definition (array of rules), if you want to parse multiple languages simultaneously, use an array. 
+/// +/// syntax_structure * c_syntax = null; + typedef struct { - u4 count; - u4 limit; - b4 * enrange; - b4 * derange; - c1 * * begin; - c1 * * end; - c1 * escape; - u4 * colour; - u4 * effect; + natural count; /// Count of syntax rules used, maximum is set with limit below, if limit is 0, it'll allocate it dynamically. + natural limit; /// Preallocation limit for syntax rules, hardcode it if you don't want this to allocate memory dynamically. + boolean * enrange; /// Enrange rule, set to true if you want to begin matching by any character from 'begin' string below. + boolean * derange; /// Derange rule, set to true if you want to end matching by any character from 'end' string below. + character * * begin; /// String containing set of characters or full string for start of matching, correlating to 'enrange' above. + character * * end; /// String containing set of characters or full string for end of matching, correlating to 'derange' above. + character * escape; /// Escape character, which will skip one cycle in selection loop, then continue matching for 'end' string. + natural * colour; /// Colour for matched array of characters, can be anything, enumerated, literal, hardcoded... + natural * effect; /// Effect for matched array of characters, can be anything, enumerated, literal, hardcoded... } syntax_structure; -static syntax_structure * syntax_initialize (u4 limit) { +/// Initialize syntax structure before calling other functions that take it as an argument, set 'limit' to 0 if you want dynamic array of rules. +/// +/// c_syntax = syntax_initialize (0); + +static syntax_structure * syntax_initialize (natural limit) { syntax_structure * syntax = allocate (sizeof (* syntax)); syntax->limit = limit; @@ -46,8 +64,12 @@ static syntax_structure * syntax_initialize (u4 limit) { return (syntax); } +/// Deinitialize syntax structure after using it, in order to avoid memory leaks. 
+/// +/// c_syntax = syntax_deinitialize (c_syntax); + static syntax_structure * syntax_deinitialize (syntax_structure * syntax) { - for (u4 index = 0; index < syntax->count; ++index) { + for (natural index = 0; index < syntax->count; ++index) { syntax->begin [index] = deallocate (syntax->begin [index]); syntax->end [index] = deallocate (syntax->end [index]); } @@ -63,11 +85,25 @@ static syntax_structure * syntax_deinitialize (syntax_structure * syntax) { return (deallocate (syntax)); } -static u4 syntax_define (syntax_structure * syntax, b4 enrange, b4 derange, c1 * begin, c1 * end, c1 escape, - u4 colour, u4 effect) { +/// Define single syntax rule, which will be added into array part of syntax structure, return value is index into that array. +/// +/// Take a look into few simple examples of defining some simplified rules of C programming language. +/// Two examples below show how to define multiline comments and strings, since these have priority, both enrange and derange are false. +/// syntax_define (syntax, false, false, "/*", "*/", '\\', 1, 0); +/// syntax_define (syntax, false, false, "\"", "\"", '\\', 2, 0); +/// Now we're defining syntax rule for one keyword, static, notice that end string contains separator characters because derange is true. +/// syntax_define (syntax, false, true, "static", "()[]{}.,:;<=>+*-/%!&~^?| \t\r\n", '\0', 3, 0); +/// You can define brackets and operator characters separately, or if you want to, you can define some of them separately again. +/// syntax_define (syntax, true, false, "()[]{}", "", '\0', 4, 0); +/// syntax_define (syntax, true, false, ".,:;<=>+*-/%!&~^?|", "", '\0', 5, 0); +/// And lastly, we can define number selection like this below, by setting both enrange and derange as true. 
+/// syntax_define (syntax, true, true, "0123456789", "()[]{}.,:;<=>+*-/%!&~^?| \t\r\n", '\0', 6, 0); + +static natural syntax_define (syntax_structure * syntax, boolean enrange, boolean derange, character * begin, character * end, character escape, + natural colour, natural effect) { ++syntax->count; - u4 current = syntax->count - 1; + natural current = syntax->count - 1; fatal_failure (begin == null, "syntax_define: Begin string is null pointer."); fatal_failure (end == null, "syntax_define: End string is null pointer."); @@ -99,13 +135,27 @@ static u4 syntax_define (syntax_structure * syntax, b4 enrange, b4 derange, c1 * return (current); } -static u4 syntax_select (syntax_structure * syntax, c1 * string, u4 * length) { - u4 offset = 0; - u4 subset = 0; - u4 select = 0; - - u8 begin_length = 0; - u8 end_length = 0; +/// After all syntax definitions have been defined, call this function inside your main loop, return value is index of selected rule. +/// +/// Now, imagine that 'buffer' is a file you've loaded into memory, you have declared natural numbers 'offset', 'length' and 'select', and you've +/// properly initialized syntax structure 'syntax', defined its rules for wanted language(s), a simple main loop would look like this: +/// for (offset = 0; buffer [offset] != '\0'; offset += length) { +/// /// Notice that we're not incrementing 'offset', we're increasing it by 'length'. +/// select = syntax_select (syntax, & buffer [offset], & length); +/// if (select >= syntax->count) { +/// /// Syntax definition is incomplete, unknown sequence has been detected, either print nothing, or print default. +/// } else { +/// /// Print string of 'length', at '& buffer [offset]', using 'syntax->colour [select]' and 'syntax->effect [select]'. 
+/// } +/// } + +static natural syntax_select (syntax_structure * syntax, character * string, natural * length) { + natural offset = 0; + natural subset = 0; + natural select = 0; + + natural_64 begin_length = 0; + natural_64 end_length = 0; for (; select != syntax->count; ++select) { begin_length = string_length (syntax->begin [select]); -- 2.39.5