/// warranty of merchantability or fitness for a particular purpose, because it is pointless. Please see the GNU (Geenoo) General Public License
/// for more details, if you dare, it is a lot of text that nobody wants to read...
+/// Xyntax, the most minimal text parser for syntax highlighting that I could come up with, and also the most generic name for it.
+/// It's rather slow, but if you're creating a heavy-duty program, you'd use a heavy-duty library for it. This library has only one header, so
+/// it's easy to incorporate into existing projects. If you want to see how it is used, check out the simple examples below; if you want a more
+/// robust example, check out my other programs, Xarbon and Xighlight.
+///
+/// To start, include this header file. There's no macro for including the implementation (as in stb libraries); this is meant for projects
+/// that have only one C source file and one or more C header files. After that, simply make a global or local variable 'syntax_structure * x'
+/// (the type is defined below), initialize it, define the rules, then in the main loop select a rule, do your thing with it, and finally
+/// deinitialize the structure. It's simple, I'll provide minimal examples below.
+
+/// Structure for a single syntax definition (an array of rules); if you want to parse multiple languages simultaneously, use an array.
+///
+/// syntax_structure * c_syntax = null;
+
typedef struct {
- u4 count;
- u4 limit;
- b4 * enrange;
- b4 * derange;
- c1 * * begin;
- c1 * * end;
- c1 * escape;
- u4 * colour;
- u4 * effect;
+ natural count; /// Count of syntax rules in use; the maximum is set by 'limit' below, and if 'limit' is 0, storage is allocated dynamically.
+ natural limit; /// Preallocation limit for syntax rules; hardcode it if you don't want dynamic memory allocation.
+ boolean * enrange; /// Enrange flag; set to true if you want matching to begin on any single character from the 'begin' string below.
+ boolean * derange; /// Derange flag; set to true if you want matching to end on any single character from the 'end' string below.
+ character * * begin; /// String holding either a set of characters or a full string that starts a match, depending on 'enrange' above.
+ character * * end; /// String holding either a set of characters or a full string that ends a match, depending on 'derange' above.
+ character * escape; /// Escape character, which skips one cycle in the selection loop, then matching for the 'end' string continues.
+ natural * colour; /// Colour for the matched array of characters; can be anything: enumerated, literal, hardcoded...
+ natural * effect; /// Effect for the matched array of characters; can be anything: enumerated, literal, hardcoded...
} syntax_structure;
-static syntax_structure * syntax_initialize (u4 limit) {
+/// Initialize the syntax structure before calling any other function that takes it as an argument; set 'limit' to 0 if you want a dynamic array of rules. Returns the allocated structure.
+///
+/// c_syntax = syntax_initialize (0);
+
+static syntax_structure * syntax_initialize (natural limit) {
syntax_structure * syntax = allocate (sizeof (* syntax));
syntax->limit = limit;
return (syntax);
}
+/// Deinitialize the syntax structure once you're done with it, to avoid memory leaks; assign the returned value back to your pointer as shown below (presumably null, 'deallocate' is defined elsewhere).
+///
+/// c_syntax = syntax_deinitialize (c_syntax);
+
static syntax_structure * syntax_deinitialize (syntax_structure * syntax) {
- for (u4 index = 0; index < syntax->count; ++index) {
+ for (natural index = 0; index < syntax->count; ++index) {
syntax->begin [index] = deallocate (syntax->begin [index]);
syntax->end [index] = deallocate (syntax->end [index]);
}
return (deallocate (syntax));
}
-static u4 syntax_define (syntax_structure * syntax, b4 enrange, b4 derange, c1 * begin, c1 * end, c1 escape,
- u4 colour, u4 effect) {
+/// Define a single syntax rule, which is added to the rule arrays of the syntax structure; the return value is the index of the new rule.
+///
+/// Take a look at a few simple examples that define some simplified rules of the C programming language.
+/// The two examples below show how to define multiline comments and strings; since these have priority, both enrange and derange are false.
+/// syntax_define (syntax, false, false, "/*", "*/", '\\', 1, 0);
+/// syntax_define (syntax, false, false, "\"", "\"", '\\', 2, 0);
+/// Now we're defining a syntax rule for one keyword, 'static'; notice that the end string contains separator characters because derange is true.
+/// syntax_define (syntax, false, true, "static", "()[]{}.,:;<=>+*-/%!&~^?| \t\r\n", '\0', 3, 0);
+/// You can define brackets and operator characters separately, or, if you want to, you can split some of them into separate rules again.
+/// syntax_define (syntax, true, false, "()[]{}", "", '\0', 4, 0);
+/// syntax_define (syntax, true, false, ".,:;<=>+*-/%!&~^?|", "", '\0', 5, 0);
+/// And lastly, we can define number selection as shown below, by setting both enrange and derange to true.
+/// syntax_define (syntax, true, true, "0123456789", "()[]{}.,:;<=>+*-/%!&~^?| \t\r\n", '\0', 6, 0);
+
+static natural syntax_define (syntax_structure * syntax, boolean enrange, boolean derange, character * begin, character * end, character escape,
+ natural colour, natural effect) {
++syntax->count;
- u4 current = syntax->count - 1;
+ natural current = syntax->count - 1;
fatal_failure (begin == null, "syntax_define: Begin string is null pointer.");
fatal_failure (end == null, "syntax_define: End string is null pointer.");
return (current);
}
-static u4 syntax_select (syntax_structure * syntax, c1 * string, u4 * length) {
- u4 offset = 0;
- u4 subset = 0;
- u4 select = 0;
-
- u8 begin_length = 0;
- u8 end_length = 0;
+/// Once all syntax rules have been defined, call this function inside your main loop; the return value is the index of the selected rule.
+///
+/// Now, imagine that 'buffer' is a file you've loaded into memory, you have declared natural numbers 'offset', 'length' and 'select', and you've
+/// properly initialized the syntax structure 'syntax' and defined its rules for the wanted language(s); a simple main loop would look like this:
+/// for (offset = 0; buffer [offset] != '\0'; offset += length) {
+/// /// Notice that we're not incrementing 'offset', we're increasing it by 'length'.
+/// select = syntax_select (syntax, & buffer [offset], & length);
+/// if (select >= syntax->count) {
+/// /// Syntax definition is incomplete, unknown sequence has been detected, either print nothing, or print default.
+/// } else {
+/// /// Print string of 'length', at '& buffer [offset]', using 'syntax->colour [select]' and 'syntax->effect [select]'.
+/// }
+/// }
+
+static natural syntax_select (syntax_structure * syntax, character * string, natural * length) {
+ natural offset = 0;
+ natural subset = 0;
+ natural select = 0;
+
+ natural_64 begin_length = 0;
+ natural_64 end_length = 0;
for (; select != syntax->count; ++select) {
begin_length = string_length (syntax->begin [select]);