git.xolatile.top Git - public-libhl.git/commitdiff
regex2 extensions
author anon <anon@anon.anon>
Wed, 23 Aug 2023 23:08:40 +0000 (01:08 +0200)
committer anon <anon@anon.anon>
Wed, 23 Aug 2023 23:08:40 +0000 (01:08 +0200)
source/regex2.hpp

index 25badf39cf8cd60b593c73a628892ccd9ac8e934..708c41f0664fe0e1058768c8090d9b30d4453ead 100644 (file)
@@ -1,4 +1,3 @@
-#include <vector>
 #include <assert.h>
 #include <string.h>
 
@@ -52,10 +51,37 @@ bool is_quantifier(const char c){
 int escape_1_to_1(const char c, char * whitelist) {
        switch(c) {
                case 't': {
-                       strcpy(whitelist, "\t");
+                       strcat(whitelist, "\t");
                } return 1;
                case 'n': {
-                       strcpy(whitelist, "\n");
+                       strcat(whitelist, "\n");
+               } return 1;
+               case 'r': {
+                       strcat(whitelist, "\r");
+               } return 1;
+               case 'b': {
+                       strcat(whitelist, "\b");
+               } return 1;
+               case '[': {
+                       strcat(whitelist, "[");
+               } return 1;
+               case ']': {
+                       strcat(whitelist, "]");
+               } return 1;
+               case '.': {
+                       strcat(whitelist, ".");
+               } return 1;
+               case '?': {
+                       strcat(whitelist, "?");
+               } return 1;
+               case '+': {
+                       strcat(whitelist, "+");
+               } return 1;
+               case '*': {
+                       strcat(whitelist, "*");
+               } return 1;
+               case '\\': {
+                       strcat(whitelist, "\\");
                } return 1;
        }
 
@@ -64,20 +90,90 @@ int escape_1_to_1(const char c, char * whitelist) {
 
 int escape_1_to_N(const char c, char * whitelist) {
        switch(c) {
+               case 'i': {
+                       const char identifier_chars[] = "@0123456789_\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337";
+                       strcpy(whitelist, identifier_chars);
+                       return sizeof(identifier_chars)-1;
+               };
+               case 'I': {
+                       const char identifier_chars[] = "@_\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337";
+                       strcpy(whitelist, identifier_chars);
+                       return sizeof(identifier_chars)-1;
+               };
+               case 'k': {
+                       const char keyword_chars[] = "@0123456789_\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337";
+                       strcpy(whitelist, keyword_chars);
+                       return sizeof(keyword_chars)-1;
+               };
+               case 'K': {
+                       const char keyword_chars[] = "@_\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337";
+                       strcpy(whitelist, keyword_chars);
+                       return sizeof(keyword_chars)-1;
+               };
+               case 'f': {
+                       const char filename_chars[] = "@0123456789/.-_+,#$%~=";
+                       strcpy(whitelist, keyword_chars);
+                       return sizeof(keyword_chars)-1;
+               };
+               case 'F': {
+                       const char filename_chars[] = "@/.-_+,#$%~=";
+                       strcpy(whitelist, keyword_chars);
+                       return sizeof(keyword_chars)-1;
+               };
+               case 'p': {
+                       const char printable_chars[] = "@\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337";
+                       strcpy(whitelist, printable_chars);
+                       return sizeof(printable_chars)-1;
+               };
+               case 'P': {
+                       const char printable_chars[] = "@\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337";
+                       strcpy(whitelist, printable_chars);
+                       return sizeof(printable_chars)-1;
+               };
+               case 's': {
+                       const char whitespace_chars[] = " \t\v\n";
+                       strcpy(whitelist, whitespace_chars);
+                       return sizeof(whitespace_chars)-1;
+               };
                case 'd': {
-                       const char digitchars[] = "0123456789";
-                       strcpy(whitelist, digitchars);
-                       return sizeof(digitchars)-1;
+                       const char digit_chars[] = "0123456789";
+                       strcpy(whitelist, digit_chars);
+                       return sizeof(digit_chars)-1;
+               };
+               case 'x': {
+                       const char hex_chars[] = "0123456789abcdefABCDEF";
+                       strcpy(whitelist, hex_chars);
+                       return sizeof(hex_chars)-1;
+               };
+               case 'o': {
+                       const char oct_chars[] = "01234567";
+                       strcpy(whitelist, oct_chars);
+                       return sizeof(oct_chars)-1;
                };
                case 'w': {
-                       const char wordchars[] = "abcdefghijklmnopqrstuwxyzABCDEFGHIJKLMNOPQRSTUWXYZ";
-                       strcpy(whitelist, wordchars);
-                       return sizeof(wordchars)-1;
+                       const char word_chars[] = "0123456789abcdefghijklmnopqrstuwxyzABCDEFGHIJKLMNOPQRSTUWXYZ_";
+                       strcpy(whitelist, word_chars);
+                       return sizeof(word_chars)-1;
                };
-               case 's': {
-                       const char blankchars[] = " \t\v\n";
-                       strcpy(whitelist, blankchars);
-                       return sizeof(blankchars)-1;
+               case 'h': {
+                       const char very_word_chars[] = "abcdefghijklmnopqrstuwxyzABCDEFGHIJKLMNOPQRSTUWXYZ_";
+                       strcpy(whitelist, very_word_chars);
+                       return sizeof(very_word_chars)-1;
+               };
+               case 'a': {
+                       const char alpha_chars[] = "abcdefghijklmnopqrstuwxyzABCDEFGHIJKLMNOPQRSTUWXYZ";
+                       strcpy(whitelist, alpha_chars);
+                       return sizeof(alpha_chars)-1;
+               };
+               case 'l': {
+                       const char lower_alpha_chars[] = "abcdefghijklmnopqrstuwxyz";
+                       strcpy(whitelist, lower_alpha_chars);
+                       return sizeof(lower_alpha_chars)-1;
+               };
+               case 'u': {
+                       const char upper_alpha_chars[] = "ABCDEFGHIJKLMNOPQRSTUWXYZ";
+                       strcpy(whitelist, upper_alpha_chars);
+                       return sizeof(upper_alpha_chars)-1;
                };
        }
 
@@ -93,7 +189,10 @@ int compile_range(const char * const     range,
        for (s = range+1; *s != ']'; s++) {
                assert(*s != '\00' && "Unclosed range.");
                char c = *s;
-               if (*(s+1) == '-') {
+               if (escape_1_to_1(c, whitelist)
+               ||  escape_1_to_N(c, whitelist)) {
+                       ;
+               } else if (*(s+1) == '-') {
                        char end = *(s+2);
                        assert(c < end && "Endless range.");
                        for (char cc = c; cc < end+1; cc++) {