closer to the truth

2024-10-03 10:14:24 +02:00
parent 0a2cbb014c
commit 95c847b9a1
4 changed files with 58 additions and 33 deletions
--- a/converter.l
+++ b/converter.l
@ -11,19 +11,22 @@
             i wonder if hopefully the AI can just realize theres never spaces there
    */
    #include <stdio.h>
+    #include <stdbool.h>

-	#define LINE_WIDTH 80
-	#define MAX_SHIMS  LINE_WIDTH - 1
+    #define LINE_WIDTH 80
+    #define MAX_SHIMS  LINE_WIDTH - 1

    int mystate;

+    bool was_word = false;
+
    int accumulator = 0;

-	FILE * build_file;
-	int schemantic[MAX_SHIMS];
-	int schim = 0;
+    FILE * build_file;
+    int schemantic[MAX_SHIMS];
+    int schim = 0;

-	#define STEP_SCHEMANTIC fread(schemantic, MAX_SHIMS, 1, build_file)
+    #define STEP_SCHEMANTIC fread(schemantic, MAX_SHIMS, 1, build_file)
    #define ECHOS(s) fwrite(s, strlen(s), 1, yyout)

    #define EOL '\n'
@ -35,7 +38,8 @@ modify          [+-]{2}
 assignment      ([+-/*%]|(<<)|(>>))=
 shift           (<<)|(>>)

-word    {identifier}|{comment_marker}|{assignment}|{shift}|{modify}
+word    {identifier}
+special {comment_marker}|{assignment}|{shift}|{modify}

 %x NORMALIZE ACCUMULATE BUILD
 %x IN_STRING
@ -52,9 +56,16 @@ word    {identifier}|{comment_marker}|{assignment}|{shift}|{modify}
                ECHO;
                BEGIN IN_STRING;
            }
-{word}|.    {
+{word}      {
+                if (was_word) {
+                    ECHOS(" ");
+                }
                ECHO;
-                ECHOS(" ");
+                was_word = true;
+            }
+{special}|. {
+                ECHO;
+                was_word = false;
            }
 \n          {
                ECHO;
@ -72,7 +83,16 @@ word    {identifier}|{comment_marker}|{assignment}|{shift}|{modify}
 \"          {
                BEGIN IN_STRING;
            }
-{word}|.    {
+{word}      {
+                if (was_word) {
+                    --accumulator;
+                }
+                was_word = true;
+                printf("%d, ", accumulator);
+                accumulator = 0;
+            }
+{special}|. {
+                was_word = false;
                printf("%d, ", accumulator);
                accumulator = 0;
            }
@ -89,13 +109,13 @@ word    {identifier}|{comment_marker}|{assignment}|{shift}|{modify}
 {word}|.    {
                ECHO;
                for (int i = 0; i < schemantic[schim]; i++) {
-					ECHOS(" ");
-				}
-				++schim;
+                    ECHOS(" ");
+                }
+                ++schim;
+            }
+\n          {
+                STEP_SCHEMANTIC;
            }
-\n			{
-				STEP_SCHEMANTIC;
-			}
 }

 <IN_STRING>{
@ -132,8 +152,8 @@ signed main(const int argc, const char * const * const argv) {
    } else
    if (!strcmp(argv[1], "build")) {
        mystate = BUILD;
-		build_file = fopen("build_file", "rb");
-		STEP_SCHEMANTIC;
+        build_file = fopen("build_file", "rb");
+        STEP_SCHEMANTIC;
    } else {
        return 1;
    }
--- a/data.py
+++ b/data.py
@ -9,11 +9,11 @@ def get_data():
 	def get_source(path : str) -> [str]:
 		'''returns source file 3 line batches'''
 		r = []
-		with open(path, 'r') as file:
-			lines = []
-			for line in file:
-				lines.append(line.strip())
-			r = [lines[i:i + 3] for i in range(0, len(lines), 3)]
+		with open(path, 'r') as file: lines = [line[:-1] for line in file]
+		lines.insert(0, "")
+		lines.append("")
+		for i in range(len(lines)-2):
+			r.append(lines[i:i+3])
 		return r
 	def source_to_np_array(source_batches : []) -> np.array:
 		r = []
@ -34,20 +34,27 @@ def get_data():
 		output = "muf_file.txt"
 		tard_wrangler.accumulate(INPUT_FILE, output)
 		with open(output, 'r') as file:
-			for n, line in enumerate(file):
-				if ((n + 2) % 3) != 0: continue
-				l = eval(line)
-				l = l + [0] * (MAX_SHIMS - len(l))
-				r.append(l)
+			for line in file:
+				try:
+					l = eval(line)
+					l = l + [0] * (MAX_SHIMS - len(l))
+					r.append(l)
+				except: pass
 		return r
 	def whitespace_to_np_array(spaces : []) -> np.array:
 		r = spaces
-		r = np.array(r).reshape(20, -1)
+		r = np.array(r).reshape(len(spaces), -1)
 		return r
 	source = source_to_np_array(get_source(INPUT_FILE))
 	whitespace = whitespace_to_np_array(get_whitespace(INPUT_FILE))
 	r = {'in': source, 'out': whitespace}
-	assert len(r['in']) == len(r['in']), "data in and out sizes were inconsistent."
+	assert len(r['in']) == len(r['out']), (
+			"data in and out sizes were inconsistent ("
+			+ str(r['in'].shape)
+			+ " "
+			+ str(r['out'].shape)
+			+ "."
+	)
 	return r

 if __name__ == "__main__":
--- a/data/xop.c
+++ b/data/xop.c
@ -56,5 +56,3 @@ int main (int argc, char * * argv) {

 	return (EXIT_SUCCESS);
 }
-
-
--- a/formatter.py
+++ b/formatter.py
@ -17,7 +17,7 @@ model = keras.Sequential([
 	keras.Input(shape=(3, LINE_WIDTH, 1)),
 	layers.Conv2D(
 		filters=16,
-		kernel_size=(3,3),
+		kernel_size=(3,5),
 		strides=(1,1),
 		activation='relu',
 		padding='valid',