From f24fac2ddfe34a851d494cf1ee3c083b01bbac11 Mon Sep 17 00:00:00 2001
From: anon
Date: Wed, 2 Oct 2024 19:52:22 +0200
Subject: [PATCH] init

---
 .gitignore          |   3 ++
 converter.l         | 119 ++++++++++++++++++++++++++++++++++++++++++++
 data.py             |  43 ++++++++++++++++
 in/assignments.list |  50 +++++++++++++++++++
 in/xop.c            |  60 ++++++++++++++++++++++
 main.py             |  98 ++++++++++++++++++++++++++++++++++++
 requirements.txt    |   1 +
 7 files changed, 374 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 converter.l
 create mode 100644 data.py
 create mode 100644 in/assignments.list
 create mode 100644 in/xop.c
 create mode 100644 main.py
 create mode 100644 requirements.txt

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1f0f7a5
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+venv/
+*.yy.*
+*.out
diff --git a/converter.l b/converter.l
new file mode 100644
index 0000000..cbbcd48
--- /dev/null
+++ b/converter.l
@@ -0,0 +1,119 @@
+/* @BAKE
+   flex -o $*.yy.c $@
+   gcc -o $*.out $*.yy.c
+   @STOP
+ */
+%{
+	/* NOTE: this shall be compiled as a shared library so python may call in
+	 */
+	/* XXX: we have a problem on nuking system includes;
+	   this fucks with trying to be language agnostic;
+	   i wonder if hopefully the AI can just realize theres never spaces there
+	 */
+	#include <string.h> /* NOTE(review): include target lost in transit; strcmp/strlen need string.h -- confirm */
+
+	int mystate;
+
+	int accumulator = 0;
+
+	#define ECHOS(s) fwrite(s, strlen(s), 1, yyout)
+
+	#define EOL '\n'
+%}
+
+comment_marker	(\/\*)|(\*\/)
+identifier	\$?[A-Za-z0-9_]+
+modify	[+-]{2}
+assignment	([+-/*%]|(<<)|(>>))=
+shift	(<<)|(>>)
+
+word	{identifier}|{comment_marker}|{assignment}|{shift}|{modify}
+
+%x NORMALIZE ACCUMULATE
+%x IN_STRING
+%option noyywrap nodefault
+%%
+	BEGIN mystate;
+	if (mystate == ACCUMULATE) {
+		ECHOS("[");
+	}
+
+<NORMALIZE>{
+[ ]|\t	{ ; }
+\"	{
+		ECHO;
+		BEGIN IN_STRING;
+	}
+{word}|.	{
+		ECHO;
+		ECHOS(" ");
+	}
+\n	{
+		ECHO;
+		return EOL;
+	}
+}
+
+<ACCUMULATE>{
+[ ]	{
+		++accumulator;
+	}
+\t	{
+		accumulator += 4;
+	}
+\"	{
+		BEGIN IN_STRING;
+	}
+{word}|.	{
+		printf("%d, ", accumulator);
+		accumulator = 0;
+	}
+\n\n	{
+		ECHOS("]\n[0]\n[");
+	}
+\n	{
+		ECHOS("]\n[");
+	}
+}
+
+<IN_STRING>{
+\\\"	{
+		if (mystate == NORMALIZE) {
+			ECHO;
+		}
+	}
+\"	{
+		if (mystate == NORMALIZE) {
+			ECHO;
+		}
+		BEGIN mystate;
+	}
+.|\n	{
+		if (mystate == NORMALIZE) {
+			ECHO;
+		}
+	}
+}
+%%
+
+signed main(const int argc, const char * const * const argv) {
+	if (argc < 3) {
+		puts("Usage: converter <normalize|accumulate> <file>");
+		return 1;
+	}
+
+	if (!strcmp(argv[1], "normalize")) {
+		mystate = NORMALIZE;
+	} else
+	if (!strcmp(argv[1], "accumulate")) {
+		mystate = ACCUMULATE;
+	} else {
+		return 1;
+	}
+
+	yyin = fopen(argv[2], "r");
+
+	while (yylex() == EOL) { ; }
+
+	return 0;
+}
diff --git a/data.py b/data.py
new file mode 100644
index 0000000..d4db29c
--- /dev/null
+++ b/data.py
@@ -0,0 +1,44 @@
+import re
+from bidict import bidict
+
+#CHAR_TOKENS = bidict({
+#    '': 0,
+#    '\n': 1,
+#})
+#CHAR_TOKEN_OFFSET = 1
+
+def encode(s : str) -> str:
+    return re.sub(r'\s+', ' ', s)
+
+#def decode(s : str, o : [int]) -> str:
+#    result = []
+#    space_index = 0
+#    for char in s:
+#        if char == ' ':
+#            if o[space_index] in CHAR_TOKENS.inverse:
+#                result.append(CHAR_TOKENS.inverse[o[space_index]])
+#            else:
+#                result.append(' ' * (o[space_index] - CHAR_TOKEN_OFFSET))
+#            space_index += 1
+#        else:
+#            result.append(char)
+#    return ''.join(result)
+
+def decode(s : str, o : [int]) -> str:
+    result = []
+    space_index = 0
+    for char in s:
+        if char == ' ':
+            # FIX: original line was missing its closing parenthesis (SyntaxError)
+            result.append(' ' * (o[space_index]))
+            space_index += 1
+        else:
+            result.append(char)
+    return ''.join(result)
+
+def batchificate(f):
+    BATCH_SIZE = 32
+    s = open(f, 'r').read()
+    s = encode(s)
+
+print(decode(encode('if ( a == b ) { a = c )'), [2,0,2,2,0,1,0,4,1,1]))
diff --git a/in/assignments.list b/in/assignments.list
new file mode 100644
index 0000000..0af0d3c
--- /dev/null
+++ b/in/assignments.list
@@ -0,0 +1,50 @@
+x = 10
+y = 3.14
+str_var = "Hello, World!"
+is_true = True
+list_var = [1, 2, 3, 4, 5]
+dict_var = {"key": "value", "another_key": "another_value"}
+tuple_var = (1, 2, 3)
+set_var = {1, 2, 3}
+let x = 10;
+let y = 3.14;
+let strVar = "Hello, World!";
+let isTrue = true;
+let arrayVar = [1, 2, 3, 4, 5];
+let objectVar = {"key": "value", "anotherKey": "anotherValue"};
+let tupleVar = [1, 2, 3];
+let setVar = new Set([1, 2, 3]);
+int x = 10;
+double y = 3.14;
+String strVar = "Hello, World!";
+boolean isTrue = true;
+int[] arrayVar = {1, 2, 3, 4, 5};
+HashMap<Integer, String> mapVar = new HashMap<>();
+Tuple tuVar = new Tuple(1, 2);
+Set<Integer> setVar = new HashSet<>();
+int x = 10;
+double y = 3.14;
+string strVar = "Hello, World!";
+bool isTrue = true;
+int[] arrayVar = {1, 2, 3, 4, 5};
+Dictionary<string, string> dictVar = new Dictionary<string, string>();
+dictVar.Add("key", "value");
+dictVar.Add("anotherKey", "anotherValue");
+Tuple<int, int> tupleVar = new Tuple<int, int>(1, 2);
+HashSet<int> setVar = new HashSet<int>();
+x = 10
+y = 3.14
+str_var = "Hello, World!"
+is_true = true
+array_var = [1, 2, 3, 4, 5]
+hash_var = {"key" => "value", "another_key" => "another_value"}
+tuple_var = [1, 2, 3]
+set_var = Set.new([1, 2, 3])
+var x: Int = 10
+var y: Double = 3.14
+var strVar: String = "Hello, World!"
+var isTrue: Bool = true
+var arrayVar: [Int] = [1, 2, 3, 4, 5]
+var dictVar: [String: String] = ["key": "value", "anotherKey": "anotherValue"]
+var tupleVar: (Int, Int) = (1, 2)
+var setVar: Set<Int> = [1, 2, 3]
diff --git a/in/xop.c b/in/xop.c
new file mode 100644
index 0000000..9a77c9e
--- /dev/null
+++ b/in/xop.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023 : Ognjen 'xolatile' Milan Robovic
+ *
+ * Xop is free software!
+ * You will redistribute it or modify it under the terms of
+ * the GNU General Public License by Free Software Foundation.
+ * And when you do redistribute it or modify it,
+ * it will use either version 3 of the License,
+ * or (at yours truly opinion) any later version.
+ * It is distributed in the hope that it will be useful or harmful,
+ * it really depends...
+ * But no warranty what so ever, seriously.
+ * See GNU/GPLv3.
+ */
+
+#include <stdlib.h> /* NOTE(review): include targets lost in transit; EXIT_SUCCESS needs stdlib.h -- confirm */
+#include <xolatile/xtandard.c> /* NOTE(review): guessed -- something must provide fatal_failure/file_*/echo_*/allocate */
+
+int main (int argc, char * * argv) {
+	int file = -1;
+	int size = 0;
+	int offset = 0;
+
+	unsigned char * buffer = NULL;
+
+	if (argc != 2) {
+		fatal_failure (1, "xop: xop input");
+	}
+
+	file = file_open (argv [1], O_RDONLY);
+	size = file_size (file);
+
+	buffer = allocate (size);
+
+	file_read (file, buffer, size);
+
+	file = file_close (file);
+
+	do {
+		int byte = (int) buffer [offset];
+		if (byte == 0X90) {
+			echo_new_line ();
+			terminal_style (EFFECT_NORMAL, COLOUR_YELLOW);
+			echo_byte ((int) buffer [offset]);
+			terminal_style (-1, -1);
+		} else {
+			echo_byte (buffer [offset]);
+		}
+
+		++offset;
+	} while (offset != size);
+
+	echo_new_line ();
+
+	buffer = deallocate (buffer);
+
+	return (EXIT_SUCCESS);
+}
+
+
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..f6b471e
--- /dev/null
+++ b/main.py
@@ -0,0 +1,106 @@
+import subprocess
+import os
+import numpy as np
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+
+import tensorflow
+from tensorflow import keras
+from keras import layers
+
+LINE_WIDTH = 80
+MAX_SHIMS = LINE_WIDTH - 1
+
+
+def get_data():
+    r = []
+    def get_source(path : str) -> [str]:
+        '''returns source file 3 line batches'''
+        r = []
+        with open(path, 'r') as file:
+            lines = []
+            for line in file:
+                lines.append(line.strip())
+            r = [lines[i:i + 3] for i in range(0, len(lines), 3)]
+        return r
+    def source_to_np_array(source_batches : []) -> np.array:
+        r = []
+        for s in source_batches:
+            ascii_list = []
+            for l in s:
+                l = l[:LINE_WIDTH]
+                l = l.ljust(LINE_WIDTH)
+                l = [ord(i) for i in l]
+                ascii_list += l
+            n = np.reshape(ascii_list, (3, -1, 1))
+            n = np.expand_dims(n, axis=0)
+            r.append(n)
+        return r
+    def get_whitespace(path : str) -> [int]:
+        '''XXX returns the whitespace list of every middle line'''
+        r = []
+        output_file = "muf_file.txt"
+        process = subprocess.Popen(
+            "converter.out accumulate " + path + " > " + output_file,
+            shell=True,
+        )
+        # FIX: wait for the converter to finish before reading its output,
+        # otherwise the file may be read while empty or partial (race).
+        process.wait()
+        with open(output_file, 'r') as file:
+            for n, line in enumerate(file):
+                if ((n + 2) % 3) != 0: continue
+                # XXX eval() on tool output -- trusted local converter here,
+                # but ast.literal_eval would be the safer choice.
+                r.append(eval(line))
+        return r
+    source = source_to_np_array(get_source("in/xop.c"))
+    whitespace = get_whitespace("in/xop.c")
+    whitespace = [np.array(i) for i in whitespace]
+    r = {'in': source, 'out': whitespace}
+    return r
+
+data = get_data()
+# FIX: original asserted len(data['in']) == len(data['in']) -- a no-op self-comparison.
+assert len(data['in']) == len(data['out']), "data in and out sizes were inconsistent."
+print(data['in'], data['out'])
+
+model = keras.Sequential([
+    layers.Conv2D(
+        filters=16,
+        kernel_size=(3,3),
+        strides=(1,1),
+        activation='relu',
+        padding='valid',
+        input_shape=(3,LINE_WIDTH,1)
+    ),
+    #layers.Conv2D(
+    #    filters=32,
+    #    kernel_size=(3,7),
+    #    activation='relu',
+    #    padding='valid'
+    #),
+    #layers.Conv2D(
+    #    filters=64,
+    #    kernel_size=(3,13),
+    #    activation='relu',
+    #    padding='valid'
+    #),
+    layers.Flatten(),
+    layers.Dense(64, activation='relu'),
+    # NOTE(review): softmax output trained with 'mse' against raw whitespace
+    # counts is a dubious pairing -- confirm intended loss/activation.
+    layers.Dense(MAX_SHIMS, activation='softmax')
+])
+
+model.compile(
+    optimizer='adam',
+    loss='mse',
+    metrics=['accuracy']
+)
+
+model.fit(data['in'], data['out'],
+    verbose=2,
+    batch_size=10,
+    epochs=50,
+    shuffle=True,
+)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..0f57144
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+bidict
+numpy
+tensorflow