anon
2024-10-02 19:52:22 +02:00
commit f24fac2ddf
7 changed files with 374 additions and 0 deletions

3
.gitignore vendored Normal file

@@ -0,0 +1,3 @@
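# python venv, flex output (*.yy.c), and @BAKE-built binaries (*.out)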
venv/
*.yy.*
*.out

119
converter.l Normal file

@@ -0,0 +1,119 @@
/* @BAKE
flex -o $*.yy.c $@
gcc -o $*.out $*.yy.c
@STOP
*/
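/* Usage (after the @BAKE recipe above builds converter.out), per main() below:
 *   converter.out normalize  <file>  -- one space between every token
 *   converter.out accumulate <file>  -- per-token whitespace counts as [..] rows
 */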
%{
/* NOTE: this should eventually be compiled as a shared library so Python can
   call into it directly */
/* XXX: we have a problem with nuking system includes;
   it conflicts with trying to be language agnostic;
   hopefully the model can simply learn that there are never spaces there
*/
#include <stdio.h>
#include <string.h> /* strlen(), strcmp() */
int mystate;
int accumulator = 0;
/* write a literal string to the scanner's output stream */
#define ECHOS(s) fwrite(s, strlen(s), 1, yyout)
#define EOL '\n'
%}
comment_marker (\/\*)|(\*\/)
identifier \$?[A-Za-z0-9_]+
modify [+-]{2}
assignment ([-+*/%]|(<<)|(>>))=
shift (<<)|(>>)
word {identifier}|{comment_marker}|{assignment}|{shift}|{modify}
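/* a "word" is any unit the accumulator counts: an identifier, a comment
   fence, a compound assignment, a shift, or ++/-- */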
%x NORMALIZE ACCUMULATE
%x IN_STRING
%option noyywrap nodefault
%%
	/* executed on every yylex() entry: (re)select the start condition */
	BEGIN mystate;
	if (mystate == ACCUMULATE) {
		ECHOS("[");
	}
<NORMALIZE>{
[ ]|\t { ; }
\" {
ECHO;
BEGIN IN_STRING;
}
{word}|. {
ECHO;
ECHOS(" ");
}
\n {
ECHO;
return EOL;
}
}
<ACCUMULATE>{
[ ] {
++accumulator;
}
\t {
accumulator += 4;
}
\" {
BEGIN IN_STRING;
}
{word}|. {
printf("%d, ", accumulator);
accumulator = 0;
}
\n\n {
ECHOS("]\n[0]\n[");
}
\n {
ECHOS("]\n[");
}
}
<IN_STRING>{
\\\" {
if (mystate == NORMALIZE) {
ECHO;
}
}
\" {
if (mystate == NORMALIZE) {
ECHO;
}
BEGIN mystate;
}
.|\n {
if (mystate == NORMALIZE) {
ECHO;
}
}
}
%%
signed main(const int argc, const char * const * const argv) {
	if (argc < 3) {
		puts("Usage: converter (normalize|accumulate) <file>");
		return 1;
	}
	if (!strcmp(argv[1], "normalize")) {
		mystate = NORMALIZE;
	} else
	if (!strcmp(argv[1], "accumulate")) {
		mystate = ACCUMULATE;
	} else {
		puts("Usage: converter (normalize|accumulate) <file>");
		return 1;
	}
	yyin = fopen(argv[2], "r");
	if (!yyin) {
		perror(argv[2]);
		return 1;
	}
	while (yylex() == EOL) { ; }
	return 0;
}
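/* shared-library build sketch (an assumption, matching the NOTE above), so
 * that Python could load the scanner via ctypes instead of a subprocess:
 *   flex -o converter.yy.c converter.l
 *   gcc -shared -fPIC -o libconverter.so converter.yy.c
 */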

43
data.py Normal file

@@ -0,0 +1,43 @@
import re
from bidict import bidict
#CHAR_TOKENS = bidict({
# '': 0,
# '\n': 1,
#})
#CHAR_TOKEN_OFFSET = 1
def encode(s : str) -> str:
    '''collapse every whitespace run to a single space'''
    return re.sub(r'\s+', ' ', s)
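# e.g. encode('if  (\n\ta') == 'if ( a'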
#def decode(s : str, o : [int]) -> str:
#    result = []
#    space_index = 0
#    for char in s:
#        if char == ' ':
#            if o[space_index] in CHAR_TOKENS.inverse:
#                result.append(CHAR_TOKENS.inverse[o[space_index]])
#            else:
#                result.append(' ' * (o[space_index] - CHAR_TOKEN_OFFSET))
#            space_index += 1
#        else:
#            result.append(char)
#    return ''.join(result)
def decode(s : str, o : [int]) -> str:
    '''re-expand each single space in s to o[i] spaces'''
    result = []
    space_index = 0
    for char in s:
        if char == ' ':
            result.append(' ' * o[space_index])
            space_index += 1
        else:
            result.append(char)
    return ''.join(result)
def batchificate(f):
    BATCH_SIZE = 32
    with open(f, 'r') as file:
        s = file.read()
    s = encode(s)
    # TODO: stub; split the encoded source into BATCH_SIZE-sized chunks and return them
print(decode(encode('if ( a == b ) { a = c )'), [2,0,2,2,0,1,0,4,1,1]))
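# expected output, given that count list: 'if  (a  ==  b) {a    = c )'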

50
in/assignments.list Normal file

@@ -0,0 +1,50 @@
x = 10
y = 3.14
str_var = "Hello, World!"
is_true = True
list_var = [1, 2, 3, 4, 5]
dict_var = {"key": "value", "another_key": "another_value"}
tuple_var = (1, 2, 3)
set_var = {1, 2, 3}
let x = 10;
let y = 3.14;
let strVar = "Hello, World!";
let isTrue = true;
let arrayVar = [1, 2, 3, 4, 5];
let objectVar = {"key": "value", "anotherKey": "anotherValue"};
let tupleVar = [1, 2, 3];
let setVar = new Set([1, 2, 3]);
int x = 10;
double y = 3.14;
String strVar = "Hello, World!";
boolean isTrue = true;
int[] arrayVar = {1, 2, 3, 4, 5};
HashMap<String, String> mapVar = new HashMap<>();
Tuple tuVar = new Tuple(1, 2);
Set<Integer> setVar = new HashSet<>();
int x = 10;
double y = 3.14;
string strVar = "Hello, World!";
bool isTrue = true;
int[] arrayVar = {1, 2, 3, 4, 5};
Dictionary<string, string> dictVar = new Dictionary<string, string>();
dictVar.Add("key", "value");
dictVar.Add("anotherKey", "anotherValue");
Tuple<int, int> tupleVar = new Tuple<int, int>(1, 2);
HashSet<int> setVar = new HashSet<int>();
x = 10
y = 3.14
str_var = "Hello, World!"
is_true = true
array_var = [1, 2, 3, 4, 5]
hash_var = {"key" => "value", "another_key" => "another_value"}
tuple_var = [1, 2, 3]
set_var = Set.new([1, 2, 3])
var x: Int = 10
var y: Double = 3.14
var strVar: String = "Hello, World!"
var isTrue: Bool = true
var arrayVar: [Int] = [1, 2, 3, 4, 5]
var dictVar: [String: String] = ["key": "value", "anotherKey": "anotherValue"]
var tupleVar: (Int, Int) = (1, 2)
var setVar: Set<Int> = [1, 2, 3]

60
in/xop.c Normal file

@@ -0,0 +1,60 @@
/*
* Copyright (c) 2023 : Ognjen 'xolatile' Milan Robovic
*
* Xop is free software!
* You will redistribute it or modify it under the terms of
* the GNU General Public License by Free Software Foundation.
* And when you do redistribute it or modify it,
* it will use either version 3 of the License,
* or (at yours truly opinion) any later version.
* It is distributed in the hope that it will be useful or harmful,
* it really depends...
* But no warranty what so ever, seriously.
* See GNU/GPLv3.
*/
#include <xolatile/xtandard.h>
#include <xolatile/xtandard.c>
int main (int argc, char * * argv) {
	int file = -1;
	int size = 0;
	int offset = 0;
	unsigned char * buffer = NULL;
	if (argc != 2) {
		fatal_failure (1, "xop: xop input");
	}
	file = file_open (argv [1], O_RDONLY);
	size = file_size (file);
	buffer = allocate (size);
	file_read (file, buffer, size);
	file = file_close (file);
	do {
		int byte = (int) buffer [offset];
		if (byte == 0X90) {
			echo_new_line ();
			terminal_style (EFFECT_NORMAL, COLOUR_YELLOW);
			echo_byte ((int) buffer [offset]);
			terminal_style (-1, -1);
		} else {
			echo_byte (buffer [offset]);
		}
		++offset;
	} while (offset != size);
	echo_new_line ();
	buffer = deallocate (buffer);
	return (EXIT_SUCCESS);
}

98
main.py Normal file

@@ -0,0 +1,98 @@
import subprocess
import os
import ast
import numpy as np
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # silence TF C++ logging before import
import tensorflow
from tensorflow import keras
from tensorflow.keras import layers
LINE_WIDTH = 80
MAX_SHIMS = LINE_WIDTH - 1 # an 80-column line has at most 79 inter-token gaps
def get_data():
    def get_source(path : str) -> [str]:
        '''returns the source file as non-overlapping 3-line batches'''
        with open(path, 'r') as file:
            lines = [line.strip() for line in file]
        # drop any trailing partial batch so every element is exactly 3 lines
        return [lines[i:i + 3] for i in range(0, len(lines) - len(lines) % 3, 3)]
    def source_to_np_array(source_batches : []) -> np.array:
        '''each batch becomes a (1, 3, LINE_WIDTH, 1) array of ASCII codes'''
        r = []
        for s in source_batches:
            ascii_list = []
            for l in s:
                l = l[:LINE_WIDTH]
                l = l.ljust(LINE_WIDTH)
                ascii_list += [ord(i) for i in l]
            n = np.reshape(ascii_list, (3, -1, 1))
            n = np.expand_dims(n, axis=0)
            r.append(n)
        return r
    def get_whitespace(path : str) -> [int]:
        '''XXX returns the whitespace list of every middle line'''
        r = []
        output_file = "muf_file.txt"
        # run the flex-generated converter (see converter.l) and wait for it,
        # so the output file is complete before we read it back
        subprocess.run(
            "./converter.out accumulate " + path + " > " + output_file,
            shell=True,
            check=True,
        )
        with open(output_file, 'r') as file:
            for n, line in enumerate(file):
                # converter emits one [..] row per input line; keep only the
                # middle line of every 3-line batch
                if ((n + 2) % 3) != 0: continue
                r.append(ast.literal_eval(line))
        return r
    source = source_to_np_array(get_source("in/xop.c"))
    whitespace = get_whitespace("in/xop.c")
    whitespace = [np.array(i) for i in whitespace]
    return {'in': source, 'out': whitespace}
data = get_data()
assert len(data['in']) == len(data['out']), "data in and out sizes were inconsistent."
print(data['in'], data['out'])
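# shaping sketch (an assumption, not in the original): stack the per-batch
# inputs and zero-pad every whitespace row to MAX_SHIMS so the Dense(MAX_SHIMS)
# head below gets fixed-size targets; assumes no middle line has more than
# MAX_SHIMS tokens
data['in'] = np.vstack(data['in'])
data['out'] = np.vstack([np.pad(o, (0, MAX_SHIMS - len(o))) for o in data['out']])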
model = keras.Sequential([
    layers.Conv2D(
        filters=16,
        kernel_size=(3,3),
        strides=(1,1),
        activation='relu',
        padding='valid',
        input_shape=(3,LINE_WIDTH,1)
    ),
    #layers.Conv2D(
    #    filters=32,
    #    kernel_size=(3,7),
    #    activation='relu',
    #    padding='valid'
    #),
    #layers.Conv2D(
    #    filters=64,
    #    kernel_size=(3,13),
    #    activation='relu',
    #    padding='valid'
    #),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(MAX_SHIMS, activation='softmax')
])
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['accuracy']
)
model.fit(data['in'], data['out'],
    verbose=2,
    batch_size=10,
    epochs=50,
    shuffle=True,
)
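# inference sketch (an assumption): predict whitespace counts for the first
# 3-line window; data.decode() could then re-expand the normalized middle line
pred = model.predict(data['in'][:1], verbose=0)[0]
print([int(round(c)) for c in pred])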

2
requirements.txt Normal file

@@ -0,0 +1,2 @@
tensorflow
bidict