it does something now

anon
2024-10-07 13:00:15 +02:00
parent 27338a3481
commit 4b6bf0f208
7 changed files with 119 additions and 60 deletions

View File

@@ -3,3 +3,4 @@ MAX_SHIMS = LINE_WIDTH - 1
SOURCE_LINE_BATCH_SIZE = 3
COMPILE_INPUT_DIRECTORY = "data/linux/"
MODEL_DIRECTORY = "models/"

View File

@@ -18,14 +18,17 @@
FILE * build_file;
char schemantic[MAX_SHIMS];
int schim = 0;
int schim;
#define STEP_SCHEMANTIC fread(schemantic, MAX_SHIMS, sizeof(char), build_file)
#define STEP_SCHEMANTIC do { \
    schim = 0; \
    int re = fread(schemantic, sizeof(char), MAX_SHIMS, build_file); \
    if (re != sizeof(char)*MAX_SHIMS) { printf("- %d\n", re); exit(2); } \
} while (0)
#define ECHOS(s) fwrite(s, strlen(s), 1, yyout)
#define EOL '\n'
%}
// XXX: the != operator is missing from the definitions below
comment_marker (\/\*)|(\*\/)
identifier \$?[A-Za-z0-9_]+
modify [+-]{2}
@@ -35,8 +38,16 @@ shift (<<)|(>>)
word {identifier}
special {comment_marker}|{assignment}|{shift}|{modify}
%x NORMALIZE ACCUMULATE BUILD
%x IN_STRING
// Keep all but the required whitespaces
%x NORMALIZE
// Count the non-required whitespaces and write python arrays
%x ACCUMULATE
// Reconstruct normalized file based on binary whitespace count arrays
%x BUILD
%option yylineno
%option noyywrap nodefault
%%
BEGIN mystate;
@@ -61,10 +72,7 @@ special {comment_marker}|{assignment}|{shift}|{modify}
        ECHO;
        was_word = false;
    }
    \n {
        ECHO;
        return EOL;
    }
    \n { ECHO; }
}
<ACCUMULATE>{
@@ -99,15 +107,17 @@ special {comment_marker}|{assignment}|{shift}|{modify}
}
<BUILD>{
    [ ]|\t { ; }
    {word}|. {
        ECHO;
    [ ] { ECHO; }
    {word}|{special}|. {
        for (char i = 0; i < schemantic[schim]; i++) {
            ECHOS(" ");
        }
        ECHO;
        ++schim;
    }
    \n { // XXX: on the final newline we still call STEP_SCHEMANTIC, which hits EOF and exits with an error
        ECHO;
        STEP_SCHEMANTIC;
    }
}
@@ -147,6 +157,7 @@ signed main(const int argc, const char * const * const argv) {
    if (!strcmp(argv[1], "build")) {
        mystate = BUILD;
        build_file = fopen("build_file", "rb");
        if (!build_file) { exit(1); }
        STEP_SCHEMANTIC;
    } else {
        return 1;
@@ -154,7 +165,7 @@ signed main(const int argc, const char * const * const argv) {
    yyin = fopen(argv[2], "r");
    while(yylex() == EOL) { ; }
    yylex();
    return 0;
}
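For reference, BUILD mode consumes the schematic one source line at a time: every newline triggers STEP_SCHEMANTIC, which pulls MAX_SHIMS bytes from build_file, one unsigned byte per token slot giving the number of shim spaces to emit before that token. A minimal Python sketch of a reader for that format (LINE_WIDTH's value is not part of this diff, so 80 is an assumption):

# Sketch: dump a build_file schematic. config.py defines
# MAX_SHIMS = LINE_WIDTH - 1; LINE_WIDTH = 80 is assumed here.
MAX_SHIMS = 80 - 1

with open("build_file", "rb") as f:
    line_no = 0
    while chunk := f.read(MAX_SHIMS):
        if len(chunk) != MAX_SHIMS:  # mirrors the exit(2) in STEP_SCHEMANTIC
            raise SystemExit(f"short read at line {line_no}: {len(chunk)} bytes")
        print(line_no, list(chunk))  # one shim count per token slot
        line_no += 1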

data.py
View File

@@ -1,11 +1,14 @@
from glob import glob
import numpy as np
import pickle
import sys
from sys import argv
from config import *
import tard_wrangler
MAX_DATA_LIMIT = sys.maxsize
def get_source(path : str) -> [str]:
    '''returns source file in $SOURCE_LINE_BATCH_SIZE line batches'''
    r = []
@@ -54,11 +57,17 @@ def whitespace_to_np_array(spaces : []) -> np.array:
def compile_data():
    r = {'in': [], 'out': [], 'src': []}
    for n, path in enumerate(glob(COMPILE_INPUT_DIRECTORY + "/*.c")):
        if n > 47: break # XXX
        acc_path = path + ".acc"
        if n > MAX_DATA_LIMIT: break # XXX
        acc_path = path + ".acc"
        norm_path = path + ".norm"
        r['src'].append(path)
        r['in'] += get_source(path)
        r['out'] += read_acc(acc_path)
        source_batches = get_source(norm_path)
        accumulation = read_acc(acc_path)
        assert len(source_batches) == len(accumulation), (
            f"Batch/accumulation length mismatch in {path}; a string literal probably broke normalization."
        )
        r['in'] += source_batches
        r['out'] += accumulation
    r['in'] = source_to_np_array(r['in'])
    r['out'] = whitespace_to_np_array(r['out'])
    return r
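The new assert pins down the invariant compile_data() depends on: each .norm file must yield exactly as many SOURCE_LINE_BATCH_SIZE-line batches as its .acc file has rows, or inputs and labels would silently shift out of alignment. A sketch of the windowing this implies (get_source's real padding is not shown in this diff, so the sliding window below is an assumption):

# Sketch: one plausible way get_source() windows a file into
# SOURCE_LINE_BATCH_SIZE-line batches; the real padding logic is
# not part of this diff.
SOURCE_LINE_BATCH_SIZE = 3

def batch_lines(lines: list) -> list:
    return [lines[i:i + SOURCE_LINE_BATCH_SIZE]
            for i in range(len(lines) - SOURCE_LINE_BATCH_SIZE + 1)]

print(batch_lines(["a", "b", "c", "d"]))  # [['a', 'b', 'c'], ['b', 'c', 'd']]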

View File

@@ -1,46 +1,20 @@
from datetime import datetime
from sys import argv
import numpy as np
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow
from tensorflow import keras
from keras import layers
from config import *
import model
import data
import tard_wrangler
dataset = data.get_data()
if len(argv) > 1:
    mymodel = model.load_model(argv[1])
else:
    dataset = data.get_data()
    mymodel = model.make_model(dataset)
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
mymodel.save(MODEL_DIRECTORY + f"model_-_{timestamp}.keras")
# XXX: add more conv layers
model = keras.Sequential([
    keras.Input(shape=(3, LINE_WIDTH, 1)),
    layers.Conv2D(
        filters=16,
        kernel_size=(3,5),
        strides=(1,1),
        activation='relu',
        padding='valid',
    ),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(MAX_SHIMS) #activation='softmax'
])
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mae']
)
model.fit(dataset['in'], dataset['out'],
    verbose=2,
    batch_size=10,
    epochs=50,
    shuffle=True,
)
prediction = model.predict(dataset['in'])[0]
prediction = prediction.astype(np.uint8).tobytes()
tard_wrangler.build("data/xop.c.norm", prediction)
predictions = tard_wrangler.full_predict("data/xop.c.norm", mymodel)
tard_wrangler.build("data/xop.c.norm", predictions)
tard_wrangler.cat_build()
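Loading through model.load_model() works here because the script only calls predict(); compile=False skips deserializing custom_weighted_loss. If a loaded model ever needs further fit() calls, one alternative (a sketch, not what this commit does) is to register the custom loss at load time:

# Sketch: load with the custom loss registered so the compile state
# survives; the path below is only an example.
from tensorflow import keras
import model

mymodel = keras.models.load_model(
    "models/model_-_20241007-130000.keras",  # example path
    custom_objects={"custom_weighted_loss": model.custom_weighted_loss},
)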

model.py Normal file
View File

@@ -0,0 +1,53 @@
import numpy as np
import pickle
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
from tensorflow import keras
from keras import layers
from config import *
@tf.function
def custom_weighted_loss(y_true, y_pred):
    weights = tf.linspace(1.0, 0.1, tf.shape(y_pred)[-1])
    return tf.reduce_mean(tf.square((y_true - y_pred) * weights))
def make_model(dataset : np.array) -> keras.Model:
    # XXX: add more conv layers
    model = keras.Sequential([
        keras.Input(shape=(3, LINE_WIDTH, 1)),
        layers.Conv2D(
            filters=16,
            kernel_size=(3,5),
            strides=(1,1),
            activation='relu',
            padding='valid',
        ),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(MAX_SHIMS) #activation='softmax'
    ])
    model.compile(
        optimizer='adam',
        #loss='mse',
        loss=custom_weighted_loss,
        metrics=['mae']
    )
    model.fit(dataset['in'], dataset['out'],
        verbose=2,
        batch_size=10,
        epochs=50,
        shuffle=True,
    )
    return model
def load_model(path : str) -> keras.Model:
    return keras.models.load_model(path,
        compile=False
    )
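custom_weighted_loss is a position-weighted MSE: tf.linspace runs from 1.0 on the first output column down to 0.1 on the last, so a misplaced shim early in the line costs up to ten times more than one at the end. A worked example with three columns:

# Worked example of custom_weighted_loss with MAX_SHIMS = 3.
import tensorflow as tf

y_true = tf.constant([[2.0, 0.0, 1.0]])
y_pred = tf.constant([[1.0, 0.0, 3.0]])
weights = tf.linspace(1.0, 0.1, 3)  # [1.0, 0.55, 0.1]
loss = tf.reduce_mean(tf.square((y_true - y_pred) * weights))
print(loss.numpy())  # (1.0^2 + 0.0^2 + (-0.2)^2) / 3 ≈ 0.3467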

models/.gitkeep Normal file
View File

View File

@@ -2,6 +2,7 @@ import subprocess
import numpy as np
from config import *
import data
def accumulate(path : str, output : str) -> None:
    process = subprocess.Popen(
@@ -9,10 +10,20 @@ def accumulate(path : str, output : str) -> None:
        shell=True,
    )
def build(path : str, prediction : np.array):
    with open("build_file", "wb") as file:
        file.write(prediction)
def full_predict(path : str, model) -> []:
    r = []
    myinput = data.source_to_np_array(data.get_source(path))
    for i in myinput:
        r += model.predict(np.expand_dims(i, axis=0)).astype(np.uint8).tobytes()
    return r
def build(path : str, predictions : []) -> None:
    predictions = b''.join([i.to_bytes(1, byteorder='big', signed=False) for i in predictions])
    with open("build_file", "wb") as f: f.write(predictions)
    process = subprocess.Popen(
        "converter.out build " + path + " > out.c",
        shell=True,
    )
def cat_build():
    with open("out.c") as f: print(f.read())
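Taken together the new entry points give the full round trip from a normalized source file to a reconstructed one; note that build() starts converter.out via Popen without waiting on it, so an immediate cat_build() can race the converter. A usage sketch (the model path is an example, and converter.out must already be compiled):

# Sketch: predict -> build_file -> converter.out -> out.c round trip.
import model
import tard_wrangler

mymodel = model.load_model("models/model_-_20241007-130000.keras")  # example path
preds = tard_wrangler.full_predict("data/xop.c.norm", mymodel)  # flat list of uint8 counts
tard_wrangler.build("data/xop.c.norm", preds)  # writes build_file, runs converter.out
tard_wrangler.cat_build()  # prints the reconstructed out.c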