it does something now
config.py
@@ -3,3 +3,4 @@ MAX_SHIMS = LINE_WIDTH - 1
 SOURCE_LINE_BATCH_SIZE = 3
 
 COMPILE_INPUT_DIRECTORY = "data/linux/"
+MODEL_DIRECTORY = "models/"
converter.l (39 lines changed)
@@ -18,14 +18,17 @@
 
 FILE * build_file;
 char schemantic[MAX_SHIMS];
-int schim = 0;
+int schim;
 
-#define STEP_SCHEMANTIC fread(schemantic, MAX_SHIMS, sizeof(char), build_file)
+#define STEP_SCHEMANTIC do { \
+	schim = 0; \
+	int re = fread(schemantic, sizeof(char), MAX_SHIMS, build_file); \
+	if (re != sizeof(char)*MAX_SHIMS) { printf("- %d\n", re); exit(2); } \
+} while (0)
 #define ECHOS(s) fwrite(s, strlen(s), 1, yyout)
 
 #define EOL '\n'
 %}
 
 // != is missing
 comment_marker (\/\*)|(\*\/)
 identifier \$?[A-Za-z0-9_]+
 modify [+-]{2}
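The reworked STEP_SCHEMANTIC is wrapped in do { ... } while (0) so it expands as a single statement, resets the shim index, and aborts on a short read: with size sizeof(char) and count MAX_SHIMS, fread returns the number of bytes read, so anything other than MAX_SHIMS means a truncated record. A rough Python analogue of the intended behaviour (hypothetical helper, not part of this commit):

	# read one MAX_SHIMS-byte whitespace-count record; bail out on a short read
	def step_schemantic(build_file, max_shims):
		record = build_file.read(max_shims)
		if len(record) != max_shims:   # mirrors re != sizeof(char)*MAX_SHIMS
			raise SystemExit(2)        # the macro prints the count, then exit(2)s
		return record                  # becomes schemantic[]; schim restarts at 0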
@@ -35,8 +38,16 @@ shift (<<)|(>>)
 word {identifier}
 special {comment_marker}|{assignment}|{shift}|{modify}
 
-%x NORMALIZE ACCUMULATE BUILD
+%x IN_STRING
+
+// Keep all but the required whitespaces
+%x NORMALIZE
+// Count the non-required whitespaces and write python arrays
+%x ACCUMULATE
+// Reconstruct normalized file based on binary whitespace count arrays
+%x BUILD
 
 %option yylineno
 %option noyywrap nodefault
 %%
 	BEGIN mystate;
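Splitting the single %x declaration into per-state lines lets each mode carry its own comment, and together they describe the round trip. A hypothetical example of the intended data flow, inferred from those comments and not part of this commit:

	original   = "int  x;"
	normalized = "int x;"     # NORMALIZE: keep only the required separator
	counts     = [0, 1, 0]    # ACCUMULATE: extra spaces in front of each token
	# BUILD: re-emit counts[i] spaces before token i, recovering "int  x;"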
@@ -61,10 +72,7 @@ special {comment_marker}|{assignment}|{shift}|{modify}
 	ECHO;
 	was_word = false;
 }
-\n {
-	ECHO;
-	return EOL;
-}
+\n { ECHO; }
 }
 
 <ACCUMULATE>{
@@ -99,15 +107,17 @@ special {comment_marker}|{assignment}|{shift}|{modify}
 }
 
 <BUILD>{
-[ ]|\t { ; }
-{word}|. {
-	ECHO;
+[ ] { ECHO; }
+{word}|{special}|. {
 	for (char i = 0; i < schemantic[schim]; i++) {
 		ECHOS(" ");
 	}
+	ECHO;
 
 	++schim;
 }
-\n {
+\n { // XXX we find the last newline and still step, resulting in an error
 	ECHO;
 	STEP_SCHEMANTIC;
 }
 }
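In BUILD mode each token now emits its schemantic[schim] padding spaces first and then itself, while every newline echoes and immediately loads the next count record, which is what the XXX comment flags: the file's final newline still calls STEP_SCHEMANTIC and trips the short-read exit. A rough Python analogue of the token rule (hypothetical helper; the real scanner also echoes the single separators already present in the .norm input):

	def pad_tokens(tokens, counts):
		out = ""
		for count, tok in zip(counts, tokens):  # schim walks the count array
			out += " " * count + tok            # ECHOS(" ") count times, then ECHO
		return out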
@@ -147,6 +157,7 @@ signed main(const int argc, const char * const * const argv) {
 	if (!strcmp(argv[1], "build")) {
 		mystate = BUILD;
 		build_file = fopen("build_file", "rb");
+		if (!build_file) { exit(1); }
 		STEP_SCHEMANTIC;
 	} else {
 		return 1;
@@ -154,7 +165,7 @@ signed main(const int argc, const char * const * const argv) {
 
 	yyin = fopen(argv[2], "r");
 
-	while(yylex() == EOL) { ; }
+	yylex();
 
 	return 0;
 }
17
data.py
17
data.py
@@ -1,11 +1,14 @@
 from glob import glob
 import numpy as np
 import pickle
+import sys
 from sys import argv
 
 from config import *
 import tard_wrangler
 
+MAX_DATA_LIMIT = sys.maxsize
+
 def get_source(path : str) -> [str]:
 	'''returns source file in $SOURCE_LINE_BATCH_SIZE line batches'''
 	r = []
@@ -54,11 +57,17 @@ def whitespace_to_np_array(spaces : []) -> np.array:
 def compile_data():
 	r = {'in': [], 'out': [], 'src': []}
 	for n, path in enumerate(glob(COMPILE_INPUT_DIRECTORY + "/*.c")):
-		if n > 47: break # XXX
-		acc_path = path + ".acc"
+		if n > MAX_DATA_LIMIT: break # XXX
+		acc_path = path + ".acc"
+		norm_path = path + ".norm"
 		r['src'].append(path)
-		r['in'] += get_source(path)
-		r['out'] += read_acc(acc_path)
+		source_batches = get_source(norm_path)
+		accumulation = read_acc(acc_path)
+		assert len(source_batches) == len(accumulation), (
+			f"Source/accumulation length mismatch in {path} (check string handling)."
+		)
+		r['in'] += source_batches
+		r['out'] += accumulation
 	r['in'] = source_to_np_array(r['in'])
 	r['out'] = whitespace_to_np_array(r['out'])
 	return r
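compile_data now reads from the normalized file, pairs each source batch with its accumulator row, and hard-fails on any length mismatch, so misaligned training pairs can no longer slip in silently. A hypothetical usage sketch (the pickle cache path is an assumption, not part of this commit):

	dataset = compile_data()
	print(dataset['in'].shape, dataset['out'].shape)  # one row per 3-line batch
	with open("dataset.pickle", "wb") as f:           # pickle is imported above
		pickle.dump(dataset, f)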
formatter.py (52 lines changed)
@@ -1,46 +1,20 @@
 from datetime import datetime
 from sys import argv
 import numpy as np
 import os
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
 
-import tensorflow
-from tensorflow import keras
-from keras import layers
-
 from config import *
+import model
 import data
 import tard_wrangler
 
-dataset = data.get_data()
+if len(argv) > 1:
+	mymodel = model.load_model(argv[1])
+else:
+	dataset = data.get_data()
+	mymodel = model.make_model(dataset)
+	timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
+	mymodel.save(MODEL_DIRECTORY + f"model_-_{timestamp}.keras")
 
-# XXX: add more conv layers
-model = keras.Sequential([
-	keras.Input(shape=(3, LINE_WIDTH, 1)),
-	layers.Conv2D(
-		filters=16,
-		kernel_size=(3,5),
-		strides=(1,1),
-		activation='relu',
-		padding='valid',
-	),
-	layers.Flatten(),
-	layers.Dense(64, activation='relu'),
-	layers.Dense(64, activation='relu'),
-	layers.Dense(MAX_SHIMS) #activation='softmax'
-])
-
-model.compile(
-	optimizer='adam',
-	loss='mse',
-	metrics=['mae']
-)
-
-model.fit(dataset['in'], dataset['out'],
-	verbose=2,
-	batch_size=10,
-	epochs=50,
-	shuffle=True,
-)
-
-prediction = model.predict(dataset['in'])[0]
-prediction = prediction.astype(np.uint8).tobytes()
-tard_wrangler.build("data/xop.c.norm", prediction)
+predictions = tard_wrangler.full_predict("data/xop.c.norm", mymodel)
+tard_wrangler.build("data/xop.c.norm", predictions)
+tard_wrangler.cat_build()
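formatter.py is reduced to a driver: given a model path on argv it reloads that model, otherwise it trains a fresh one via model.make_model and saves it under MODEL_DIRECTORY with a timestamp. Hypothetical invocations (the .keras file name is an example):

	# python formatter.py                                       -> train, save, predict
	# python formatter.py models/model_-_20250101-120000.keras  -> load, predict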
model.py (new file, 53 lines)
@@ -0,0 +1,53 @@
+import numpy as np
+import pickle
+import os
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+
+import tensorflow as tf
+from tensorflow import keras
+from keras import layers
+
+from config import *
+
+@tf.function
+def custom_weighted_loss(y_true, y_pred):
+	weights = tf.linspace(1.0, 0.1, tf.shape(y_pred)[-1])
+	return tf.reduce_mean(tf.square((y_true - y_pred) * weights))
+
+def make_model(dataset : np.array) -> keras.Model:
+	# XXX: add more conv layers
+	model = keras.Sequential([
+		keras.Input(shape=(3, LINE_WIDTH, 1)),
+		layers.Conv2D(
+			filters=16,
+			kernel_size=(3,5),
+			strides=(1,1),
+			activation='relu',
+			padding='valid',
+		),
+		layers.Flatten(),
+		layers.Dense(64, activation='relu'),
+		layers.Dense(64, activation='relu'),
+		layers.Dense(MAX_SHIMS) #activation='softmax'
+	])
+
+	model.compile(
+		optimizer='adam',
+		#loss='mse',
+		loss=custom_weighted_loss,
+		metrics=['mae']
+	)
+
+	model.fit(dataset['in'], dataset['out'],
+		verbose=2,
+		batch_size=10,
+		epochs=50,
+		shuffle=True,
+	)
+
+	return model
+
+def load_model(path : str) -> keras.Model:
+	return keras.models.load_model(path,
+		compile=False
+	)
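custom_weighted_loss scales the per-position error by a linear ramp from 1.0 down to 0.1 across the output vector, so mistakes in the early MAX_SHIMS columns cost more than late ones. A standalone check of the arithmetic (hypothetical 4-wide example):

	import tensorflow as tf
	y_true = tf.zeros((1, 4))
	y_pred = tf.ones((1, 4))
	weights = tf.linspace(1.0, 0.1, 4)  # [1.0, 0.7, 0.4, 0.1]
	loss = tf.reduce_mean(tf.square((y_true - y_pred) * weights))
	print(float(loss))                  # (1 + 0.49 + 0.16 + 0.01) / 4 = 0.415

Note that load_model passes compile=False, which avoids deserializing the custom loss but leaves the loaded model predict-only until it is compiled again.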
models/.gitkeep (new file, empty)
tard_wrangler.py
@@ -2,6 +2,7 @@ import subprocess
 import numpy as np
 
 from config import *
+import data
 
 def accumulate(path : str, output : str) -> None:
 	process = subprocess.Popen(
@@ -9,10 +10,20 @@ def accumulate(path : str, output : str) -> None:
 		shell=True,
 	)
 
-def build(path : str, prediction : np.array):
-	with open("build_file", "wb") as file:
-		file.write(prediction)
+def full_predict(path : str, model) -> []:
+	r = []
+	myinput = data.source_to_np_array(data.get_source(path))
+	for i in myinput:
+		r += model.predict(np.expand_dims(i, axis=0)).astype(np.uint8).tobytes()
+	return r
+
+def build(path : str, predictions : []) -> None:
+	predictions = b''.join([i.to_bytes(1, byteorder='big', signed=False) for i in predictions])
+	with open("build_file", "wb") as f: f.write(predictions)
 	process = subprocess.Popen(
 		"converter.out build " + path + " > out.c",
 		shell=True,
 	)
+
+def cat_build():
+	with open("out.c") as f: print(f.read())
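full_predict flattens each per-batch prediction into one list of byte values, build serializes them into build_file and shells out to the flex tool, and cat_build prints the reconstructed source. One caveat: Popen returns immediately, so out.c may still be in flight when cat_build reads it; a safer ordering would wait on the child (hypothetical fix, not in this commit):

	process = subprocess.Popen("converter.out build " + path + " > out.c", shell=True)
	process.wait()  # ensure converter.out finished before cat_build() reads out.c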