it does something now
config.py
@@ -3,3 +3,4 @@ MAX_SHIMS = LINE_WIDTH - 1
 SOURCE_LINE_BATCH_SIZE = 3
 
 COMPILE_INPUT_DIRECTORY = "data/linux/"
+MODEL_DIRECTORY = "models/"
converter.l (39 lines changed)
@@ -18,14 +18,17 @@
 
 FILE * build_file;
 char schemantic[MAX_SHIMS];
-int schim = 0;
+int schim;
 
-#define STEP_SCHEMANTIC fread(schemantic, MAX_SHIMS, sizeof(char), build_file)
+#define STEP_SCHEMANTIC do { \
+	schim = 0; \
+	int re = fread(schemantic, sizeof(char), MAX_SHIMS, build_file); \
+	if (re != sizeof(char)*MAX_SHIMS) { printf("- %d\n", re); exit(2); } \
+} while (0)
 #define ECHOS(s) fwrite(s, strlen(s), 1, yyout)
 
-#define EOL '\n'
 %}
 
+// != is missing
 comment_marker (\/\*)|(\*\/)
 identifier \$?[A-Za-z0-9_]+
 modify [+-]{2}
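A note on the reworked STEP_SCHEMANTIC: fread() returns the number of items read, and with an item size of sizeof(char) == 1 the `re != sizeof(char)*MAX_SHIMS` test is effectively `re != MAX_SHIMS`, so any short read aborts the build. build_file is thus consumed as fixed-size records, one MAX_SHIMS-byte record per output line. A minimal sketch of that framing (the check_build_file helper is hypothetical; MAX_SHIMS comes from config.py):

    import os
    from config import MAX_SHIMS  # LINE_WIDTH - 1

    def check_build_file(path: str) -> int:
        # converter.out reads exactly MAX_SHIMS bytes per emitted line,
        # so the file must be a whole number of records
        size = os.path.getsize(path)
        assert size % MAX_SHIMS == 0, f"{path}: not a multiple of {MAX_SHIMS} bytes"
        return size // MAX_SHIMS  # number of lines the BUILD pass can reconstruct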
@@ -35,8 +38,16 @@ shift (<<)|(>>)
 word {identifier}
 special {comment_marker}|{assignment}|{shift}|{modify}
 
-%x NORMALIZE ACCUMULATE BUILD
 %x IN_STRING
 
+// Keep all but the required whitespaces
+%x NORMALIZE
+// Count the non-required whitespaces and write python arrays
+%x ACCUMULATE
+// Reconstruct normalized file based on binary whitespace count arrays
+%x BUILD
+
+%option yylineno
 %option noyywrap nodefault
 %%
 BEGIN mystate;
@@ -61,10 +72,7 @@ special {comment_marker}|{assignment}|{shift}|{modify}
 		ECHO;
 		was_word = false;
 	}
-	\n {
-		ECHO;
-		return EOL;
-	}
+	\n { ECHO; }
 }
 
 <ACCUMULATE>{
@@ -99,15 +107,17 @@ special {comment_marker}|{assignment}|{shift}|{modify}
 	}
 
 <BUILD>{
-	[ ]|\t { ; }
-	{word}|. {
-		ECHO;
+	[ ] { ECHO; }
+	{word}|{special}|. {
 		for (char i = 0; i < schemantic[schim]; i++) {
 			ECHOS(" ");
 		}
+		ECHO;
 
 		++schim;
 	}
-	\n {
+	\n { // XXX we find the last newline and still step, resulting in an error
+		ECHO;
 		STEP_SCHEMANTIC;
 	}
 }
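For reference, the BUILD rules now pad before echoing: each matched token first gets schemantic[schim] spaces, then ECHO, then schim advances, and every newline loads the next record. A rough Python equivalent of one line of that pass (a sketch only; tokenization is approximated by a pre-split token list, which the real lexer does not use):

    def build_line(tokens: list, pad_counts: list) -> str:
        # mirrors the {word}|{special}|. rule: ECHOS(" ") repeated
        # schemantic[schim] times, then ECHO the token, then ++schim
        out = []
        for token, pad in zip(tokens, pad_counts):
            out.append(" " * pad + token)
        return " ".join(out)  # the [ ] rule echoes the single required spaces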
@@ -147,6 +157,7 @@ signed main(const int argc, const char * const * const argv) {
 	if (!strcmp(argv[1], "build")) {
 		mystate = BUILD;
 		build_file = fopen("build_file", "rb");
+		if (!build_file) { exit(1); }
 		STEP_SCHEMANTIC;
 	} else {
 		return 1;
@@ -154,7 +165,7 @@ signed main(const int argc, const char * const * const argv) {
 
 	yyin = fopen(argv[2], "r");
 
-	while(yylex() == EOL) { ; }
+	yylex();
 
 	return 0;
 }
data.py (17 lines changed)
@@ -1,11 +1,14 @@
 from glob import glob
 import numpy as np
 import pickle
+import sys
 from sys import argv
 
 from config import *
 import tard_wrangler
 
+MAX_DATA_LIMIT = sys.maxsize
+
 def get_source(path : str) -> [str]:
 	'''returns source file in $SOURCE_LINE_BATCH_SIZE line batches'''
 	r = []
@@ -54,11 +57,17 @@ def whitespace_to_np_array(spaces : []) -> np.array:
 def compile_data():
 	r = {'in': [], 'out': [], 'src': []}
 	for n, path in enumerate(glob(COMPILE_INPUT_DIRECTORY + "/*.c")):
-		if n > 47: break # XXX
+		if n > MAX_DATA_LIMIT: break # XXX
 		acc_path = path + ".acc"
+		norm_path = path + ".norm"
 		r['src'].append(path)
-		r['in'] += get_source(path)
-		r['out'] += read_acc(acc_path)
+		source_batches = get_source(norm_path)
+		accumulation = read_acc(acc_path)
+		assert len(source_batches) == len(accumulation), (
+			f"Some retard fucked up strings in {path}."
+		)
+		r['in'] += source_batches
+		r['out'] += accumulation
 	r['in'] = source_to_np_array(r['in'])
 	r['out'] = whitespace_to_np_array(r['out'])
 	return r
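The new assert catches .norm/.acc pairs whose batch and record counts have drifted apart. A standalone sanity pass over the corpus might look like this (assuming read_acc is module-level in data.py, as its use above suggests):

    from glob import glob
    from config import *
    import data

    # check every compiled pair for batch/record alignment
    for path in glob(COMPILE_INPUT_DIRECTORY + "/*.c"):
        batches = data.get_source(path + ".norm")
        records = data.read_acc(path + ".acc")
        if len(batches) != len(records):
            print(f"misaligned pair: {path} ({len(batches)} batches vs {len(records)} records)")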
formatter.py (52 lines changed)
@@ -1,46 +1,20 @@
+from datetime import datetime
+from sys import argv
 import numpy as np
-import os
-os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
-
-import tensorflow
-from tensorflow import keras
-from keras import layers
 
 from config import *
+import model
 import data
 import tard_wrangler
 
-dataset = data.get_data()
+if len(argv) > 1:
+	mymodel = model.load_model(argv[1])
+else:
+	dataset = data.get_data()
+	mymodel = model.make_model(dataset)
+	timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
+	mymodel.save(MODEL_DIRECTORY + f"model_-_{timestamp}.keras")
 
-# XXX: add more conv layers
-model = keras.Sequential([
-	keras.Input(shape=(3, LINE_WIDTH, 1)),
-	layers.Conv2D(
-		filters=16,
-		kernel_size=(3,5),
-		strides=(1,1),
-		activation='relu',
-		padding='valid',
-	),
-	layers.Flatten(),
-	layers.Dense(64, activation='relu'),
-	layers.Dense(64, activation='relu'),
-	layers.Dense(MAX_SHIMS) #activation='softmax'
-])
-
-model.compile(
-	optimizer='adam',
-	loss='mse',
-	metrics=['mae']
-)
-
-model.fit(dataset['in'], dataset['out'],
-	verbose=2,
-	batch_size=10,
-	epochs=50,
-	shuffle=True,
-)
-
-prediction = model.predict(dataset['in'])[0]
-prediction = prediction.astype(np.uint8).tobytes()
-tard_wrangler.build("data/xop.c.norm", prediction)
+predictions = tard_wrangler.full_predict("data/xop.c.norm", mymodel)
+tard_wrangler.build("data/xop.c.norm", predictions)
+tard_wrangler.cat_build()
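With this change formatter.py either trains from the compiled dataset or reloads a saved model, then always runs the predict/build/cat pipeline. Invocation presumably looks like this (the .keras filename is hypothetical):

    python formatter.py                                        # train and save under models/
    python formatter.py models/model_-_20240101-000000.keras   # reuse a saved model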
model.py (new file, 53 lines)
@@ -0,0 +1,53 @@
+import numpy as np
+import pickle
+import os
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+
+import tensorflow as tf
+from tensorflow import keras
+from keras import layers
+
+from config import *
+
+@tf.function
+def custom_weighted_loss(y_true, y_pred):
+	weights = tf.linspace(1.0, 0.1, tf.shape(y_pred)[-1])
+	return tf.reduce_mean(tf.square((y_true - y_pred) * weights))
+
+def make_model(dataset : np.array) -> keras.Model:
+	# XXX: add more conv layers
+	model = keras.Sequential([
+		keras.Input(shape=(3, LINE_WIDTH, 1)),
+		layers.Conv2D(
+			filters=16,
+			kernel_size=(3,5),
+			strides=(1,1),
+			activation='relu',
+			padding='valid',
+		),
+		layers.Flatten(),
+		layers.Dense(64, activation='relu'),
+		layers.Dense(64, activation='relu'),
+		layers.Dense(MAX_SHIMS) #activation='softmax'
+	])
+
+	model.compile(
+		optimizer='adam',
+		#loss='mse',
+		loss=custom_weighted_loss,
+		metrics=['mae']
+	)
+
+	model.fit(dataset['in'], dataset['out'],
+		verbose=2,
+		batch_size=10,
+		epochs=50,
+		shuffle=True,
+	)
+
+	return model
+
+def load_model(path : str) -> keras.Model:
+	return keras.models.load_model(path,
+		compile=False
+	)
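custom_weighted_loss is plain MSE with a linear weight ramp from 1.0 down to 0.1 across the MAX_SHIMS outputs, so the leftmost shim slot is weighted 10x the rightmost (100x after squaring). A small numeric check of that behavior (values chosen for illustration; 4 slots stand in for MAX_SHIMS):

    import tensorflow as tf

    y_true = tf.constant([[2.0, 0.0, 1.0, 0.0]])
    y_pred = tf.constant([[0.0, 0.0, 1.0, 0.0]])
    weights = tf.linspace(1.0, 0.1, 4)  # [1.0, 0.7, 0.4, 0.1]
    loss = tf.reduce_mean(tf.square((y_true - y_pred) * weights))
    # the error of 2 in slot 0 dominates: (2 * 1.0)^2 / 4 = 1.0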
models/.gitkeep (new file, 0 lines)
tard_wrangler.py
@@ -2,6 +2,7 @@ import subprocess
 import numpy as np
 
 from config import *
+import data
 
 def accumulate(path : str, output : str) -> None:
 	process = subprocess.Popen(
@@ -9,10 +10,20 @@ def accumulate(path : str, output : str) -> None:
 		shell=True,
 	)
 
-def build(path : str, prediction : np.array):
-	with open("build_file", "wb") as file:
-		file.write(prediction)
+def full_predict(path : str, model) -> []:
+	r = []
+	myinput = data.source_to_np_array(data.get_source(path))
+	for i in myinput:
+		r += model.predict(np.expand_dims(i, axis=0)).astype(np.uint8).tobytes()
+	return r
+
+def build(path : str, predictions : []) -> None:
+	predictions = b''.join([i.to_bytes(1, byteorder='big', signed=False) for i in predictions])
+	with open("build_file", "wb") as f: f.write(predictions)
 	process = subprocess.Popen(
 		"converter.out build " + path + " > out.c",
 		shell=True,
 	)
+
+def cat_build():
+	with open("out.c") as f: print(f.read())
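Taken together: full_predict emits one unsigned byte per shim slot (MAX_SHIMS per source line), build packs those bytes into build_file, and converter.out's STEP_SCHEMANTIC consumes one MAX_SHIMS-byte record per newline. A sketch of the round trip, assuming a trained mymodel is in scope:

    from config import MAX_SHIMS
    import tard_wrangler

    predictions = tard_wrangler.full_predict("data/xop.c.norm", mymodel)
    assert len(predictions) % MAX_SHIMS == 0  # whole records only
    tard_wrangler.build("data/xop.c.norm", predictions)  # writes build_file, runs converter.out
    tard_wrangler.cat_build()                            # prints the reconstructed out.c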