anon 2024-11-03 13:13:20 +01:00
parent f8c8f7ef0c
commit 5d47089c2b
7 changed files with 26 additions and 11 deletions

compile_data.sh Executable file (8 additions)

@@ -0,0 +1,8 @@
#!/bin/sh
[ "$#" -lt 1 ] && exit 1  # a target directory argument is required
find "$1" -type f -name "*.c" \
-exec vim +"set tabstop=8" +"set expandtab" +"retab" +wq {} \; \
-exec sh -c 'converter.out accumulate "$1" > "$1.acc"' _ {} \; \
-exec sh -c 'converter.out normalize "$1" > "$1.norm"' _ {} \;
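Usage sketch (not part of the commit): assuming converter.out is on the PATH, the script is pointed at the training directory, e.g. ./compile_data.sh training_set/linux, and leaves an .acc and a .norm file next to every .c file it finds.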


@@ -4,3 +4,4 @@ SOURCE_LINE_BATCH_SIZE = 3
COMPILE_INPUT_DIRECTORY = "training_set/linux/"
MODEL_DIRECTORY = "trained_models/"
DATASET_FILE = "training_set/dataset-linux.pkl" # cached dataset


@@ -28,11 +28,10 @@
#define ECHOS(s) fwrite(s, strlen(s), 1, yyout)
%}
// != is missing
comment_marker (\/\*)|(\*\/)
identifier \$?[A-Za-z0-9_]+
modify [+-]{2}
assignment ([+-/*%]|(<<)|(>>))=
assignment ([+-/*%!]|(<<)|(>>))=
shift (<<)|(>>)
word {identifier}


@@ -10,8 +10,6 @@ import tard_wrangler
#MAX_DATA_LIMIT = sys.maxsize
MAX_DATA_LIMIT = 1000
DATASET_FILE = "training_set/dataset-linux.pkl"
def get_source(path : str, normpath : str) -> [str]:
'''returns source file in $SOURCE_LINE_BATCH_SIZE line batches'''
r = []


@@ -1,18 +1,24 @@
from datetime import datetime
from sys import argv
import numpy as np
from argparse import ArgumentParser
from config import *
import model
import data
import tard_wrangler
if len(argv) > 1:
    mymodel = model.load_model(argv[1])
parser = ArgumentParser()
parser.add_argument('--model', type=str, help='Specify the model to use')
parser.add_argument('file', type=str, help='The file to process')
args = parser.parse_args()
if args.model:
    mymodel = model.load_model(args.model)
else:
    dataset = data.get_data("dataset-linux.pkl")
    dataset = data.get_data(DATASET_FILE)
    mymodel = model.make_model(dataset)
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    mymodel.save(MODEL_DIRECTORY + f"model_-_{timestamp}.keras")
print(tard_wrangler.full_predict("training_set/xop.c", "training_set/xop.c.norm", mymodel))
print(tard_wrangler.full_predict(args.file, args.file + ".norm", mymodel))
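Usage sketch (the entry-point file name is not shown on this page and is assumed here): python main.py training_set/xop.c would train a model from DATASET_FILE and save it under MODEL_DIRECTORY with a timestamped name, while python main.py --model trained_models/some_model.keras training_set/xop.c would load an existing one; in both cases the prediction for the given file and its .norm counterpart is printed.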

get_linux_source.sh Executable file (4 additions)

@@ -0,0 +1,4 @@
#!/bin/sh
mkdir "training_set/linux"
find /usr/src/linux/ -type f -name "*.c" -exec cp --verbose {} "training_set/linux/" \;


@@ -11,7 +11,7 @@ from config import *
@tf.function
def custom_weighted_loss(y_true, y_pred):
    weights = tf.linspace(1.0, 0.1, tf.shape(y_pred)[-1])
    weights = tf.linspace(2.0, 0.1, tf.shape(y_pred)[-1])
    return tf.reduce_mean(tf.square((y_true - y_pred) * weights))
def make_model(dataset : np.array) -> keras.Model:
@@ -28,12 +28,11 @@ def make_model(dataset : np.array) -> keras.Model:
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(MAX_SHIMS) #activation='softmax'
        layers.Dense(MAX_SHIMS)
    ])
    model.compile(
        optimizer='adam',
        #loss='mse',
        loss=custom_weighted_loss,
        metrics=['mae']
    )
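Illustrative sketch (not part of the commit) of the reweighted loss: the linspace weights now fall linearly from 2.0 down to 0.1 across the output positions, so errors on the earliest predicted shims dominate the loss (a 20x weight ratio, i.e. 400x after squaring).

import tensorflow as tf

y_true = tf.constant([[1.0, 0.0, 0.0, 0.0]])
y_pred = tf.constant([[0.0, 0.0, 0.0, 1.0]])
weights = tf.linspace(2.0, 0.1, tf.shape(y_pred)[-1])          # ~[2.0, 1.37, 0.73, 0.1]
loss = tf.reduce_mean(tf.square((y_true - y_pred) * weights))  # an error at position 0 contributes 2.0^2, the same error at the last position only 0.1^2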