bump
This commit is contained in:
parent
f8c8f7ef0c
commit
5d47089c2b
8
compile_data.sh
Executable file
8
compile_data.sh
Executable file
@ -0,0 +1,8 @@
|
||||
#!/bin/sh
# Prepare every C source file below the directory given as $1 for training.
#
# Usage: compile_data.sh <directory>
#
# For each "*.c" file found, in order:
#   1. vim rewrites the file in place, expanding tabs to spaces (tabstop 8);
#   2. `converter.out accumulate` output is written to "<file>.acc";
#   3. `converter.out normalize`  output is written to "<file>.norm".
# find chains the -exec actions with an implicit AND, so a later step only
# runs for a file when the previous step exited successfully.

# `[[ ... ]]` is a bashism (and `<` inside it is a string comparison; under a
# plain POSIX sh it is even parsed as a redirection), so use the portable
# numeric test instead.
if [ "$#" -lt 1 ]; then
    echo "usage: $0 <directory>" >&2
    exit 1
fi

find "$1" -type f -name "*.c" \
    -exec vim +"set tabstop=8" +"set expandtab" +"retab" +wq {} \; \
    -exec sh -c 'converter.out accumulate "$1" > "$1.acc"' _ {} \; \
    -exec sh -c 'converter.out normalize "$1" > "$1.norm"' _ {} \;
|
@ -4,3 +4,4 @@ SOURCE_LINE_BATCH_SIZE = 3
|
||||
|
||||
COMPILE_INPUT_DIRECTORY = "training_set/linux/"
|
||||
MODEL_DIRECTORY = "trained_models/"
|
||||
DATASET_FILE = "training_set/dataset-linux.pkl" # cached dataset
|
||||
|
@ -28,11 +28,10 @@
|
||||
#define ECHOS(s) fwrite(s, strlen(s), 1, yyout)
|
||||
%}
|
||||
|
||||
// != is missing
|
||||
comment_marker (\/\*)|(\*\/)
|
||||
identifier \$?[A-Za-z0-9_]+
|
||||
modify [+-]{2}
|
||||
/* Operator followed by '=': += -= /= *= %= != <<= >>=.
   The '-' is placed first in the class so it is a literal minus; written as
   "+-/" it would be a range that also matches ',' and '.'. */
assignment ([-+/*%!]|(<<)|(>>))=
|
||||
shift (<<)|(>>)
|
||||
|
||||
word {identifier}
|
||||
|
2
data.py
2
data.py
@ -10,8 +10,6 @@ import tard_wrangler
|
||||
#MAX_DATA_LIMIT = sys.maxsize
|
||||
MAX_DATA_LIMIT = 1000
|
||||
|
||||
DATASET_FILE = "training_set/dataset-linux.pkl"
|
||||
|
||||
def get_source(path : str, normpath : str) -> [str]:
|
||||
'''returns source file in $SOURCE_LINE_BATCH_SIZE line batches'''
|
||||
r = []
|
||||
|
14
formatter.py
14
formatter.py
@ -1,18 +1,24 @@
|
||||
from datetime import datetime
|
||||
from sys import argv
|
||||
import numpy as np
|
||||
from argparse import ArgumentParser
|
||||
|
||||
from config import *
|
||||
import model
|
||||
import data
|
||||
import tard_wrangler
|
||||
|
||||
if len(argv) > 1:
|
||||
mymodel = model.load_model(argv[1])
|
||||
parser = ArgumentParser()
|
||||
parser.add_argument('--model', type=str, help='Specify the model to use')
|
||||
parser.add_argument('file', type=str, help='The file to process')
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.model:
|
||||
mymodel = model.load_model(args.model)
|
||||
else:
|
||||
dataset = data.get_data("dataset-linux.pkl")
|
||||
dataset = data.get_data(DATASET_FILE)
|
||||
mymodel = model.make_model(dataset)
|
||||
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
|
||||
mymodel.save(MODEL_DIRECTORY + f"model_-_{timestamp}.keras")
|
||||
|
||||
print(tard_wrangler.full_predict("training_set/xop.c", "training_set/xop.c.norm", mymodel))
|
||||
print(tard_wrangler.full_predict(args.file, args.file + ".norm", mymodel))
|
||||
|
4
get_linux_source.sh
Executable file
4
get_linux_source.sh
Executable file
@ -0,0 +1,4 @@
|
||||
#!/bin/sh
# Copy every "*.c" file found under /usr/src/linux/ into training_set/linux/.

# -p creates the parent directory when needed and does not fail if the
# target already exists, so the script can be re-run.
mkdir -p "training_set/linux"
# NOTE(review): all files land in one flat directory, so sources that share a
# basename overwrite each other -- confirm this is acceptable for the dataset.
find /usr/src/linux/ -type f -name "*.c" -exec cp --verbose {} "training_set/linux/" \;
|
5
model.py
5
model.py
@ -11,7 +11,7 @@ from config import *
|
||||
|
||||
@tf.function
def custom_weighted_loss(y_true, y_pred):
    '''returns the mean of the squared, position-weighted prediction error

    The error (y_true - y_pred) is multiplied by weights that fall linearly
    from 2.0 to 0.1 along the last axis of y_pred, then squared and averaged
    over all elements. Because the weights are applied before squaring, the
    squared error is effectively scaled by the square of each weight.
    '''
    # One weight per entry of the last axis of y_pred.
    weights = tf.linspace(2.0, 0.1, tf.shape(y_pred)[-1])
    return tf.reduce_mean(tf.square((y_true - y_pred) * weights))
|
||||
|
||||
def make_model(dataset : np.array) -> keras.Model:
|
||||
@ -28,12 +28,11 @@ def make_model(dataset : np.array) -> keras.Model:
|
||||
layers.Flatten(),
|
||||
layers.Dense(64, activation='relu'),
|
||||
layers.Dense(64, activation='relu'),
|
||||
layers.Dense(MAX_SHIMS) #activation='softmax'
|
||||
layers.Dense(MAX_SHIMS)
|
||||
])
|
||||
|
||||
model.compile(
|
||||
optimizer='adam',
|
||||
#loss='mse',
|
||||
loss=custom_weighted_loss,
|
||||
metrics=['mae']
|
||||
)
|
||||
|
Loading…
x
Reference in New Issue
Block a user