bump
This commit is contained in:
8
compile_data.sh
Executable file
8
compile_data.sh
Executable file
@ -0,0 +1,8 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
# Require the input directory as the first argument.
# The POSIX `[` test is used because this script runs under /bin/sh, where
# `[[` is not guaranteed to exist; -lt compares numerically, whereas `<`
# inside `[[ ]]` is a string comparison.
[ "$#" -lt 1 ] && exit 1
|
||||||
|
|
||||||
|
find "$1" -type f -name "*.c" \
|
||||||
|
-exec vim +"set tabstop=8" +"set expandtab" +"retab" +wq {} \; \
|
||||||
|
-exec sh -c 'converter.out accumulate "$1" > "$1.acc"' _ {} \; \
|
||||||
|
-exec sh -c 'converter.out normalize "$1" > "$1.norm"' _ {} \;
|
@ -4,3 +4,4 @@ SOURCE_LINE_BATCH_SIZE = 3
|
|||||||
|
|
||||||
COMPILE_INPUT_DIRECTORY = "training_set/linux/"
|
COMPILE_INPUT_DIRECTORY = "training_set/linux/"
|
||||||
MODEL_DIRECTORY = "trained_models/"
|
MODEL_DIRECTORY = "trained_models/"
|
||||||
|
DATASET_FILE = "training_set/dataset-linux.pkl" # cached dataset
|
||||||
|
@ -28,11 +28,10 @@
|
|||||||
#define ECHOS(s) fwrite(s, strlen(s), 1, yyout)
|
#define ECHOS(s) fwrite(s, strlen(s), 1, yyout)
|
||||||
%}
|
%}
|
||||||
|
|
||||||
// != is missing
|
|
||||||
comment_marker (\/\*)|(\*\/)
|
comment_marker (\/\*)|(\*\/)
|
||||||
identifier \$?[A-Za-z0-9_]+
|
identifier \$?[A-Za-z0-9_]+
|
||||||
modify [+-]{2}
|
modify [+-]{2}
|
||||||
assignment ([+-/*%]|(<<)|(>>))=
|
/* '-' comes first in the class so it is a literal; placed between '+' and '/' it forms a range that also matches ',' and '.'. */
assignment ([-+/*%!]|(<<)|(>>))=
|
||||||
shift (<<)|(>>)
|
shift (<<)|(>>)
|
||||||
|
|
||||||
word {identifier}
|
word {identifier}
|
||||||
|
2
data.py
2
data.py
@ -10,8 +10,6 @@ import tard_wrangler
|
|||||||
#MAX_DATA_LIMIT = sys.maxsize
|
#MAX_DATA_LIMIT = sys.maxsize
|
||||||
MAX_DATA_LIMIT = 1000
|
MAX_DATA_LIMIT = 1000
|
||||||
|
|
||||||
DATASET_FILE = "training_set/dataset-linux.pkl"
|
|
||||||
|
|
||||||
def get_source(path : str, normpath : str) -> [str]:
|
def get_source(path : str, normpath : str) -> [str]:
|
||||||
'''returns source file in $SOURCE_LINE_BATCH_SIZE line batches'''
|
'''returns source file in $SOURCE_LINE_BATCH_SIZE line batches'''
|
||||||
r = []
|
r = []
|
||||||
|
14
formatter.py
14
formatter.py
@ -1,18 +1,24 @@
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from sys import argv
|
from sys import argv
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from argparse import ArgumentParser
|
||||||
|
|
||||||
from config import *
|
from config import *
|
||||||
import model
|
import model
|
||||||
import data
|
import data
|
||||||
import tard_wrangler
|
import tard_wrangler
|
||||||
|
|
||||||
if len(argv) > 1:
|
parser = ArgumentParser()
|
||||||
mymodel = model.load_model(argv[1])
|
parser.add_argument('--model', type=str, help='Specify the model to use')
|
||||||
|
parser.add_argument('file', type=str, help='The file to process')
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
if args.model:
|
||||||
|
mymodel = model.load_model(args.model)
|
||||||
else:
|
else:
|
||||||
dataset = data.get_data("dataset-linux.pkl")
|
dataset = data.get_data(DATASET_FILE)
|
||||||
mymodel = model.make_model(dataset)
|
mymodel = model.make_model(dataset)
|
||||||
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
|
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
|
||||||
mymodel.save(MODEL_DIRECTORY + f"model_-_{timestamp}.keras")
|
mymodel.save(MODEL_DIRECTORY + f"model_-_{timestamp}.keras")
|
||||||
|
|
||||||
print(tard_wrangler.full_predict("training_set/xop.c", "training_set/xop.c.norm", mymodel))
|
print(tard_wrangler.full_predict(args.file, args.file + ".norm", mymodel))
|
||||||
|
4
get_linux_source.sh
Executable file
4
get_linux_source.sh
Executable file
@ -0,0 +1,4 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
# -p creates any missing parent directory and succeeds when the target
# already exists, so the script can be re-run safely.
mkdir -p "training_set/linux"
|
||||||
|
# Copy every *.c file found under /usr/src/linux/ into training_set/linux/.
# NOTE(review): all files land in one flat directory, so sources that share
# a basename overwrite one another — confirm this loss is acceptable.
find /usr/src/linux/ -type f -name "*.c" -exec cp --verbose {} "training_set/linux/" \;
|
5
model.py
5
model.py
@ -11,7 +11,7 @@ from config import *
|
|||||||
|
|
||||||
@tf.function
|
@tf.function
|
||||||
def custom_weighted_loss(y_true, y_pred):
|
def custom_weighted_loss(y_true, y_pred):
|
||||||
weights = tf.linspace(1.0, 0.1, tf.shape(y_pred)[-1])
|
weights = tf.linspace(2.0, 0.1, tf.shape(y_pred)[-1])
|
||||||
return tf.reduce_mean(tf.square((y_true - y_pred) * weights))
|
return tf.reduce_mean(tf.square((y_true - y_pred) * weights))
|
||||||
|
|
||||||
def make_model(dataset : np.array) -> keras.Model:
|
def make_model(dataset : np.array) -> keras.Model:
|
||||||
@ -28,12 +28,11 @@ def make_model(dataset : np.array) -> keras.Model:
|
|||||||
layers.Flatten(),
|
layers.Flatten(),
|
||||||
layers.Dense(64, activation='relu'),
|
layers.Dense(64, activation='relu'),
|
||||||
layers.Dense(64, activation='relu'),
|
layers.Dense(64, activation='relu'),
|
||||||
layers.Dense(MAX_SHIMS) #activation='softmax'
|
layers.Dense(MAX_SHIMS)
|
||||||
])
|
])
|
||||||
|
|
||||||
model.compile(
|
model.compile(
|
||||||
optimizer='adam',
|
optimizer='adam',
|
||||||
#loss='mse',
|
|
||||||
loss=custom_weighted_loss,
|
loss=custom_weighted_loss,
|
||||||
metrics=['mae']
|
metrics=['mae']
|
||||||
)
|
)
|
||||||
|
Reference in New Issue
Block a user