cleaned up
.gitignore (vendored): 1 change

@@ -1,3 +1,4 @@
 venv/
 *.yy.*
 *.out
+__pycache__/
data.py: 90 changes

@@ -1,43 +1,53 @@
-import re
-from bidict import bidict
+import subprocess
+import numpy as np
 
-#CHAR_TOKENS = bidict({
-# '': 0,
-# '\n': 1,
-#})
-#CHAR_TOKEN_OFFSET = 1
+from config import *
 
-def encode(s : str) -> str:
-    return re.sub(r'\s+', ' ', s)
-
-#def decode(s : str, o : [int]) -> str:
-# result = []
-# space_index = 0
-# for char in s:
-# if char == ' ':
-# if o[space_index] in CHAR_TOKENS.inverse:
-# result.append(CHAR_TOKENS.inverse[o[space_index]])
-# else:
-# result.append(' ' * (o[space_index] - CHAR_TOKEN_OFFSET))
-# space_index += 1
-# else:
-# result.append(char)
-# return ''.join(result)
-
-def decode(s : str, o : [int]) -> str:
-    result = []
-    space_index = 0
-    for char in s:
-        if char == ' ':
-            result.append(' ' * (o[space_index])
-            space_index += 1
-        else:
-            result.append(char)
-    return ''.join(result)
-
-def batchificate(f):
-    BATCH_SIZE = 32
-    s = open(f, 'r').read()
-    s = encode(s)
-
-print(decode(encode('if ( a == b ) { a = c )'), [2,0,2,2,0,1,0,4,1,1]))
+def get_data():
+    r = []
+    INPUT_FILE = "data/xop.c"
+    def get_source(path : str) -> [str]:
+        '''returns source file 3 line batches'''
+        r = []
+        with open(path, 'r') as file:
+            lines = []
+            for line in file:
+                lines.append(line.strip())
+            r = [lines[i:i + 3] for i in range(0, len(lines), 3)]
+        return r
+    def source_to_np_array(source_batches : []) -> np.array:
+        r = []
+        for s in source_batches:
+            ascii_list = []
+            for l in s:
+                l = l[:LINE_WIDTH]
+                l = l.ljust(LINE_WIDTH)
+                l = [ord(i) for i in l]
+                ascii_list += l
+            n = np.reshape(ascii_list, (3, -1, 1))
+            n = np.expand_dims(n, axis=0)
+            r.append(n)
+        return r
+    def get_whitespace(path : str) -> [int]:
+        '''XXX returns the whitespace list of every middle line'''
+        r = []
+        output_file = "muf_file.txt"
+        process = subprocess.Popen(
+            "converter.out accumulate " + path + " > " + output_file,
+            shell=True,
+        )
+        with open(output_file, 'r') as file:
+            for n, line in enumerate(file):
+                if ((n + 2) % 3) != 0: continue
+                r.append(eval(line))
+        return r
+    source = source_to_np_array(get_source(INPUT_FILE))
+    whitespace = get_whitespace(INPUT_FILE)
+    whitespace = [np.array(i) for i in whitespace]
+    r = {'in': source, 'out': whitespace}
+    assert len(r['in']) == len(r['in']), "data in and out sizes were inconsistent."
+    return r
+
+if __name__ == "__main__":
+    dataset = get_data()
+    print(dataset)
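config.py itself is not included in this commit, but the new "from config import *" in data.py (and in formatter.py below) evidently replaces the LINE_WIDTH and MAX_SHIMS constants that the commit deletes from formatter.py; LINE_WIDTH is the only name data.py actually uses. A minimal config.py consistent with that assumption:

    # config.py: assumed contents, inferred from the constants removed from formatter.py
    LINE_WIDTH = 80              # fixed width each source line is clipped and padded to
    MAX_SHIMS = LINE_WIDTH - 1   # assumed to have moved here as well; not used by data.py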
formatter.py: 57 changes

@@ -1,62 +1,15 @@
-import subprocess
-import os
 import numpy as np
+import os
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
 
 import tensorflow
 from tensorflow import keras
 from keras import layers
 
-LINE_WIDTH = 80
-MAX_SHIMS = LINE_WIDTH - 1
+from config import *
+import data
 
-def get_data():
-    r = []
-
-    def get_source(path : str) -> [str]:
-        '''returns source file 3 line batches'''
-        r = []
-        with open(path, 'r') as file:
-            lines = []
-            for line in file:
-                lines.append(line.strip())
-            r = [lines[i:i + 3] for i in range(0, len(lines), 3)]
-        return r
-    def source_to_np_array(source_batches : []) -> np.array:
-        r = []
-        for s in source_batches:
-            ascii_list = []
-            for l in s:
-                l = l[:LINE_WIDTH]
-                l = l.ljust(LINE_WIDTH)
-                l = [ord(i) for i in l]
-                ascii_list += l
-            n = np.reshape(ascii_list, (3, -1, 1))
-            n = np.expand_dims(n, axis=0)
-            r.append(n)
-        return r
-    def get_whitespace(path : str) -> [int]:
-        '''XXX returns the whitespace list of every middle line'''
-        r = []
-        output_file = "muf_file.txt"
-        process = subprocess.Popen(
-            "converter.out accumulate " + path + " > " + output_file,
-            shell=True,
-        )
-        with open(output_file, 'r') as file:
-            for n, line in enumerate(file):
-                if ((n + 2) % 3) != 0: continue
-                r.append(eval(line))
-        return r
-    source = source_to_np_array(get_source("in/xop.c"))
-    whitespace = get_whitespace("in/xop.c")
-    whitespace = [np.array(i) for i in whitespace]
-    r = {'in': source, 'out': whitespace}
-    return r
+dataset = data.get_data()
 
-data = get_data()
-assert len(data['in']) == len(data['in']), "data in and out sizes were inconsistent."
-print(data['in'], data['out'])
-
 model = keras.Sequential([
     layers.Conv2D(
@@ -90,7 +43,7 @@ model.compile(
     metrics=['accuracy']
 )
 
-model.fit(data['in'], data['out'],
+model.fit(dataset['in'], dataset['out'],
     verbose=2,
     batch_size=10,
     epochs=50,
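A note on the relocated get_whitespace()/get_data() code, which this commit moves but does not change: subprocess.Popen returns immediately without waiting for converter.out, so muf_file.txt can be opened before the converter has finished writing it, and the assert compares len(r['in']) to itself, so it can never fail. A hedged sketch of the presumably intended behaviour (not part of the commit; run_converter is a hypothetical helper name):

    import subprocess

    def run_converter(path: str, output_file: str) -> None:
        # subprocess.run blocks until converter.out exits, so output_file is
        # complete before the caller opens it; check=True surfaces failures.
        subprocess.run(
            "converter.out accumulate " + path + " > " + output_file,
            shell=True,
            check=True,
        )

    # Presumed intent of the size check: compare inputs against outputs.
    # assert len(r['in']) == len(r['out']), "data in and out sizes were inconsistent."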