getting somewhere i swear
parent 95c847b9a1
commit 27338a3481
2 .gitignore vendored
@@ -3,3 +3,5 @@ venv/
 *.out
 __pycache__/
 *.norm
+data/linux/
+*.pkl
4 README.md new file
@@ -0,0 +1,4 @@
+# NOTES
++ we have a problem on nuking system includes;
+  this fucks with trying to be language agnostic;
+  i wonder if hopefully the AI can just realize theres never spaces there
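
The note in miniature (a hypothetical illustration, not part of the commit): once the converter nukes whitespace, the one space inside an include directive must be re-predicted like any other, and the bet is that the model can learn that the bracketed path itself never contains one.

stripped = "#include<stdio.h>"   # what the whitespace-nuking pass leaves
restored = "#include <stdio.h>"  # what the whitespace model must reconstruct
# Special-casing '#include' would fix this but would no longer be language
# agnostic; the hope above is that '<...>' paths never contain spaces, so
# the rule is learnable from data alone.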
3 config.py
@@ -1,2 +1,5 @@
 LINE_WIDTH = 80
 MAX_SHIMS = LINE_WIDTH - 1
+SOURCE_LINE_BATCH_SIZE = 3
+
+COMPILE_INPUT_DIRECTORY = "data/linux/"
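
For orientation, how these constants shape the tensors data.py builds below (an inference from this commit, sketched rather than committed):

import numpy as np

LINE_WIDTH = 80                   # characters kept per source line
MAX_SHIMS = LINE_WIDTH - 1        # shim (inserted-space) slots per line
SOURCE_LINE_BATCH_SIZE = 3        # context lines per training sample

sample = np.zeros((SOURCE_LINE_BATCH_SIZE, LINE_WIDTH, 1))  # one 'in' entry
target = np.zeros(MAX_SHIMS)                                # one 'out' entry
assert sample.shape == (3, 80, 1) and target.shape == (79,)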
12 converter.l
@@ -4,12 +4,6 @@
 @STOP
 */
 %{
-/* NOTE: this shall be compiled as a shared library so python may call in
-*/
-/* XXX: we have a problem on nuking system includes;
-  this fucks with trying to be language agnostic;
-  i wonder if hopefully the AI can just realize theres never spaces there
-*/
 #include <stdio.h>
 #include <stdbool.h>
 
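The deleted NOTE survives in data.py, where tard_wrangler is presumably the Python-facing side of that shared library. A sketch of one conventional way to wire that up with ctypes (build command, library name, and C signature are all assumptions, not from this commit):

# hypothetical build: flex converter.l && cc -shared -fPIC lex.yy.c -o tard_wrangler.so
import ctypes

lib = ctypes.CDLL("./tard_wrangler.so")            # assumed library name
lib.accumulate(b"data/xop.c", b"data/xop.c.acc")   # assumed signature: (char *, char *)
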
@@ -23,10 +17,10 @@
 int accumulator = 0;
 
 FILE * build_file;
-int schemantic[MAX_SHIMS];
+char schemantic[MAX_SHIMS];
 int schim = 0;
 
-#define STEP_SCHEMANTIC fread(schemantic, MAX_SHIMS, 1, build_file)
+#define STEP_SCHEMANTIC fread(schemantic, MAX_SHIMS, sizeof(char), build_file)
 #define ECHOS(s) fwrite(s, strlen(s), 1, yyout)
 
 #define EOL '\n'
@@ -108,7 +102,7 @@ special {comment_marker}|{assignment}|{shift}|{modify}
 [ ]|\t	{ ; }
 {word}|.	{
 	ECHO;
-	for (int i = 0; i < schemantic[schim]; i++) {
+	for (char i = 0; i < schemantic[schim]; i++) {
 		ECHOS(" ");
 	}
 	++schim;
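
converter.l only consumes build_file; whatever writes it has to match the fixed-size record implied by char schemantic[MAX_SHIMS] and the fread above. A minimal sketch of such a writer, assuming one raw MAX_SHIMS-byte record per line, one shim count per byte (the function and file names are hypothetical, not part of this commit):

MAX_SHIMS = 79  # keep in sync with config.py

def write_schemantic(path : str, shim_rows : [[int]]) -> None:
    '''one MAX_SHIMS-byte record per source line, one byte per shim slot'''
    with open(path, 'wb') as f:
        for row in shim_rows:
            row = (row + [0] * MAX_SHIMS)[:MAX_SHIMS]  # pad/trim to record size
            f.write(bytes(row))                        # counts must fit in 0..255

write_schemantic("demo.schemantic", [[1, 0, 4], [0, 2]])

The int-to-char change above is what makes this layout line up: fread fills MAX_SHIMS bytes, which previously packed four counts into each element of the old int schemantic[] instead of one per slot.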
104 data.py
@@ -1,53 +1,71 @@
+from glob import glob
 import numpy as np
+import pickle
+from sys import argv
 
 from config import *
 import tard_wrangler
 
-def get_data():
+def get_source(path : str) -> [str]:
+    '''returns source file in $SOURCE_LINE_BATCH_SIZE line batches'''
     r = []
-    INPUT_FILE = "data/xop.c"
-    def get_source(path : str) -> [str]:
-        '''returns source file 3 line batches'''
-        r = []
-        with open(path, 'r') as file: lines = [line[:-1] for line in file]
+    # read data
+    with open(path, 'r') as file: lines = [line[:-1] for line in file]
+    # pad with empty lines
+    for i in range(int((SOURCE_LINE_BATCH_SIZE-1)/2)):
         lines.insert(0, "")
         lines.append("")
-        for i in range(len(lines)-2):
-            r.append(lines[i:i+3])
-        return r
-    def source_to_np_array(source_batches : []) -> np.array:
-        r = []
-        for s in source_batches:
-            ascii_list = []
-            for l in s:
-                l = l[:LINE_WIDTH]
-                l = l.ljust(LINE_WIDTH)
-                l = [ord(i) for i in l]
-                ascii_list += l
-            n = np.reshape(ascii_list, (3, -1, 1))
-            r.append(n)
-        r = np.array(r)
-        return r
-    def get_whitespace(path : str) -> [int]:
-        '''XXX returns the whitespace list of every middle line'''
-        r = []
-        output = "muf_file.txt"
-        tard_wrangler.accumulate(INPUT_FILE, output)
-        with open(output, 'r') as file:
-            for line in file:
-                try:
-                    l = eval(line)
-                    l = l + [0] * (MAX_SHIMS - len(l))
-                    r.append(l)
-                except: pass
-        return r
-    def whitespace_to_np_array(spaces : []) -> np.array:
-        r = spaces
-        r = np.array(r).reshape(len(spaces), -1)
-        return r
-    source = source_to_np_array(get_source(INPUT_FILE))
-    whitespace = whitespace_to_np_array(get_whitespace(INPUT_FILE))
-    r = {'in': source, 'out': whitespace}
+    # batch
+    for i in range(len(lines)-2):
+        r.append(lines[i:i+SOURCE_LINE_BATCH_SIZE])
+    return r
+
+def source_to_np_array(source_batches : []) -> np.array:
+    '''returns image like array from batches'''
+    r = []
+    for s in source_batches:
+        ascii_list = []
+        for l in s:
+            l = l[:LINE_WIDTH] # cut long lines
+            l = l.ljust(LINE_WIDTH) # pad short lines
+            l = [ord(i) for i in l]
+            ascii_list += l
+        n = np.reshape(ascii_list, (3, -1, 1))
+        r.append(n)
+    r = np.array(r)
+    return r
+
+def read_acc(path : str) -> [[int]]:
+    r = []
+    with open(path, 'r') as file:
+        for line in file:
+            try:
+                l = eval(line)
+                l = l + [0] * (MAX_SHIMS - len(l))
+                r.append(l)
+            except: pass
+    return r
+
+def whitespace_to_np_array(spaces : []) -> np.array:
+    r = spaces
+    r = np.array(r).reshape(len(spaces), -1)
+    return r
+
+def compile_data():
+    r = {'in': [], 'out': [], 'src': []}
+    for n, path in enumerate(glob(COMPILE_INPUT_DIRECTORY + "/*.c")):
+        if n > 47: break # XXX
+        acc_path = path + ".acc"
+        r['src'].append(path)
+        r['in'] += get_source(path)
+        r['out'] += read_acc(acc_path)
+    r['in'] = source_to_np_array(r['in'])
+    r['out'] = whitespace_to_np_array(r['out'])
+    return r
+
+def get_data():
+    r = []
+    with open('dataset-linux.pkl', 'rb') as f: r = pickle.load(f)
     assert len(r['in']) == len(r['out']), (
         "data in and out sizes were inconsistent ("
         + str(r['in'].shape)
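To make the windowing concrete (hypothetical values, not committed code): with SOURCE_LINE_BATCH_SIZE = 3, get_source pads one blank line onto each end and slides a 3-line window, so every real line appears exactly once as the middle line whose whitespace gets predicted.

lines = ["", "a", "b", "c", ""]  # a 3-line file after padding
batches = [lines[i:i+3] for i in range(len(lines)-2)]
assert batches == [["", "a", "b"], ["a", "b", "c"], ["b", "c", ""]]

One caveat: read_acc parses each line with eval, so the .acc files are fully trusted input; ast.literal_eval would accept the same list literals without executing arbitrary code.
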
@@ -58,6 +76,8 @@ def get_data():
     return r
 
 if __name__ == "__main__":
+    if len(argv) == 2 and argv[1] == 'c': # clean compile
+        with open('dataset-linux.pkl', 'wb') as f: pickle.dump(compile_data(), f)
     dataset = get_data()
     print(dataset)
     print(dataset['in'].shape, dataset['out'].shape)
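
Usage implied by the new __main__ block (the flag and file names are from the diff; the shapes are an inference from the code above):

# python data.py c   -> recompile dataset-linux.pkl from data/linux/*.c + .acc files
# python data.py     -> load the cached pickle only
from data import get_data

dataset = get_data()
print(dataset['in'].shape)   # e.g. (N, 3, 80, 1) ascii windows
print(dataset['out'].shape)  # e.g. (N, 79) shim counts per middle line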
@@ -24,6 +24,7 @@ model = keras.Sequential([
     ),
     layers.Flatten(),
     layers.Dense(64, activation='relu'),
+    layers.Dense(64, activation='relu'),
     layers.Dense(MAX_SHIMS) #activation='softmax'
 ])
 
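For context, the Sequential stack this hunk sits in, as far as the diff shows it (a reconstruction: everything above the closing paren is outside the hunk, so the Input shape is an assumption taken from data.py's sample shape):

from tensorflow import keras
from tensorflow.keras import layers

MAX_SHIMS = 79  # from config.py

model = keras.Sequential([
    keras.Input(shape=(3, 80, 1)),        # assumed input: one source-line window
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(64, activation='relu'),  # the layer this commit adds
    layers.Dense(MAX_SHIMS)               # linear head, no softmax
])

A linear last layer fits the target: 'out' rows are per-slot space counts, a regression over 79 slots, which is presumably why activation='softmax' stays commented out.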