init
This commit is contained in:
43
data.py
Normal file
43
data.py
Normal file
@ -0,0 +1,43 @@
|
||||
import re
|
||||
from bidict import bidict
|
||||
|
||||
#CHAR_TOKENS = bidict({
|
||||
# '': 0,
|
||||
# '\n': 1,
|
||||
#})
|
||||
#CHAR_TOKEN_OFFSET = 1
|
||||
|
||||
def encode(s : str) -> str:
|
||||
return re.sub(r'\s+', ' ', s)
|
||||
|
||||
#def decode(s : str, o : [int]) -> str:
|
||||
# result = []
|
||||
# space_index = 0
|
||||
# for char in s:
|
||||
# if char == ' ':
|
||||
# if o[space_index] in CHAR_TOKENS.inverse:
|
||||
# result.append(CHAR_TOKENS.inverse[o[space_index]])
|
||||
# else:
|
||||
# result.append(' ' * (o[space_index] - CHAR_TOKEN_OFFSET))
|
||||
# space_index += 1
|
||||
# else:
|
||||
# result.append(char)
|
||||
# return ''.join(result)
|
||||
|
||||
def decode(s : str, o : [int]) -> str:
|
||||
result = []
|
||||
space_index = 0
|
||||
for char in s:
|
||||
if char == ' ':
|
||||
result.append(' ' * (o[space_index])
|
||||
space_index += 1
|
||||
else:
|
||||
result.append(char)
|
||||
return ''.join(result)
|
||||
|
||||
def batchificate(f):
|
||||
BATCH_SIZE = 32
|
||||
s = open(f, 'r').read()
|
||||
s = encode(s)
|
||||
|
||||
print(decode(encode('if ( a == b ) { a = c )'), [2,0,2,2,0,1,0,4,1,1]))
|
Reference in New Issue
Block a user