44 lines
1.0 KiB
Python
44 lines
1.0 KiB
Python
import re
|
|
from bidict import bidict
|
|
|
|
#CHAR_TOKENS = bidict({
|
|
# '': 0,
|
|
# '\n': 1,
|
|
#})
|
|
#CHAR_TOKEN_OFFSET = 1
|
|
|
|
def encode(s : str) -> str:
|
|
return re.sub(r'\s+', ' ', s)
|
|
|
|
#def decode(s : str, o : [int]) -> str:
|
|
# result = []
|
|
# space_index = 0
|
|
# for char in s:
|
|
# if char == ' ':
|
|
# if o[space_index] in CHAR_TOKENS.inverse:
|
|
# result.append(CHAR_TOKENS.inverse[o[space_index]])
|
|
# else:
|
|
# result.append(' ' * (o[space_index] - CHAR_TOKEN_OFFSET))
|
|
# space_index += 1
|
|
# else:
|
|
# result.append(char)
|
|
# return ''.join(result)
|
|
|
|
def decode(s : str, o : [int]) -> str:
|
|
result = []
|
|
space_index = 0
|
|
for char in s:
|
|
if char == ' ':
|
|
result.append(' ' * (o[space_index])
|
|
space_index += 1
|
|
else:
|
|
result.append(char)
|
|
return ''.join(result)
|
|
|
|
def batchificate(f):
|
|
BATCH_SIZE = 32
|
|
s = open(f, 'r').read()
|
|
s = encode(s)
|
|
|
|
print(decode(encode('if ( a == b ) { a = c )'), [2,0,2,2,0,1,0,4,1,1]))
|