Monday, August 22, 2011

Pretty code (4)

For an explanation of what this is about, see the previous post in this series. I got triple-quoted strings working, and I'm really pleased with the results. More another time on other options for code highlighting.

import os
import sys
from keyword import kwlist
from string import punctuation as punct

import html_tags as H
from utils import load_data
# Extra names (beyond the entries of keyword.kwlist) that the second pass
# wraps in the H.py_start / H.py_stop markup.
pywords = (
    'True False list dict '
    'int float append extend '
    'sys argv pop open write '
    'close string time'
).split()

# Take the file to highlight from the first command-line argument, falling
# back to 'example.py' when no argument was given.
try:
    fn = sys.argv[1]
except IndexError:
    fn = 'example.py'
# load_data comes from the local utils module; its result is materialised as
# a list because the scanner below pops items off the front one at a time.
data = list(load_data(fn))

# Scanner state flags, all initially off:
#   is_cm    - set while comment markup is open
#   in_str_1 - set while a single-quoted string is open
#   in_str_2 - set while a double-quoted string is open
#   in_str_3 - set while a triple-quoted string is open
D = dict.fromkeys(('is_cm', 'in_str_1', 'in_str_2', 'in_str_3'), False)

# Output accumulator: the scanner appends characters and markup pieces here.
L = []

# Character pass: pop items off the front of `data` one at a time, copy each
# one into L, and splice the H.* start/stop markup around comments and string
# literals. The flags in D record which construct is currently open.
# NOTE(review): the indentation of this listing has been lost, so the nesting
# described in the comments below is inferred from statement order -- confirm
# against the original script before re-indenting.
# NOTE(review): each item is compared against one-character strings, so
# load_data presumably yields single characters -- verify in utils.
while data:
c = data.pop(0)
# comments first
# NOTE: only in_str_1 and in_str_2 are tested here; in_str_3 is not.
if c == '#':
if not (D['in_str_1'] or D['in_str_2']):
L.extend(list(H.cm_start))
D['is_cm'] = True
# a newline seen while is_cm is set closes the comment markup
if c == "\n" and D['is_cm']:
L.extend(list(H.cm_stop))
D['is_cm'] = False
# the current item is always appended; the string branches below pop it off
# again when start markup has to be placed in front of it
L.append(c)

# NOTE(review): this assignment looks like sample input for the highlighter
# rather than part of the scanning logic -- confirm whether it belongs here.
a = '''triple-quoted-string
with a continuation and two keywords'''


# triple-quoted strings
# len(data) > 1 guarantees data[0] and data[1] exist for the look-ahead
if c == "'" and len(data) > 1:
if data[0] == "'" and data[1] == "'":
if (not D['in_str_1'] and not D['in_str_2']):
if not D['in_str_3']:
# opening delimiter: remove the quote appended above so the start markup
# precedes all three quotes, then consume the two quotes still in data
L.pop()
L.extend(list(H.str3_start))
L.extend(["'"] * 3)
data.pop(0)
data.pop(0)
D['in_str_3'] = True
else:
# closing delimiter: the first quote is already in L, so consume and emit
# the remaining two before the stop markup
data.pop(0)
data.pop(0)
L.extend(["'"] * 2)
L.extend(list(H.str3_stop))
D['in_str_3'] = False
continue

# single-quoted strings
if c == "'":
# while in_str_3 is set, skip the remaining checks for this item
if D['in_str_3']:
continue
if (not D['in_str_1']):
if not D['in_str_2']:
# start a str_1
# swap the already-appended quote for start markup followed by the quote
L.pop()
L.extend(list(H.str_start))
L.append(c)
D['in_str_1'] = True
else:
# already in str_2 or str_3
pass
else:
if D['in_str_1']:
# terminate str_1
L.extend(list(H.str_stop))
D['in_str_1'] = False

# double-quoted strings
if c == '"':
if not D['in_str_2'] and not D['in_str_3']:
if not D['in_str_1']:
# start a str_2
# swap the already-appended quote for start markup followed by the quote
L.pop()
L.extend(list(H.str_start))
L.append(c)
D['in_str_2'] = True
else:
# already in str_1 or str_3
pass
else:
if D['in_str_2']:
# terminate str_2
L.extend(list(H.str_stop))
D['in_str_2'] = False
# collapse the accumulated pieces into a single string
s = ''.join(L)

# keywords last
# Second pass: walk the marked-up text line by line and wrap entries of
# kwlist in H.kw_start/H.kw_stop and entries of pywords in
# H.py_start/H.py_stop. The finished lines are collected in pL.
# NOTE(review): the indentation of this listing has been lost, so the nesting
# described below is inferred from statement order -- in particular, confirm
# whether the `for p in pywords` loop sits inside the comment/string guard.
pL = list()
# 'is_str_3' is a new key, distinct from the 'in_str_3' flag used by the
# character pass
D['is_str_3'] = False
for line in s.split('\n'):
D['is_cm'] = False
# whitespace-separated tokens; markup emitted by the first pass is part of
# whatever token it is attached to
words = line.split()
for w in words:
# no kw highlighting in comments
if w.startswith(H.cm_start):
D['is_cm'] = True
if w.startswith(H.str3_start):
D['is_str_3'] = True
if H.str3_stop in w:
D['is_str_3'] = False
if not D['is_cm'] and not D['is_str_3']:
if w in kwlist:
r = H.kw_start + w + H.kw_stop
# str.replace rewrites every occurrence of w in the line, not just this token
line = line.replace(w, r)
for p in pywords:
if p in w:
# NOTE: this rebinds the module-level L used by the character pass; L has
# already been joined into s by this point
L = w.split(p)
# skip p when the text on either side of it is non-empty and does not
# border p with a punctuation character (i.e. p is part of a longer word)
if L[0] and not L[0][-1] in punct:
continue
if L[1] and not L[1][0] in punct:
continue
r = H.py_start + p + H.py_stop
# str.replace rewrites every occurrence of p in the line
line = line.replace(p, r)
pL.append(line)

# Join the highlighted lines with the line-break markup and wrap them in the
# page pieces supplied by html_tags (head, rule, body, rule, tail).
s = H.br.join(pL)
pL = [H.head, H.hr, s, H.hr, H.tail]
s = '\n'.join(pL)

# Derive the output name by replacing only the final extension with '.html'.
# Splitting on the first '.' would truncate names such as './example.py'
# (giving '.html') or 'my.script.py' (giving 'my.html').
fn = os.path.splitext(fn)[0] + '.html'
# The context manager closes the file even if the write raises.
with open(fn, 'w') as FH:
    FH.write(s + '\n')

Here's a screenshot of html_tags.py: