Python for Bioinformatics: Pretty code (4)

Monday, August 22, 2011
Pretty code (4)

The previous post explains what this is about (here). I got triple-quoted strings working, and I'm really pleased with the results. More on other options for code highlighting another time.

import os
import re
import sys
from collections import deque
from keyword import kwlist
from string import punctuation as punct

from utils import load_data
import html_tags as H
# Extra names to colour besides the language keywords in keyword.kwlist:
# a hand-picked set of constants, types, methods and module names.
pywords = (
    'True False list dict '
    'int float append extend '
    'sys argv pop open write '
    'close string time'
).split()

# Input file: the first command-line argument, or 'example.py' when none
# was given.
fn = sys.argv[1] if len(sys.argv) > 1 else 'example.py'

# Materialise whatever load_data yields as a list so the scanner below can
# consume it from the front (presumably one character per item -- the
# scanner compares each item against single characters).
data = list(load_data(fn))

# Scanner state flags, all initially off:
#   is_cm    - inside a '#' comment
#   in_str_1 - inside a single-quoted string
#   in_str_2 - inside a double-quoted string
#   in_str_3 - inside a triple-quoted (''') string
D = dict.fromkeys(('is_cm', 'in_str_1', 'in_str_2', 'in_str_3'), False)

# Output buffer of characters and markup, joined into one string later.
L = []

# First pass: walk the source one character at a time and wrap comments and
# string literals in the markup from html_tags.  D carries the scanner state
# from one character to the next; L collects the output.
data = deque(data)  # popleft() is O(1); list.pop(0) shifts every remaining item

while data:
    c = data.popleft()

    # comments first
    if D['is_cm']:
        # Inside a comment only the terminating newline is special: a second
        # '#' must not open a nested span, and an apostrophe or double quote
        # must not be mistaken for the start of a string.
        if c == "\n":
            L.extend(H.cm_stop)
            D['is_cm'] = False
        L.append(c)
        continue

    in_string = D['in_str_1'] or D['in_str_2'] or D['in_str_3']
    if c == '#' and not in_string:
        # A '#' inside any kind of string (triple-quoted included) is text.
        L.extend(H.cm_start)
        D['is_cm'] = True
        L.append(c)
        continue

    L.append(c)

    # Never read in the code shown; it appears to be sample text so that the
    # triple-quote handling is exercised when this script is run on its own
    # source.
    a = '''triple-quoted-string
    with a continuation and two keywords'''

    # A backslash inside a string escapes the next character, so an escaped
    # quote must be copied through without ending the literal.
    if c == '\\' and in_string and data:
        L.append(data.popleft())
        continue

    if c == "'":
        # triple-quoted strings
        if (len(data) > 1 and data[0] == "'" and data[1] == "'"
                and not (D['in_str_1'] or D['in_str_2'])):
            data.popleft()
            data.popleft()
            if not D['in_str_3']:
                # Opening: the markup has to precede all three quotes, so
                # take back the quote appended above.
                L.pop()
                L.extend(H.str3_start)
                L.extend(["'"] * 3)
                D['in_str_3'] = True
            else:
                # Closing: one quote is already in L, add the other two.
                L.extend(["'"] * 2)
                L.extend(H.str3_stop)
                D['in_str_3'] = False
            continue

        # single-quoted strings
        if D['in_str_3']:
            # a lone quote inside a triple-quoted string is plain text
            continue
        if D['in_str_1']:
            # terminate str_1
            L.extend(H.str_stop)
            D['in_str_1'] = False
        elif not D['in_str_2']:
            # start a str_1: markup goes in front of the opening quote
            L.pop()
            L.extend(H.str_start)
            L.append(c)
            D['in_str_1'] = True
        # otherwise the quote sits inside a double-quoted string: plain text

    elif c == '"':
        # double-quoted strings
        if D['in_str_2']:
            # terminate str_2
            L.extend(H.str_stop)
            D['in_str_2'] = False
        elif not (D['in_str_1'] or D['in_str_3']):
            # start a str_2: markup goes in front of the opening quote
            L.pop()
            L.extend(H.str_start)
            L.append(c)
            D['in_str_2'] = True
        # otherwise the quote sits inside another kind of string: plain text

# A comment on the last line is never closed by a newline when the input
# has no trailing one; close it here so the markup stays balanced.
if D['is_cm']:
    L.extend(H.cm_stop)
    D['is_cm'] = False

s = ''.join(L)

# keywords last
# Second pass: go through the marked-up text line by line and word by word,
# colouring language keywords and the names in pywords.  Each word is
# rewritten on its own and the line is reassembled with its original
# whitespace.  (Replacing on the whole line would also hit substrings of
# other words -- 'in' inside 'line' -- and would re-wrap markup that an
# earlier identical word had already inserted.)

# A pyword counts only where it is delimited on both sides by punctuation or
# by the edge of the word; every occurrence is checked individually.
non_punct = '[^%s]' % re.escape(punct)
py_re = re.compile('(?<!%s)(?:%s)(?!%s)' % (
    non_punct,
    '|'.join(re.escape(p) for p in pywords),
    non_punct))


def wrap_py(match):
    """Return the matched pyword wrapped in the py markup."""
    return H.py_start + match.group(0) + H.py_stop


pL = list()
D['is_str_3'] = False   # persists across lines: triple-quoted strings can span them
for line in s.split('\n'):
    D['is_cm'] = False  # a comment never continues onto the next line
    pieces = []
    pos = 0
    for w in line.split():
        # Copy the whitespace in front of this word unchanged.  Only
        # whitespace lies between pos and the word, so the first match at or
        # after pos is the word itself.
        start = line.index(w, pos)
        pieces.append(line[pos:start])
        pos = start + len(w)

        # no kw highlighting in comments
        if w.startswith(H.cm_start):
            D['is_cm'] = True
        if w.startswith(H.str3_start):
            D['is_str_3'] = True
        if H.str3_stop in w:
            D['is_str_3'] = False

        if not D['is_cm'] and not D['is_str_3']:
            is_kw = w in kwlist
            w = py_re.sub(wrap_py, w)
            if is_kw:
                # keywords must match the whole word exactly
                w = H.kw_start + w + H.kw_stop
        pieces.append(w)
    pieces.append(line[pos:])   # trailing whitespace, if any
    pL.append(''.join(pieces))
    
# Assemble the page: highlighted lines joined with the line-break markup,
# framed by the head, rule and tail fragments from html_tags.
s = H.br.join(pL)
pL = [H.head, H.hr, s, H.hr, H.tail]
s = '\n'.join(pL)

# Swap only the final extension for '.html'.  Cutting at the first '.'
# would turn './example.py' into '.html' and 'a.b.py' into 'a.html'.
fn = os.path.splitext(fn)[0] + '.html'
# The with-block closes the file even if the write fails.
with open(fn, 'w') as FH:
    FH.write(s + '\n')
Here's a screenshot of html_tags.py: