Wednesday, August 12, 2009

Python simulation of Needleman-Wunsch 3

This is the third of four posts showing code for a simulation to do Needleman-Wunsch alignments. In this installment, we get the algorithm itself. It is obvious by inspection that the doScoring function implements the NW algorithm. Check out the function handlePos in the trackback function.

Save this code as Algorithm.py in the same location as the other files.

from Inits import newDict

def doScoring(L,s1,s2,matrix,sc):
R = len(s1) + 1 # row length
C = len(s2) + 1 # col length
# for each column
for c in range(2, R+1):
# for each row in that column
for r in range(2, C+1):
i = (r-1)*R + c-1

up = L[i-R]
if up['path'] == 'D':
upscore = up['score'] + sc.gap
else:
upscore = up['score'] + sc.ext

left = L[i-1]
if left['path'] == 'D':
leftscore = left['score'] + sc.gap
else:
leftscore = left['score'] + sc.ext

diag = L[i-R-1]['score']

m = s1[c-2]
n = s2[r-2]
diag += matrix[m+n]

# for debugging
# report(r,c,i,upscore,leftscore,diag)

if (diag >= leftscore) and (diag >= upscore):
L[i] = newDict(diag, 'D')
elif (leftscore > upscore):
L[i] = newDict(leftscore, 'L')
else:
L[i] = newDict(upscore, 'U')

def trackback(L,s1,s2,blosum):
R = len(s1) + 1 # items per row or numCols

def handlePos(i,s1L,s2L):
j,k = i%R-1,i/R-1
D = L[i]
#print D['score'],i,j,k,s1[j],s2[k]
if D['path'] == 'U':
s1L.append('-')
s2L.append(s2[k])
return i-R
if D['path'] == 'L':
s1L.append(s1[j])
s2L.append('-')
return i-1
if D['path'] == 'D':
s1L.append(s1[j])
s2L.append(s2[k])
return i-(R+1)

s1L = list()
s2L = list()
i = len(L) - 1
while i > 0:
i = handlePos(i,s1L,s2L)
s1L.reverse()
s2L.reverse()
mL = list()
for i,c1 in enumerate(s1L):
c2 = s2L[i]
if '-' in c1 or '-' in c2:
mL.append(' ')
elif c1 == c2: mL.append(c1)
elif blosum[c1+c2] > 0: mL.append('+')
else: mL.append(' ')

retL = [''.join(s1L),''.join(mL),''.join(s2L)]
return retL