Wednesday, December 8, 2010

Models of sequence evolution 4

Just to finish up with this topic. I wrapped the linear algebra routines into a function, then (i) generate a TN93 Q matrix for a particular choice of rates and πN, find P for that Q at a particular short t (0.1), multiply P by itself many times to see the convergence for the π values, and finally compare the P we got by matrix stuff to the P we obtain using the equations from Yang. It looks good to me. Time to move on.

Q 
[[-0.7 0.6 0.04 0.06]
[ 0.4 -0.5 0.04 0.06]
[ 0.04 0.06 -0.4 0.3 ]
[ 0.04 0.06 0.2 -0.3 ]]
evals
[ -1.10000000e+00 -2.00000000e-01 -1.62026873e-17 -6.00000000e-01]
L
[[ 0.89583414 0. 0. 0. ]
[ 0. 0.98019867 0. 0. ]
[ 0. 0. 1. 0. ]
[ 0. 0. 0. 0.94176453]]
U
[[ -8.32050294e-01 5.00000000e-01 -5.00000000e-01 -6.12373075e-17]
[ 5.54700196e-01 5.00000000e-01 -5.00000000e-01 -4.08248716e-17]
[ -2.98507646e-18 -5.00000000e-01 -5.00000000e-01 -8.32050294e-01]
[ 4.00977456e-18 -5.00000000e-01 -5.00000000e-01 5.54700196e-01]]
iU
[[ -7.21110255e-01 7.21110255e-01 -1.36437676e-17 -5.30723331e-17]
[ 4.00000000e-01 6.00000000e-01 -4.00000000e-01 -6.00000000e-01]
[ -4.00000000e-01 -6.00000000e-01 -4.00000000e-01 -6.00000000e-01]
[ -1.60118642e-16 -2.40177963e-16 -7.21110255e-01 7.21110255e-01]]
P
[[ 0.93354022 0.05655912 0.00396027 0.0059404 ]
[ 0.03770608 0.95239326 0.00396027 0.0059404 ]
[ 0.00396027 0.0059404 0.96109845 0.02900088]
[ 0.00396027 0.0059404 0.01933392 0.97076542]]
converges to:
[[ 0.2 0.3 0.2 0.3]
[ 0.2 0.3 0.2 0.3]
[ 0.2 0.3 0.2 0.3]
[ 0.2 0.3 0.2 0.3]]
explicit P:
[[ 0.93354022 0.05655912 0.00396027 0.0059404 ]
[ 0.03770608 0.95239326 0.00396027 0.0059404 ]
[ 0.00396027 0.0059404 0.96109845 0.02900088]
[ 0.00396027 0.0059404 0.01933392 0.97076542]]

code:

from __future__ import division
import math
import numpy as np

def JC69_Q():
a = 0.01/3
Q = np.asarray(
[ -3*a, a, a, a,
a, -3*a, a, a,
a, a, -3*a, a,
a, a, a, -3*a ])
Q.shape = (4,4)
return Q

def TN93_values():
pi = [0.2,0.3,0.2,0.3]
T,C,A,G = pi
Y = T + C
R = A + G
a1 = 2 # Y transitions
a2 = 1 # R transitions
b = 0.2 # transversions
return T,C,A,G,Y,R,a1,a2,b

def TN93_Q():
T,C,A,G,Y,R,a1,a2,b = TN93_values()
Q = np.asarray(
[ -(a1*C + b*R), a1*C, b*A, b*G,
a1*T, -(a1*T + b*R), b*A, b*G,
b*T, b*C, -(a2*G + b*Y), a2*G,
b*T, b*C, a2*A, -(a2*A + b*Y) ])
Q.shape = (4,4)
return Q

def TN93_P(t):
# order TCAG
T,C,A,G,Y,R,a1,a2,b = TN93_values()
e2 = math.e**(-b*t)
e3 = math.e**(-(R*a2 + Y*b)*t)
e4 = math.e**(-(Y*a1 + R*b)*t)

TY,CY,AR,GR = T/Y,C/Y,A/R,G/R
M = [
T + TY*R*e2 + CY*e4, C + CY*R*e2 - CY*e4,
A*(1 - e2), G*(1 - e2),
T + TY*R*e2 - TY*e4, C + CY*R*e2 + TY*e4,
A*(1 - e2), G*(1 - e2),
T*(1 - e2), C*(1 - e2),
A + AR*Y*e2 + GR*e3, G + GR*Y*e2 - GR*e3,
T*(1 - e2), C*(1 - e2),
A + AR*Y*e2 - AR*e3, G + GR*Y*e2 + AR*e3 ]
P = np.array(M)
P.shape = (4,4)
return P

def convert(Q,t,debug=False):
if debug:
print 'Q', '\n', Q
evals, evecs = np.linalg.eig(Q)
if debug:
print 'evals', '\n', evals
L = np.diag([math.e**(k*t) for k in evals])
if debug:
print 'L', '\n', L
U = evecs
if debug:
print 'U', '\n', U
iU = np.linalg.inv(U)
if debug:
print 'iU', '\n', iU
P = np.dot(U,np.dot(L,iU))
if debug:
print 'P', '\n', P
return P

def long_time(P,N=5000):
M = P
for i in range(N):
M = np.dot(P,M)
return M

if __name__ == '__main__':
#Q = JC69_Q()
Q = TN93_Q()
t = 0.1
P = convert(Q,t,debug=True)
print 'converges to:'
print long_time(P)
print 'explicit P:'
print TN93_P(t)