In [2]:
# Sam Dolan, December 2017, registration number: xxxxxxx
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams.update({'font.size': 14})  # Make the labels larger
In [42]:
# 2(b) A linear model.
arr=np.loadtxt("extrasolar_signal.txt")  
# I have downloaded the file from http://sam-dolan.staff.shef.ac.uk/mas212/data/extrasolar_signal.txt
omega = 2*np.pi / 14.6
tdata = arr[:,0]
vdata = arr[:,1]
ts = tdata; 
vs = vdata;
N = len(ts)
XX = np.zeros((N,2))
for i in range(N):
    XX[i,:] = [np.sin(omega*ts[i]), np.cos(omega*ts[i])]
XT = XX.transpose()
b = np.dot(XT, vs)
A = np.dot(XT, XX)
beta = np.linalg.solve(A, b)
print("The best-fit parameters are:")
print("\tbeta[0] = %.10f\n\tbeta[1] = %.10f" % (beta[0], beta[1]))

tlin = np.linspace(0, 80, 200)
vfit = beta[0]*np.sin(omega*tlin) + beta[1]*np.cos(omega*tlin)

# Plot the image
plt.xlim(0,90)
plt.plot(ts, vs, "b.", label="Data");
plt.plot(tlin, vfit, "r-", label="Model");
plt.legend(loc=0)
plt.xlabel("Time in days");
plt.ylabel("Relative velocity in m/s"); 

plt.tight_layout(pad=0.5)
plt.savefig("2b_linear_model.pdf")
The best-fit parameters are:
	beta[0] = -0.6778463417
	beta[1] = -0.1567790453
In [22]:
# 3(c) Testing the goodness of fit.
# First, work out the residuals
vfit = beta[0]*np.sin(omega*ts) + beta[1]*np.cos(omega*ts)
vres = vs - vfit  # The residuals
plt.plot(ts, vres, '.', label="Residuals")
plt.xlabel("Time in days");
plt.ylabel("Residual (m/s)"); 
plt.tight_layout(pad=0.5)
plt.yticks([-1,-0.5,0,0.5,1])
plt.gcf().set_size_inches(5,4)
plt.tight_layout(pad=0.5)
plt.savefig("3c_residuals.pdf")

def RMSD(arr):
    """Calculate the root mean square deviation (RMSD) from the residuals."""
    return np.sqrt(np.sum(arr**2)/len(arr))

print("It does not look 'random': there is evidence of residual structure here (e.g. oscillations).")
print("The residuals are correlated: the n-th residual is not independent of the (n+1)-th and (n-1)-th residual.")
print("This suggests that a better model can be found.")
print("One measure of the 'goodness of fit' is the root mean square deviation (RMSD):")
rmsd0 = RMSD(vres)
print("\tRMSD = %.10f\n" % rmsd0)
It does not look 'random': there is evidence of residual structure here (e.g. oscillations).
The residuals are correlated: the n-th residual is not independent of the (n+1)-th and (n-1)-th residual.
This suggests that a better model can be found.
One measure of the 'goodness of fit' is the root mean square deviation (RMSD):
	RMSD = 0.3120280023

In [26]:
# 3(c) continued.
# The auto-correlation coefficient
n1 = len(vres) 
vres2 = np.zeros(n1-1)
vres2 = vres[1:]
vres1 = vres[:n1-1]

meanr = np.mean(vres1)
alpha1 = np.sum((vres1-meanr)*(vres2-meanr))/np.sum((vres1**2))
print("alpha_1 = %.4f" % alpha1)
print("The auto-correlation coefficient is a number (typically between -1 and 1) that")
print("measures the correlation between two data sets; in this case,")
print("between successive residuals.")
print("The value greater than 0.7 indicates a high positive correlation.")
print("This hints that there is additional structure within the data that was")
print("not captured by the crude model that we used.")

plt.gcf().set_size_inches(5,4)
plt.xlabel("$r_{k}$"); plt.ylabel("$r_{k+1}$")
plt.yticks([-0.5,0.0,0.5])
plt.plot(vres1, vres2, '.g')
plt.tight_layout(pad=0.5)
plt.savefig("3c_autocorrelation.pdf")
alpha_1 = 0.7646
The auto-correlation coefficient is a number (typically between -1 and 1) that
measures the correlation between two data sets; in this case,
between successive residuals.
The value greater than 0.7 indicates a high positive correlation.
This hints that there is additional structure within the data that was
not captured by the crude model that we used.
In [46]:
# 4(d) A physical model. Numerical solution of ODEs with midpoint method.
def dXdt(X, t, p, e):
    r, s, phi = X
    return np.array([s, -1/r**2 + p/r**3, p**0.5/r**2])

def midpoint(dXfn, tmax, nsteps, ic, par):
    """The midpoint method for solving ODEs. Arguments:
    dXdfn is a user-defined function for calculating derivatives,
    tmax the maximum time, 
    nsteps is the number of steps,
    ic is the initial condition (an array of 3 values)
    par is a two-element list of parameters, (p,e). 
    """
    dt = tmax / nsteps;
    Xs = np.zeros((3,nsteps+1))   # X has 3 rows for 3 variables (r,s,phi).
    ts = dt*np.arange(nsteps+1)
    Xs[:,0] = ic
    p,e = par
    for k in range(nsteps):
        X = Xs[:,k]
        Xhalf = X + 0.5*dt*dXfn(X, ts[k], p, e)
        Xs[:,k+1] = X + dt*dXfn(Xhalf, ts[k]+dt/2.0, p, e)
    return (ts,Xs)
In [57]:
# (e) Orbits and signals  
tmax = 60;
nsteps = 5000;
ics = [(1.0, 0.0, 0.0), (1.7, 0.5, np.pi/6), (1.3,0.8,-np.pi/2)]

plt.figure(figsize=(5,5))
for ic in ics:
    p0, e0, phi0 = ic;
    ts, Xs = midpoint(dXdt, tmax, nsteps, [p0/(1+e0), 0.0, phi0], (p0,e0))
    rs = Xs[0,::4]; phis = Xs[2,::4]
    plt.plot(rs*np.cos(phis), rs*np.sin(phis), '-', label="e = %.1f" % e0);
plt.axis('equal');
plt.xlabel('$r \, \cos \phi$', fontsize=16); plt.ylabel('$r \, \sin \phi$', fontsize=16);
plt.plot([0],[0],'.')
plt.tight_layout(pad=0.5)
plt.savefig("5e_orbits.pdf")
In [56]:
# 3(b) [continued]
plt.figure(figsize=(6,5))
tmax=80
for ic in ics:
    p0, e0, phi0 = ic;
    legend = "$p=%.1f$, $e=%.1f$" % (p0,e0)
    ts, Xs = midpoint(dXdt, tmax, nsteps, [p0/(1+e0), 0.0, phi0], (p0,e0))
    rs = Xs[0,:]; rdots = Xs[1,:]; phis = Xs[2,:]
    vs= rdots*np.cos(phis) - p0**0.5/rs*np.sin(phis)
    plt.plot(ts, vs, '-', label=legend);

plt.legend(loc=1)
plt.xlim(0,tmax+5)
plt.ylim(-1.0,1.8)
plt.xlabel('Time in days', fontsize=16); 
plt.ylabel('Relative velocity in m/s', fontsize=16);
plt.tight_layout(pad=0.5)
plt.savefig("5e_example_signals.pdf")
In [62]:
# (f) Fitting the physical model
from scipy.optimize import leastsq

dT = tdata[1] - tdata[0]  # The data is linearly-spaced
print("Time spacing in data = %.4f" % dT) # The data is linearly-spaced in t.

# This step size is too large for 'midpoint', so we will use intermediate steps
def calcV(ic, dt, tmax=80.0):
    p0, e0, phi0 = ic
    nsteps = int(tmax / dt)
    ts, Xs = midpoint(dXdt, tmax, nsteps, [p0/(1+e0), 0.0, phi0], (p0,e0))
    rs = Xs[0,:]; rdots = Xs[1,:]; phis = Xs[2,:]
    vs = rdots*np.cos(phis) - p0**0.5/rs*np.sin(phis)
    return vs

def calcVres(ic, dT = 0.133333333333, tmax=80.0):
    n2 = 6
    dt = dT / n2
    vs = calcV(ic, dt)
    return vs[::n2] - vdata

# Before attempting to minimize, it is important to find good initial estimates.
# After some experimentation, I came to the following:
ic0 = np.array((1.5, 0.4, 0))
tfit = np.linspace(0, 80, 2400)
vfit = calcV(ic0, tfit[1]-tfit[0])
plt.plot(tfit, vfit , 'r-')
plt.plot(tdata, vdata, 'b.')
Time spacing in data = 0.1333
Out[62]:
[<matplotlib.lines.Line2D at 0x11a996c18>]
In [81]:
# (f). Finding the best-fit parameters.
# This may take some seconds to run. 
bestfit = leastsq(calcVres, ic0)
par1 = bestfit[0]
print("Best fit parameters:\n\tp = %.6f,\te = %.6f,\tphi = %.6f" % tuple(par1))
tfit = np.linspace(0, 80, 2400)
vfit = calcV(bestfit[0], tfit[1]-tfit[0])
plt.plot(tdata, vdata, 'b.')
plt.plot(tfit, vfit , 'r-')
plt.xlabel('Time in days', fontsize=16); 
plt.ylabel('Relative velocity in m/s', fontsize=16);
plt.tight_layout(pad=0.5)
#plt.title("Exoplanet model: best fit")
plt.savefig("6f_best_fit.pdf")
Best fit parameters:
	p = 1.299353,	e = 0.510238,	phi = 0.243871
In [70]:
# (f)) continued
# Check the residuals
vres = calcVres(ic1)
plt.plot(tdata, vres, '.', label="Residuals")
plt.xlabel("Time in days");
plt.ylabel("Residual (m/s)"); 
plt.gcf().set_size_inches(4,3)
plt.tight_layout(pad=0.5)
plt.savefig("6f_residuals.pdf")

print("The root mean square deviation (RMSD) is now:")
rmsdbest = RMSD(vres)
print("\t%.10f" % rmsdbest)
print("The new model has a lower RMSD than the crude model (%.3f vs %.3f)." % (rmsdbest, rmsd0))
The root mean square deviation (RMSD) is now:
	0.1519120664
The new model has a lower RMSD than the crude model (0.152 vs 0.312).
In [74]:
n1 = len(vres) 
vres2 = np.zeros(n1-1)
vres2 = vres[1:]
vres1 = vres[:n1-1]
plt.xlabel("$r_{k}$"); plt.ylabel("$r_{k+1}$")
plt.yticks([-0.5,0.0,0.5])
plt.gcf().set_size_inches(4,3)
plt.plot(vres1, vres2, '.')
plt.tight_layout(pad=0.5)
plt.savefig("6f_autocorrelation.pdf")
print("There is no clear auto-correlation, suggesting a good fit has been achieved.")
There is no clear auto-correlation, suggesting a good fit has been achieved.

(g) Can we infer the planet's mass and semi-major axis? This is a little tricky, because the equations I gave you have had dimensionful constants removed. Let's start with Kepler's third law, in the form $$ a^3 = \frac{G (M+m)}{\Omega^2} $$ where $a$ is the semi-major axis, $\Omega$ is the orbital angular frequency ($\Omega = 2\pi / T$, where $T$ is the period), and $M$ and $m$ are the masses of the star and planet, respectively; $G$ is Newton's gravitational constant. We may assume that $m \ll M$.

In [77]:
 
0.43035515802599905 0.43035515802599905
In [85]:
G = 6.674e-11  # in SI units
M = 2e30 # The mass of the Sun in kg (we assume the star has this mass)
Omega = omega / (24*3600)  # the orbital frequency is encoded in the frequency of the signal
     # Here I have converted from 'per day' to Hz = s^{-1}. 
a3 = G*M / Omega**2
a = a3**(1/3.0)
astunit = 1.496e11 # This is the Earth-Sun distance in metres. 
print("The semi-major axis 'a' is approximately:\n\t%.3e metres, or\n\t%.3f astronomical units." % (a, a/astunit))
print("So the semi-major axis is %.1f times smaller than the Earth's." % (astunit/a))
e0 = par1[1]
rp = a*(1-e0)/astunit
print(rp,0.307/rp)
The semi-major axis 'a' is approximately:
	1.752e+10 metres, or
	0.117 astronomical units.
So the semi-major axis is 8.5 times smaller than the Earth's.
0.0573654387872 5.35165434956

To estimate the planet's mass, we can use the amplitude of the relative velocity of the star. The star is pulled by the gravitational attraction of the planet. The gravitational acceleration is $\approx G m / a^2$. The star is orbiting the centre of mass of the system, at a radius $r \approx \frac{m}{M} \times a$. Using the circular-motion formula, acceleration = $v^2 / r$. Equating these two results for acceleration, and rearranging leads to $$ m \approx v \sqrt{\frac{a M}{G}} . $$

In [93]:
v = 0.8  # approximately, in metres per second
mEarth = 5.972e24
mass = v*np.sqrt(a*M / G)
print("The mass of the planet is approximately:")
print("\t%.3e kg, or\n\t%.3f times the mass of the Earth." % (mass, mass/mEarth))
print("This is a crude estimate, based on assuming a circular orbit;")
print("however, this orbit is highly eccentric.")
m2 = v*np.sqrt(M*a/G * (1-e0)/(1+e0))
print("Following the method in last year's report we find the mass of the planet is:")
print("\t%.3e kg, or\n\t%.3f times the mass of the Earth." % (m2,m2/mEarth))
The mass of the planet is approximately:
	1.833e+25 kg, or
	3.070 times the mass of the Earth.
This is a crude estimate, based on assuming a circular orbit;
however, this orbit is highly eccentric.
Following the method in last year's report we find the mass of the planet is:
	1.044e+25 kg, or
	1.748 times the mass of the Earth.
In [ ]: