Source code for fits

'''
Collection of fitting functions

Author : Thomas Haslwanter
Date : Nov-2015
Version: 1.5
'''

# Standard packages
import numpy as np
import scipy as sp
import scipy.stats      # makes sp.stats available below
import scipy.linalg     # makes sp.linalg available (used in fit_ellipse)
import matplotlib.pyplot as plt
import pandas as pd
import os

# Special package
from collections import namedtuple
from statsmodels.formula.api import ols
try:
    from skimage import filters
except ImportError:     # for backwards compatibility with old installations
    from skimage import filter as filters
    
def demo_ransac():
    '''Find the best-fit circle in an image, using the RANSAC algorithm '''

    debug_flag = 1

    # Since this function is only used in demo_ransac, define it here
    def drawCircle(center, r):
        '''Draw a circle'''
        nPts = 100
        phi = np.linspace(0, 2*np.pi, nPts)
        x = center[0] + r*np.cos(phi)
        y = center[1] + r*np.sin(phi)
        plt.plot(y, x, 'r')

    # Get the data
    os.chdir(r'C:\Users\p20529\Coding\Matlab\imProc\ransac_fitCircle')
    data = plt.imread('0_5.bmp')

    # Eliminate edge artefacts
    rim = 20
    data = data[rim:-rim, rim:-rim]
    imSize = data.shape

    # Find the edges
    edges = filters.sobel(data)
    edgePnts = edges > 0.15
    (x, y) = np.where(edgePnts)

    # Set the RANSAC parameters
    par = {'eps': 3,
           'ransac_threshold': 0.1,
           'nIter': 500,
           'lowerRadius': 5,
           'upperRadius': 200}

    # Allocate memory for center (2D), radius, numPoints (structured array)
    fitted = np.zeros(par['nIter'],
                      dtype={'names': ['center', 'radius', 'nPts'],
                             'formats': ['2f8', 'f8', 'i4']})

    for ii in range(par['nIter']):
        # Take 3 random edge points, and find the corresponding circle
        randEdges = np.random.permutation(len(x))[:3]
        (center, radius) = fit_circle(x[randEdges], y[randEdges])

        # Eliminate very small and very large circles, and centers outside the image
        if not (par['lowerRadius'] < radius < par['upperRadius'] and
                0 <= center[0] < imSize[0] and
                0 <= center[1] < imSize[1]):
            continue

        # Make sure a reasonable number of points lies near that circle
        centerDistance = np.sqrt((x-center[0])**2 + (y-center[1])**2)
        inCircle = np.where(np.abs(centerDistance-radius) < par['eps'])[0]
        inPts = len(inCircle)
        if inPts < par['ransac_threshold']*4*np.pi*radius*par['eps'] or inPts < 3:
            continue

        # Fit a circle to all good points, and save the corresponding parameters
        (center, radius) = fit_circle(x[inCircle], y[inCircle])
        fitted[ii] = (center, radius, inPts)

        # If you want to see these points:
        if debug_flag == 1:
            plt.plot(y, x, '.')
            plt.plot(y[inCircle], x[inCircle], 'r.')
            plt.plot(y[randEdges], x[randEdges], 'g+', ms=15)
            plt.axis('equal')
            plt.show()

    # Sort the circles, according to the number of points included
    fitted = np.sort(fitted, order='nPts')

    # Show the best-fitting circle
    plt.imshow(data, cmap='gray', origin='lower')
    drawCircle(fitted[-1]['center'], fitted[-1]['radius'])
    plt.show()
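# A minimal, self-contained sketch of the same RANSAC idea on synthetic data,
# for readers without the bitmap used above. The point counts, noise level,
# outlier fraction, and the helper name _demo_ransac_synthetic are
# illustrative assumptions, not part of the demo above.
def _demo_ransac_synthetic(nIter=200, eps=0.2):
    '''Core RANSAC loop on noisy synthetic circle points with gross outliers.'''
    rng = np.random.RandomState(0)
    phi = rng.uniform(0, 2*np.pi, 100)
    x = 10 + 4*np.cos(phi) + 0.05*rng.randn(100)
    y = 12 + 4*np.sin(phi) + 0.05*rng.randn(100)
    # Add 30 gross outliers, spread uniformly over the bounding box
    x = np.hstack((x, rng.uniform(0, 20, 30)))
    y = np.hstack((y, rng.uniform(0, 20, 30)))

    best = (None, None, 0)      # (center, radius, number of inliers)
    for _ in range(nIter):
        # Fit a candidate circle through 3 random points ...
        sample = rng.permutation(len(x))[:3]
        (center, radius) = fit_circle(x[sample], y[sample])
        # ... and count the points lying within eps of that circle
        dist = np.sqrt((x-center[0])**2 + (y-center[1])**2)
        inliers = np.abs(dist - radius) < eps
        if inliers.sum() > best[2]:
            # Re-fit the circle, using all inliers of this candidate
            (center, radius) = fit_circle(x[inliers], y[inliers])
            best = (center, radius, inliers.sum())
    return best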
def fit_circle(x, y):
    ''' Determine the best-fit circle to given datapoints.

    Parameters
    ----------
    x : array (N,)
        x-values.
    y : array (N,)
        corresponding y-values.

    Returns
    -------
    center : array (2,)
        x/y coordinates of the center of the circle
    radius : float
        Circle radius.

    Examples
    --------
    >>> r = 2
    >>> center = np.r_[5,5]
    >>> theta = np.r_[0:2*np.pi:10j]
    >>> x = r*np.cos(theta)+center[0]
    >>> y = r*np.sin(theta)+center[1]
    >>> cFit, rFit = thLib.fits.fit_circle(x,y)

    '''

    M = np.vstack((2*x, 2*y, np.ones(len(x)))).T
    (par, _, _, _) = np.linalg.lstsq(M, x**2 + y**2, rcond=None)

    center = par[:2]
    radius = np.sqrt(par[2] + np.sum(center**2))

    return (center, radius)
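# A quick sanity check for fit_circle on exact synthetic data (center, radius
# and sample size are arbitrary illustrative choices). The linear system used
# above follows from rewriting (x-cx)**2 + (y-cy)**2 = r**2 as
# 2*cx*x + 2*cy*y + (r**2 - cx**2 - cy**2) = x**2 + y**2.
def _check_fit_circle():
    '''fit_circle should recover a noise-free circle exactly.'''
    phi = np.linspace(0, 2*np.pi, 50)
    x = 5 + 2*np.cos(phi)
    y = 7 + 2*np.sin(phi)
    (center, radius) = fit_circle(x, y)
    assert np.allclose(center, [5, 7]) and np.isclose(radius, 2)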
def fit_exp(tFit, yFit, plotFlag=False):
    ''' Calculates best-fit parameters for the exponential decay to an offset.
    This can serve as an example for a general non-linear fit.

    Parameters
    ----------
    tFit : array (N,)
        Time values.
    yFit : array (N,)
        Function values

    Returns
    -------
    offset : float
        Function offset/bias.
    amp : float
        Amplitude of the exponential function
    tau : float
        Decay time.

    Examples
    --------
    >>> t = np.arange(10)
    >>> tau = 2.
    >>> amp = 1.
    >>> offset = 2.
    >>> x = offset + amp*np.exp(-t/tau)
    >>> fitted = thLib.fits.fit_exp(t,x)

    '''

    from scipy import optimize

    # Define the fit-function and the error-function
    fitfunc = lambda p, x: p[0] + p[1]*np.exp(-x/p[2])
    errfunc = lambda p, x, y: fitfunc(p, x) - y

    pInit = np.r_[0, 1, 1]  # Initial values

    # Make the fit
    pFit, success = optimize.leastsq(errfunc, pInit, args=(tFit, yFit))

    if plotFlag:
        # Plot the data and the fit
        plt.plot(tFit, yFit, label='rawdata')
        plt.plot(tFit, fitfunc(pFit, tFit), label='fit')
        plt.legend()
        plt.show()

    ExpFit = namedtuple('ExpFit', ['offset', 'amplitude', 'tau'])
    return ExpFit(*pFit)
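# A self-contained check for fit_exp (parameter values and noise level are
# illustrative assumptions): starting from pInit = [0, 1, 1], leastsq should
# recover the offset, amplitude and decay time from noisy samples.
def _check_fit_exp():
    '''fit_exp should approximately recover known decay parameters.'''
    rng = np.random.RandomState(0)
    t = np.linspace(0, 10, 200)
    y = 2. + 1.5*np.exp(-t/3.) + 0.01*rng.randn(len(t))
    print(fit_exp(t, y))    # approximately ExpFit(offset=2, amplitude=1.5, tau=3)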
def fit_line(x, y, alpha=0.05, newx=[], plotFlag=False):
    """ Linear regression fit.

    Parameters
    ----------
    x : ndarray
        Input / Predictor.
    y : ndarray
        Input / Estimator.
    alpha : float
        Confidence limit [default=0.05]
    newx : float or ndarray
        Values for which the fit and the prediction limits are calculated (optional)
    plotFlag: int, optional
        1 = plot, 0 = no_plot [default]

    Returns
    -------
    a : float
        Intercept
    b : float
        Slope
    ci : ndarray
        Lower and upper confidence intervals for intercept and slope
    info : dictionary
        contains return information on
        - residuals
        - var_res
        - sd_res
        - alpha
        - tval
        - df
    newy : list(ndarray)
        Predictions for (newx, newx-ciPrediction, newx+ciPrediction)

    Examples
    --------
    >>> x = np.r_[0:10:11j]
    >>> y = x**2
    >>> (a,b,(ci_a, ci_b),_) = thLib.fits.fit_line(x,y)
    Summary: a=-15.0000+/-12.4590, b=10.0000+/-2.1060
    Confidence intervals: ci_a=(-27.4590 - -2.5410), ci_b=(7.8940 - 12.1060)
    Residuals: variance = 95.3333, standard deviation = 9.7639
    alpha = 0.050, tval = 2.2622, df=9

    Notes
    -----
    Example data and formulas are taken from
    D. Altman, "Practical Statistics for Medical Research"

    """

    # Summary data
    n = len(x)  # number of samples

    Sxx = np.sum(x**2) - np.sum(x)**2/n
    # Syy = np.sum(y**2) - np.sum(y)**2/n    # not needed here
    Sxy = np.sum(x*y) - np.sum(x)*np.sum(y)/n
    mean_x = np.mean(x)
    mean_y = np.mean(y)

    # Linefit
    b = Sxy/Sxx
    a = mean_y - b*mean_x

    # Residuals
    fit = lambda xx: a + b*xx
    residuals = y - fit(x)

    var_res = np.sum(residuals**2)/(n-2)
    sd_res = np.sqrt(var_res)

    # Confidence intervals
    se_b = sd_res/np.sqrt(Sxx)
    se_a = sd_res*np.sqrt(np.sum(x**2)/(n*Sxx))

    df = n - 2                              # degrees of freedom
    tval = sp.stats.t.isf(alpha/2., df)     # appropriate t value

    ci_a = a + tval*se_a*np.array([-1, 1])
    ci_b = b + tval*se_b*np.array([-1, 1])

    # Create a series of new test x-values to predict for
    npts = 100
    px = np.linspace(np.min(x), np.max(x), num=npts)

    se_fit     = lambda x: sd_res * np.sqrt(1./n + (x-mean_x)**2/Sxx)
    se_predict = lambda x: sd_res * np.sqrt(1 + 1./n + (x-mean_x)**2/Sxx)

    print('Summary: a={0:5.4f}+/-{1:5.4f}, b={2:5.4f}+/-{3:5.4f}'.format(a, tval*se_a, b, tval*se_b))
    print('Confidence intervals: ci_a=({0:5.4f} - {1:5.4f}), ci_b=({2:5.4f} - {3:5.4f})'.format(ci_a[0], ci_a[1], ci_b[0], ci_b[1]))
    print('Residuals: variance = {0:5.4f}, standard deviation = {1:5.4f}'.format(var_res, sd_res))
    print('alpha = {0:.3f}, tval = {1:5.4f}, df={2:d}'.format(alpha, tval, df))

    # Return info
    ri = {'residuals': residuals,
          'var_res': var_res,
          'sd_res': sd_res,
          'alpha': alpha,
          'tval': tval,
          'df': df}

    if plotFlag:
        # Plot the data
        plt.figure()

        plt.plot(px, fit(px), 'k', label='Regression line')
        # plt.plot(x, y, 'k.', label='Sample observations', ms=10)
        plt.plot(x, y, 'k.')

        x = np.sort(x)      # sort a copy, to leave the caller's data unchanged
        limit = (1-alpha)*100
        plt.plot(x, fit(x)+tval*se_fit(x), 'r--', lw=2,
                 label='Confidence limit ({0:.1f}%)'.format(limit))
        plt.plot(x, fit(x)-tval*se_fit(x), 'r--', lw=2)

        plt.plot(x, fit(x)+tval*se_predict(x), '--', lw=2, color=(0.2, 1, 0.2),
                 label='Prediction limit ({0:.1f}%)'.format(limit))
        plt.plot(x, fit(x)-tval*se_predict(x), '--', lw=2, color=(0.2, 1, 0.2))

        plt.xlabel('X values')
        plt.ylabel('Y values')
        plt.title('Linear regression and confidence limits')

        # Configure the legend
        plt.legend(loc=0)
        leg = plt.gca().get_legend()
        ltext = leg.get_texts()
        plt.setp(ltext, fontsize=10)

        # Show the plot
        plt.show()

    if np.size(newx) > 0:
        newx = np.atleast_1d(newx)

        print('Example: x = {0}+/-{1} => se_fit = {2:5.4f}, se_predict = {3:6.5f}'
              .format(newx[0], tval*se_predict(newx[0]), se_fit(newx[0]), se_predict(newx[0])))

        # Prediction interval = tval * standard error of the prediction,
        # consistent with the docstring above
        newy = (fit(newx),
                fit(newx) - tval*se_predict(newx),
                fit(newx) + tval*se_predict(newx))

        LineFit = namedtuple('LineFit', ['intercept', 'slope', 'CIs', 'info', 'yFitted'])
        return LineFit(a, b, (ci_a, ci_b), ri, newy)
    else:
        LineFit = namedtuple('LineFit', ['intercept', 'slope', 'CIs', 'info'])
        return LineFit(a, b, (ci_a, ci_b), ri)
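# A usage sketch for fit_line with a prediction at a new x-value (the
# four-point dataset mirrors the tests at the bottom of this module; the
# choice newx=2.5 is arbitrary):
def _check_fit_line():
    '''Fit y = a + b*x, and predict y at x=2.5 with prediction limits.'''
    x = np.array([0, 0, 10, 10], dtype=float)
    y = np.array([1, 3, 11, 13], dtype=float)
    result = fit_line(x, y, newx=2.5)       # for these data, a=2 and b=1
    print(result.yFitted)   # (prediction, lower, and upper prediction limit)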
def fit_sin(tList, yList, freq):
    ''' Fit a sine wave with a known frequency to a given set of data.

    y = amplitude * sin(2*pi*freq * tList + phase*pi/180) + bias

    Parameters
    ----------
    tList : array
        time base, in sec
    yList : array
        datapoints
    freq : float
        in Hz

    Returns
    -------
    phase : float
        in degrees
    amplitude : float
    bias : float

    Examples
    --------
    >>> np.random.seed(1234)
    >>> t = np.arange(0,10,0.1)
    >>> x = 3 + 4*np.sin(2*np.pi*t + 5*np.pi/180) + np.random.randn(len(t))
    >>> (phase, amp, offset) = thLib.fits.fit_sin(t, x, 1)

    '''

    # Get input data
    b = yList
    rows = [[np.sin(freq*2*np.pi*t), np.cos(freq*2*np.pi*t), 1] for t in tList]
    A = np.array(rows)

    # Make the fit
    (w, residuals, rank, sing_vals) = np.linalg.lstsq(A, b, rcond=None)

    # Extract the desired parameters ...
    phase = np.arctan2(w[1], w[0])*180/np.pi
    amplitude = np.linalg.norm([w[0], w[1]], 2)
    bias = w[2]

    # ... and return them
    SinFit = namedtuple('SinFit', ['phase', 'amp', 'offset'])
    return SinFit(phase, amplitude, bias)
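# A check for fit_sin, re-using the docstring example: since the model is
# linear in [sin, cos, 1], the fit needs no starting values.
def _check_fit_sin():
    '''fit_sin should approximately recover phase=5 deg, amp=4, offset=3.'''
    np.random.seed(1234)
    t = np.arange(0, 10, 0.1)
    x = 3 + 4*np.sin(2*np.pi*t + 5*np.pi/180) + np.random.randn(len(t))
    print(fit_sin(t, x, 1))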
def fit_ellipse(x, y):
    ''' Ellipse fit by Taubin's Method

    Parameters
    ----------
    x : array
        x-coordinates of the ellipse points
    y : array
        y-coordinates of the ellipse points

    Returns
    -------
    A : array
        Ellipse parameters
        A = [a b c d e f] is the vector of algebraic parameters of the
        fitting ellipse:
        ax^2 + bxy + cy^2 + dx + ey + f = 0
        The vector A is normed, so that ||A||=1.

    Notes
    -----
    Among fast non-iterative ellipse fitting methods, this is perhaps the most
    accurate and robust. This method fits a quadratic curve (conic) to a set
    of points; if the points are better approximated by a hyperbola, this fit
    will return a hyperbola. To fit ellipses only, use "Direct Ellipse Fit".

    Published in
    G. Taubin, "Estimation Of Planar Curves, Surfaces And Nonplanar
    Space Curves Defined By Implicit Equations, With
    Applications To Edge And Range Image Segmentation",
    IEEE Trans. PAMI, Vol. 13, pages 1115-1138, (1991)

    '''

    centroid = np.mean(np.vstack((x, y)), 1)   # the centroid of the data set

    Z = np.vstack(((x-centroid[0])**2,
                   (x-centroid[0])*(y-centroid[1]),
                   (y-centroid[1])**2,
                   x-centroid[0],
                   y-centroid[1],
                   np.ones(len(x)))).T

    M = Z.T.dot(Z) / len(x)

    P = np.array([[M[0,0]-M[0,5]**2,       M[0,1]-M[0,5]*M[1,5],  M[0,2]-M[0,5]*M[2,5],  M[0,3], M[0,4]],
                  [M[0,1]-M[0,5]*M[1,5],   M[1,1]-M[1,5]**2,      M[1,2]-M[1,5]*M[2,5],  M[1,3], M[1,4]],
                  [M[0,2]-M[0,5]*M[2,5],   M[1,2]-M[1,5]*M[2,5],  M[2,2]-M[2,5]**2,      M[2,3], M[2,4]],
                  [M[0,3],                 M[1,3],                M[2,3],                M[3,3], M[3,4]],
                  [M[0,4],                 M[1,4],                M[2,4],                M[3,4], M[4,4]]])

    Q = np.array([[4*M[0,5],  2*M[1,5],       0,         0, 0],
                  [2*M[1,5],  M[0,5]+M[2,5],  2*M[1,5],  0, 0],
                  [0,         2*M[1,5],       4*M[2,5],  0, 0],
                  [0,         0,              0,         1, 0],
                  [0,         0,              0,         0, 1]])

    w, vr = sp.linalg.eig(P, Q)
    sortID = np.argsort(w)
    A = vr[:, sortID[0]]
    A = np.hstack((A, -A[:3].T.dot(M[:3, 5])))

    # Undo the initial shift to the centroid
    A4 = A[3] - 2*A[0]*centroid[0] - A[1]*centroid[1]
    A5 = A[4] - 2*A[2]*centroid[1] - A[1]*centroid[0]
    A6 = A[5] + A[0]*centroid[0]**2 + A[2]*centroid[1]**2 + \
         A[1]*centroid[0]*centroid[1] - A[3]*centroid[0] - A[4]*centroid[1]

    A[3] = A4
    A[4] = A5
    A[5] = A6

    A = A/np.linalg.norm(A)

    return A
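# A sanity check for fit_ellipse (the 5-by-3 ellipse is an arbitrary choice):
# for a good fit, the algebraic conic a*x^2 + b*x*y + c*y^2 + d*x + e*y + f
# should be close to zero on all data points.
def _check_fit_ellipse():
    '''The fitted conic should (almost) vanish on the ellipse points.'''
    theta = np.linspace(0, 2*np.pi, 100)
    x, y = 5*np.cos(theta), 3*np.sin(theta)
    (a, b, c, d, e, f) = fit_ellipse(x, y)
    residual = a*x**2 + b*x*y + c*y**2 + d*x + e*y + f
    print(np.max(np.abs(residual)))     # should be close to 0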
def regress(y, X, alpha=0.05, intercept=True):
    ''' Multilinear regression and confidence intervals.
    Note that by default, an intercept is added to the design matrix!

    Parameters
    ----------
    X : ndarray (N,) or (N,p)
        predictors at each of N observations
    y : ndarray (N,)
        observed responses
    alpha : float, optional
        Defines the 100*(1-alpha)% confidence level in ci.
        Default alpha=0.05
    intercept : boolean
        If 'True', an intercept is automatically added to the design matrix.
        If 'False', the behavior is like the Matlab "regress" function, and
        no intercept is added.

    Returns
    -------
    fit : float
        best fit intercept and regression parameters
    ci : ndarray (2,)
        confidence intervals for the coefficient estimates at the given
        alpha level (default: 95%-level)

    Examples
    --------
    >>> x = np.array([0, 0, 10, 10])
    >>> y = np.array([1, 3, 11, 13])
    >>> (fit, ci) = thLib.fits.regress(y, x)

    >>> X = np.random.randn(10,2)
    >>> mat = np.hstack( (np.ones( (len(X),1) ), X) )
    >>> pars = np.c_[[2, 4, 5]]
    >>> y = mat.dot(pars)
    >>> y += 0.1*np.random.randn(*y.shape)
    >>> (fit, ci) = thLib.fits.regress(y, mat)

    See also
    --------
    fit_line

    '''

    if X.ndim == 1:
        dof = len(X) - 2
        df = pd.DataFrame({'x': X, 'y': y})
        modelTxt = 'y~x'

        # If you want the Matlab "regress", without intercept
        if intercept == False:
            modelTxt += '-1'
            dof += 1

        # Fit the model
        model = ols(modelTxt, df).fit()
        ci = np.zeros((2, 2))

    else:
        # Make sure the dimensions are right
        if X.shape[0] < X.shape[1]:
            X = X.T
        dof = X.shape[0] - X.shape[1] - 1

        if y.ndim == 1:
            y = np.c_[y]

        # For automatic labelling, put all the data into one matrix
        data = np.hstack((y, X))
        df = pd.DataFrame(data)

        # Make a string like 'y~x0+x1+x2'
        labels = ['y']
        modelTxt = 'y~'
        for ii in range(X.shape[1]):
            labels.append('x' + str(ii))
            modelTxt += labels[-1] + '+'
        # Remove the last "+"
        modelTxt = modelTxt[:-1]

        # Automatic column labelling
        df.columns = labels

        # Memory allocation for the CIs
        if intercept == True:
            ci = np.zeros((1 + X.shape[1], 2))
        # If you want the Matlab "regress", without intercept
        else:
            dof += 1
            modelTxt += '-1'
            ci = np.zeros((X.shape[1], 2))

        # Fit the model
        model = ols(modelTxt, df).fit()

    level = 1. - alpha/2.

    # Make sure that you have the correct DOF
    tVal = sp.stats.t.ppf(level, dof)

    # Extract the parameters and standard errors from the model
    fit = model.params
    se = model.bse

    # Calculate the CIs
    ci[:, 0] = fit - se*tVal
    ci[:, 1] = fit + se*tVal

    # Return a named tuple
    Regression = namedtuple('Regression', ['Fit', 'CIs'])
    return Regression(fit, ci)
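# A usage sketch for regress with the one-dimensional docstring example: the
# data scatter around y = 2 + x, so the fit should return an intercept near 2
# and a slope near 1, with CIs computed at the default 95%-level.
def _check_regress():
    '''Regress the four-point example, and print fit and confidence intervals.'''
    x = np.array([0, 0, 10, 10])
    y = np.array([1, 3, 11, 13])
    (fit, ci) = regress(y, x)
    print(fit)      # Intercept ~2, slope ~1
    print(ci)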
if __name__ == '__main__':
    # Produce an ellipse
    a = 5
    b = 3
    alpha = np.deg2rad(30)

    R = np.array([[np.cos(alpha), -np.sin(alpha)],
                  [np.sin(alpha),  np.cos(alpha)]])

    theta = np.linspace(0, 2*np.pi, 100)
    x = a * np.cos(theta)
    y = b * np.sin(theta)

    XY = np.vstack((x, y))
    ellipse = R.dot(XY)

    plt.plot(ellipse[0,:], ellipse[1,:])
    plt.show()

    A = fit_ellipse(ellipse[0,:], ellipse[1,:])
    print('The ellipse-fit parameters are:')
    print(A)

    # Check (equations from http://mathworld.wolfram.com/Ellipse.html)
    (a, b, c, d, e, f) = A

    # To match the formula from Wolfram
    b /= 2
    d /= 2
    g = f
    f = e/2

    aFit = np.sqrt(2*(a*f**2 + c*d**2 + g*b**2 - 2*b*d*f - a*c*g) /
                   ((b**2 - a*c) * (np.sqrt((a-c)**2 + 4*b**2) - (a+c))))
    bFit = np.sqrt(2*(a*f**2 + c*d**2 + g*b**2 - 2*b*d*f - a*c*g) /
                   ((b**2 - a*c) * (-np.sqrt((a-c)**2 + 4*b**2) - (a+c))))
    thetaFit = np.pi/2 + 0.5*np.arctan2(2*b, a-c)

    print('a={0:3.2f}, b={1:3.2f}, theta = {2}'.format(aFit, bFit, np.rad2deg(thetaFit)))

    '''
    # Test fit_exp
    t = np.arange(10)
    tau = 2.
    amp = 1.
    offset = 2.
    x = offset + amp*np.exp(-t/float(tau))
    print(fit_exp(t, x))

    import doctest
    doctest.testmod()

    # Test fit_line
    print('Fit line ------------')
    x = np.array([0, 0, 10, 10])
    y = np.array([1, 3, 11, 13])
    out = fit_line(x, y)
    print(out)

    # Test regress - 1 dim
    print('Plain -----------------')
    x = np.array([0, 0, 10, 10])
    y = np.array([1, 3, 11, 13])
    (fit, ci) = regress(y, x)
    print(fit)
    print(ci)

    print('99% -----------------')
    (fit, ci) = regress(y, x, alpha=0.01)
    print(fit)
    print(ci)

    print('No intercept -------')
    (fit, ci) = regress(y, x, intercept=False)
    print(fit)
    print(ci)

    # Test regress - 2 dim
    print('2-dim -----------')
    X = np.array([[1, 1, 1, 1], [0, 0, 10, 10]]).T
    y = np.array([1, 3, 11, 13])
    (fit, ci) = regress(y, X, intercept=False)
    print(fit)
    print(ci)

    # Test regress - 2 dim
    X = np.random.randn(10, 2)
    mat = np.hstack((np.ones((len(X), 1)), X))
    pars = np.c_[[2, 4, 5]]
    y = mat.dot(pars)
    y += 0.1*np.random.randn(*y.shape)
    (fit, ci) = regress(y, mat)
    print(fit)
    print(ci)
    '''