''' Collection of fitting functions

Author : Thomas Haslwanter
Date : Nov-2015
Version: 1.5
'''
# Standard packages
import numpy as np
import scipy as sp
import scipy.stats      # "import scipy as sp" alone does not provide sp.stats ...
import scipy.linalg     # ... or sp.linalg, which are both used below
import matplotlib.pyplot as plt
import pandas as pd
import os

# Special packages
from collections import namedtuple
from statsmodels.formula.api import ols
try:
from skimage import filters
except ImportError: # for backwards compatibility with old installations
from skimage import filter as filters
def demo_ransac():
'''Find the best-fit circle in an image, using the RANSAC algorithm '''
debug_flag = 1
# Since this function is only used in demo_ransac, define it here
def drawCircle(center,r):
'''Draw a circle'''
nPts = 100
phi = np.linspace(0,2*np.pi,nPts)
x = center[0] + r*np.cos(phi)
y = center[1] + r*np.sin(phi)
        plt.plot(y, x, 'r')
    # Get the data (machine-specific path; adjust to your local setup)
os.chdir(r'C:\Users\p20529\Coding\Matlab\imProc\ransac_fitCircle')
data = plt.imread('0_5.bmp')
# Eliminate edge artefacts
rim = 20
data = data[rim:-rim, rim:-rim]
imSize = data.shape
# Find edges
edges = filters.sobel(data)
    edgePnts = edges > 0.15    # binary edge mask; np.sum(edgePnts) counts the edge points
    (x, y) = np.where(edgePnts)
# set RANSAC parameters
    par = {'eps': 3,
           'ransac_threshold': 0.1,
           'nIter': 500,
           'lowerRadius': 5,
           'upperRadius': 200}
# Allocate memory, for center(2D), radius, numPoints (structured array)
    fitted = np.zeros(par['nIter'],
                      dtype={'names': ['center', 'radius', 'nPts'],
                             'formats': ['2f8', 'f8', 'i4']})
for ii in range(par['nIter']):
# Takes 3 random points, and find the corresponding circle
randEdges = np.random.permutation(len(x))[:3]
        (center, radius) = fit_circle(x[randEdges], y[randEdges])
# Eliminate very small and very large circles, and centers outside the image
if not (par['lowerRadius'] < radius < par['upperRadius'] and \
0 <= center[0] < imSize[0] and \
0 <= center[1] < imSize[1]):
continue
# Make sure a reasonable number of points lies near that circle
centerDistance = np.sqrt((x-center[0])**2 + (y-center[1])**2)
inCircle = np.where(np.abs(centerDistance-radius)<par['eps'])[0]
inPts = len(inCircle)
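        # A band of width 2*eps around the circle holds roughly 2*pi*radius * 2*eps pixels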
if inPts < par['ransac_threshold'] *4*np.pi*radius*par['eps'] or inPts < 3:
continue
# Fit a circle to all good points, and save the corresponding parameters
        (center, radius) = fit_circle(x[inCircle], y[inCircle])
fitted[ii] = (center, radius, inPts)
# If you want to see these points:
if debug_flag == 1:
            plt.plot(y, x, '.')
            plt.plot(y[inCircle], x[inCircle], 'r.')
            plt.plot(y[randEdges], x[randEdges], 'g+', ms=15)
plt.axis('equal')
plt.show()
# Sort the circles, according to number of points included
fitted = np.sort(fitted,order='nPts')
# Show the best-fitting circle
plt.imshow(data, cmap='gray', origin='lower')
drawCircle(fitted[-1]['center'], fitted[-1]['radius'])
plt.show()
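
# The demo above depends on a local image file. The following is a minimal,
# self-contained sketch of the same idea on synthetic data; the seed, noise
# level, and circle parameters are arbitrary illustrative choices.
def _demo_ransac_synthetic():
    '''Sketch: outliers bias a plain least-squares circle fit; the RANSAC loop
    in demo_ransac suppresses them by re-fitting only near-circle points.'''
    np.random.seed(0)
    phi = np.random.uniform(0, 2*np.pi, 50)
    x = 40 + 25*np.cos(phi) + 0.5*np.random.randn(50)   # points on a circle ...
    y = 60 + 25*np.sin(phi) + 0.5*np.random.randn(50)
    x = np.hstack((x, np.random.uniform(0, 100, 20)))   # ... plus gross outliers
    y = np.hstack((y, np.random.uniform(0, 100, 20)))
    center, radius = fit_circle(x, y)    # biased by the outliers
    near = np.abs(np.sqrt((x-center[0])**2 + (y-center[1])**2) - radius) < 3
    return fit_circle(x[near], y[near])  # one "inlier re-fit" step, as in the loop above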
def fit_circle(x, y):
'''
Determine the best-fit circle to given datapoints.
Parameters
----------
x : array (N,)
x-values.
y : array (N,)
corresponding y-values.
Returns
-------
center : array (2,)
x/y coordinates of center of the circle
radius : float
Circle radius.
Examples
--------
>>> r = 2
>>> center = np.r_[5,5]
>>> theta = np.r_[0:2*np.pi:10j]
>>> x = r*np.cos(theta)+center[0]
>>> y = r*np.sin(theta)+center[1]
>>> cFit,rFit = thLib.fits.fit_circle(x,y)
'''
M = np.vstack((2*x,2*y,np.ones(len(x)))).T
    (par, _, _, _) = np.linalg.lstsq(M, x**2 + y**2, rcond=None)
center = par[:2]
radius = np.sqrt(par[2]+np.sum(center**2))
    return (center, radius)
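
# fit_circle works by linearizing the circle equation: every point satisfies
#     x**2 + y**2 = 2*cx*x + 2*cy*y + (r**2 - cx**2 - cy**2),
# which is linear in the unknowns (cx, cy, c) and solvable with lstsq.
# A quick numeric check (a sketch; the circle parameters are arbitrary):
def _check_fit_circle():
    '''Sketch: noise-free circle points should be recovered almost exactly.'''
    theta = np.linspace(0, 2*np.pi, 20, endpoint=False)
    center, radius = fit_circle(5 + 2*np.cos(theta), -3 + 2*np.sin(theta))
    return center, radius    # expect center ~ [5, -3], radius ~ 2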
def fit_exp(tFit, yFit, plotFlag=False):
'''
Calculates best-fit parameters for the exponential decay to an offset.
This can serve as an example for a general non-linear fit.
Parameters
----------
tFit : array (N,)
Time values.
    yFit : array (N,)
        Function values
    plotFlag : bool, optional
        If True, plot the data and the fit [default: False]
Returns
-------
offset : float
Function offset/bias.
amp : float
Amplitude of exponential function
tau : float
Decay time.
Examples
--------
>>> t = np.arange(10)
>>> tau = 2.
>>> amp = 1.
>>> offset = 2.
>>> x = offset + amp*np.exp(-t/tau)
>>> fitted = thLib.fits.fit_exp(t,x)
'''
from scipy import optimize
# Define the fit-function and the error-function
fitfunc = lambda p, x: p[0] + p[1]*np.exp(-x/p[2])
errfunc = lambda p,x,y: fitfunc(p,x) - y
pInit = np.r_[0, 1, 1] # Initial values
# Make the fit
pFit, success = optimize.leastsq(errfunc, pInit, args=(tFit, yFit))
if plotFlag:
# Plot the data and the fit
        plt.plot(tFit, yFit, label='rawdata')
        plt.plot(tFit, fitfunc(pFit, tFit), label='fit')
plt.legend()
plt.show()
ExpFit = namedtuple('ExpFit', ['offset', 'amplitude', 'tau'])
return ExpFit(*pFit)
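
# For comparison: the same non-linear fit with scipy.optimize.curve_fit, which
# additionally returns a covariance estimate for the parameters. This is only
# an illustrative sketch, not part of the module API:
def _demo_fit_exp_curvefit():
    '''Sketch: exponential-decay fit via curve_fit, mirroring fit_exp.'''
    from scipy.optimize import curve_fit
    t = np.arange(10)
    y = 2. + 1.*np.exp(-t/2.)
    model = lambda t, offset, amp, tau: offset + amp*np.exp(-t/tau)
    pOpt, pCov = curve_fit(model, t, y, p0=[0, 1, 1])
    return pOpt    # expect approximately [2, 1, 2]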
def fit_line(x, y, alpha=0.05, newx=None, plotFlag=False):
"""
Linear regression fit.
Parameters
----------
x : ndarray
Input / Predictor.
y : ndarray
Input / Estimator.
alpha : float
Confidence limit [default=0.05]
    newx : float or ndarray, optional
        Values for which the fit and the prediction limits are calculated
    plotFlag : bool, optional
        True = plot, False = no plot [default]
Returns
-------
a : float
Intercept
b : float
Slope
ci : ndarray
Lower and upper confidence interval for the slope
info : dictionary
contains return information on
- residuals
- var_res
- sd_res
- alpha
- tval
- df
    newy : tuple(ndarray)
        Predictions (fit(newx), fit(newx)-ciPrediction, fit(newx)+ciPrediction)
Examples
--------
>>> x = np.r_[0:10:11j]
>>> y = x**2
>>> (a,b,(ci_a, ci_b),_) = thLib.fits.fit_line(x,y)
Summary: a=-15.0000+/-12.4590, b=10.0000+/-2.1060
Confidence intervals: ci_a=(-27.4590 - -2.5410), ci_b=(7.8940 - 12.1060)
Residuals: variance = 95.3333, standard deviation = 9.7639
alpha = 0.050, tval = 2.2622, df=9
Notes
-----
Example data and formulas are taken from
    D. Altman, "Practical Statistics for Medical Research"
"""
# Summary data
n = len(x) # number of samples
Sxx = np.sum(x**2) - np.sum(x)**2./n
# Syy = np.sum(y**2) - np.sum(y)**2./n # not needed here
    Sxy = np.sum(x*y) - np.sum(x)*np.sum(y)/float(n)
mean_x = np.mean(x)
mean_y = np.mean(y)
# Linefit
b = Sxy/Sxx
a = mean_y - b*mean_x
# Residuals
fit = lambda xx: a + b*xx
residuals = y - fit(x)
    var_res = np.sum(residuals**2)/float(n-2)
sd_res = np.sqrt(var_res)
# Confidence intervals
se_b = sd_res/np.sqrt(Sxx)
    se_a = sd_res*np.sqrt(np.sum(x**2)/float(n*Sxx))
df = n-2 # degrees of freedom
tval = sp.stats.t.isf(alpha/2., df) # appropriate t value
ci_a = a + tval*se_a*np.array([-1,1])
ci_b = b + tval*se_b*np.array([-1,1])
# create series of new test x-values to predict for
npts = 100
px = np.linspace(np.min(x),np.max(x),num=npts)
se_fit = lambda x: sd_res * np.sqrt( 1./n + (x-mean_x)**2./Sxx)
se_predict = lambda x: sd_res * np.sqrt(1+1./n + (x-mean_x)**2./Sxx)
print('Summary: a={0:5.4f}+/-{1:5.4f}, b={2:5.4f}+/-{3:5.4f}'.format(a,tval*se_a,b,tval*se_b))
print('Confidence intervals: ci_a=({0:5.4f} - {1:5.4f}), ci_b=({2:5.4f} - {3:5.4f})'.format(ci_a[0], ci_a[1], ci_b[0], ci_b[1]))
print('Residuals: variance = {0:5.4f}, standard deviation = {1:5.4f}'.format(var_res, sd_res))
print('alpha = {0:.3f}, tval = {1:5.4f}, df={2:d}'.format(alpha, tval, df))
# Return info
ri = {'residuals': residuals,
'var_res': var_res,
'sd_res': sd_res,
'alpha': alpha,
'tval': tval,
'df': df}
if plotFlag:
# Plot the data
plt.figure()
plt.plot(px, fit(px),'k', label='Regression line')
#plt.plot(x,y,'k.', label='Sample observations', ms=10)
plt.plot(x,y,'k.')
        x = np.sort(x)  # sort a copy, so the caller's array is not modified in-place
limit = (1-alpha)*100
plt.plot(x, fit(x)+tval*se_fit(x), 'r--', lw=2, label='Confidence limit ({0:.1f}%)'.format(limit))
plt.plot(x, fit(x)-tval*se_fit(x), 'r--', lw=2 )
plt.plot(x, fit(x)+tval*se_predict(x), '--', lw=2, color=(0.2,1,0.2), label='Prediction limit ({0:.1f}%)'.format(limit))
plt.plot(x, fit(x)-tval*se_predict(x), '--', lw=2, color=(0.2,1,0.2))
plt.xlabel('X values')
plt.ylabel('Y values')
plt.title('Linear regression and confidence limits')
# configure legend
plt.legend(loc=0)
leg = plt.gca().get_legend()
ltext = leg.get_texts()
plt.setp(ltext, fontsize=10)
# show the plot
plt.show()
    if newx is not None and np.size(newx) > 0:
        newx = np.atleast_1d(newx)
        print('Example: x = {0}+/-{1} => se_fit = {2:5.4f}, se_predict = {3:6.5f}'\
              .format(newx[0], tval*se_predict(newx[0]), se_fit(newx[0]), se_predict(newx[0])))
        # The prediction limits need the t-value, just like the confidence limits
        newy = (fit(newx), fit(newx)-tval*se_predict(newx), fit(newx)+tval*se_predict(newx))
LineFit = namedtuple('LineFit',['intercept', 'slope', 'CIs', 'info', 'yFitted'])
return LineFit(a,b,(ci_a, ci_b), ri, newy)
else:
LineFit = namedtuple('LineFit',['intercept', 'slope', 'CIs', 'info'])
return LineFit(a,b,(ci_a, ci_b), ri)
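
# Cross-check: intercept, slope, and their confidence intervals from fit_line
# should agree with a statsmodels OLS fit. A sketch, using the docstring data:
def _check_fit_line():
    '''Sketch: compare fit_line with statsmodels OLS.'''
    x = np.r_[0:10:11j]
    y = x**2
    result = fit_line(x, y)
    model = ols('y~x', pd.DataFrame({'x': x, 'y': y})).fit()
    # model.params should match (result.intercept, result.slope), and
    # model.conf_int(0.05) should match result.CIs
    return result, model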
def fit_sin(tList, yList, freq):
'''
Fit a sine wave with a known frequency to a given set of data.
y = amplitude * sin(2*pi*freq * tList + phase*pi/180) + bias
Parameters
----------
    tList : array
        time base, in sec
    yList : array
        datapoints
    freq : float
        in Hz
Returns
-------
phase : float
in degrees
amplitude : float
bias : float
Examples
--------
>>> np.random.seed(1234)
>>> t = np.arange(0,10,0.1)
>>> x = 3 + 4*np.sin(2*np.pi*t + 5*np.pi/180) + np.random.randn(len(t))
>>> (phase, amp, offset) = thLib.fits.fit_sin(t, x, 1)
'''
# Get input data
b = yList
rows = [ [np.sin(freq*2*np.pi*t), np.cos(freq*2*np.pi*t), 1] for t in tList]
A = np.array(rows)
# Make the fit
    (w, residuals, rank, sing_vals) = np.linalg.lstsq(A, b, rcond=None)
# Extract desired parameters ...
phase = np.arctan2(w[1],w[0])*180/np.pi
amplitude = np.linalg.norm([w[0],w[1]],2)
bias = w[2]
# ... and return them
SinFit = namedtuple('SinFit', ['phase', 'amp', 'offset'])
return SinFit(phase,amplitude,bias)
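
# fit_sin relies on the identity
#     amp*sin(w*t + phase) = amp*cos(phase)*sin(w*t) + amp*sin(phase)*cos(w*t),
# so the weights of the linear fit on [sin, cos, 1] directly give the amplitude
# (vector norm) and phase (arctan2). A quick check, with arbitrary values:
def _check_fit_sin():
    '''Sketch: recover phase/amplitude/offset of a clean sine wave.'''
    t = np.arange(0, 10, 0.1)
    y = 3 + 4*np.sin(2*np.pi*t + np.deg2rad(5))
    return fit_sin(t, y, freq=1)    # expect phase ~ 5 deg, amp ~ 4, offset ~ 3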
def fit_ellipse(x, y):
'''
Ellipse fit by Taubin's Method
Parameters
----------
x : array
x-coordinates of the ellipse points
y : array
y-coordinates of the ellipse points
Returns
-------
A : array
Ellipse parameters
A = [a b c d e f]
        is the vector of algebraic parameters of the fitting ellipse:
ax^2 + bxy + cy^2 +dx + ey + f = 0
The vector A is normed, so that ||A||=1.
Notes
-----
Among fast non-iterative ellipse fitting methods,
this is perhaps the most accurate and robust.
This method fits a quadratic curve (conic) to a set of points;
if points are better approximated by a hyperbola, this fit will
return a hyperbola. To fit ellipses only, use "Direct Ellipse Fit".
Published in
G. Taubin, "Estimation Of Planar Curves, Surfaces And Nonplanar
Space Curves Defined By Implicit Equations, With
Applications To Edge And Range Image Segmentation",
IEEE Trans. PAMI, Vol. 13, pages 1115-1138, (1991)
'''
centroid = np.mean( np.vstack((x,y)), 1 ) # the centroid of the data set
Z = np.vstack(( (x-centroid[0])**2,
(x-centroid[0])*(y-centroid[1]),
(y-centroid[1])**2,
x-centroid[0],
y-centroid[1],
                    np.ones(len(x)) )).T
M = Z.T.dot(Z) / len(x)
P = np.array(
[[ M[0,0]-M[0,5]**2, M[0,1]-M[0,5]*M[1,5], M[0,2]-M[0,5]*M[2,5], M[0,3], M[0,4] ],
[ M[0,1]-M[0,5]*M[1,5], M[1,1]-M[1,5]**2, M[1,2]-M[1,5]*M[2,5], M[1,3], M[1,4] ],
[ M[0,2]-M[0,5]*M[2,5], M[1,2]-M[1,5]*M[2,5], M[2,2]-M[2,5]**2, M[2,3], M[2,4] ],
[ M[0,3], M[1,3], M[2,3], M[3,3], M[3,4] ],
[ M[0,4], M[1,4], M[2,4], M[3,4], M[4,4] ]])
Q = np.array(
[[ 4*M[0,5], 2*M[1,5], 0, 0, 0],
[ 2*M[1,5], M[0,5]+M[2,5], 2*M[1,5], 0, 0],
[ 0, 2*M[1,5], 4*M[2,5], 0, 0],
[ 0, 0, 0, 1, 0],
[ 0, 0, 0, 0, 1]])
    w, vr = sp.linalg.eig(P, Q)
    # The generalized eigenvalues may come back as complex with ~zero imaginary
    # part; sort by the real part and take the eigenvector of the smallest one
    sortID = np.argsort(w.real)
    A = np.real(vr[:, sortID[0]])
A = np.hstack( (A, -A[:3].T.dot(M[:3,5])) )
A4 = A[3] - 2*A[0]*centroid[0] - A[1]*centroid[1]
A5 = A[4] - 2*A[2]*centroid[1] - A[1]*centroid[0]
A6 = A[5] + A[0]*centroid[0]**2 +A[2]*centroid[1]**2+ \
A[1]*centroid[0]*centroid[1] - A[3]*centroid[0]-A[4]*centroid[1]
A[3] = A4
A[4] = A5
A[5] = A6
    A = A/np.linalg.norm(A)
return A
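
# The returned vector A is normed and satisfies
#     a*x**2 + b*x*y + c*y**2 + d*x + e*y + f ~ 0
# on the data. A quick residual check (a sketch, with an axis-aligned ellipse):
def _check_fit_ellipse():
    '''Sketch: the algebraic residual should vanish for noise-free points.'''
    theta = np.linspace(0, 2*np.pi, 50)
    x, y = 5*np.cos(theta), 3*np.sin(theta)
    a, b, c, d, e, f = fit_ellipse(x, y)
    resid = a*x**2 + b*x*y + c*y**2 + d*x + e*y + f
    return np.max(np.abs(resid))    # should be close to zero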
def regress(y, X, alpha=0.05, intercept=True):
'''
Multilinear regression and confidence intervals.
Note that by default, an intercept is added to the design matrix!
Parameters
----------
X : ndarray (N,) or (N,p)
predictors at each of N observations
y : ndarray(N,)
observed responses
alpha : float, optional
Defines the 100*(1-alpha)% confidence level in ci.
Default alpha=0.05
intercept : boolean
If 'True', an intercept is automatically added to the design matrix.
If 'False', the behavior is like the Matlab "regress" function, and
no intercept is added.
Returns
-------
    fit : ndarray
        best-fit intercept and regression parameters
    ci : ndarray (p,2)
        confidence intervals for the coefficient estimates
        at the given alpha level (default: 95%-level)
Examples
--------
>>> x = np.array([0, 0, 10, 10])
>>> y = np.array([1, 3, 11, 13])
>>> (fit,ci) = thLib.fits.regress(y,x)
>>> X = np.random.randn(10,2)
>>> mat = np.hstack( (np.ones( (len(X),1) ), X) )
>>> pars = np.c_[[2, 4, 5]]
>>> y = mat.dot(pars)
>>> y += 0.1*np.random.randn(*y.shape)
>>> (fit,ci) = thLib.fits.regress(y, mat)
See also
--------
fit_line
'''
if X.ndim == 1:
dof = len(X)-2
df = pd.DataFrame({'x':X, 'y':y})
modelTxt = 'y~x'
# If you want the Matlab "regress", without intercept
        if not intercept:
modelTxt += '-1'
dof += 1
# Fit the model
model = ols(modelTxt,df).fit()
ci = np.zeros((2, 2))
else:
# Make sure the dimensions are right
if X.shape[0] < X.shape[1]:
X = X.T
dof = X.shape[0]-X.shape[1]-1
if y.ndim == 1:
y = np.c_[y]
# For automatic labeling, put all the data into one matrix
data = np.hstack((y,X))
df = pd.DataFrame(data)
# Make a string like 'y~x1+x2+x3'
labels = ['y']
modelTxt = 'y~'
for ii in range(X.shape[1]):
labels.append('x'+str(ii))
modelTxt += labels[-1]+'+'
# Remove the last "+"
modelTxt = modelTxt[:-1]
# Automatic column labelling
df.columns = labels
# Memory allocation for CIs
        if intercept:
ci = np.zeros((1+X.shape[1],2))
# If you want the Matlab "regress", without intercept
else:
dof +=1
modelTxt += '-1'
ci = np.zeros((X.shape[1],2))
# Fit the model
model = ols(modelTxt,df).fit()
level = (1.-alpha/2.)
# Make sure that you have the correct DOF
tVal = sp.stats.t.ppf(level,dof)
# Extract parameters and standard error from the model
fit = model.params
se = model.bse
# Calculate CIs
ci[:,0] = fit - se*tVal
ci[:,1] = fit + se*tVal
# Return a named tuple
Regression= namedtuple('Regression', ['Fit', 'CIs'])
return Regression(fit, ci)
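
# For reference, statsmodels can return these confidence intervals directly
# through model.conf_int(alpha). A quick consistency check (a sketch, using
# the docstring data):
def _check_regress():
    '''Sketch: regress CIs should match the statsmodels confidence intervals.'''
    x = np.array([0, 0, 10, 10])
    y = np.array([1, 3, 11, 13])
    fit, ci = regress(y, x)
    direct = ols('y~x', pd.DataFrame({'x': x, 'y': y})).fit().conf_int(0.05)
    return ci, np.asarray(direct)    # the two should agree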
if __name__ == '__main__':
# Produce an ellipse
a = 5
b = 3
alpha = np.deg2rad(30)
R = np.array([[np.cos(alpha), -np.sin(alpha)],
[np.sin(alpha), np.cos(alpha)]])
theta = np.linspace(0, 2*np.pi, 100)
x = a * np.cos(theta)
y = b * np.sin(theta)
XY = np.vstack( (x,y) )
ellipse = R.dot( XY )
plt.plot(ellipse[0,:], ellipse[1,:])
plt.show()
A = fit_ellipse(ellipse[0,:], ellipse[1,:])
print('The ellipse-fit parameters are:')
print(A)
# Check (equations from http://mathworld.wolfram.com/Ellipse.html)
(a,b,c,d,e,f) = A
# To match the formula from wolfram
b /= 2
d /= 2
g = f
f = e/2
aFit = np.sqrt(2*(a*f**2 + c*d**2 + g*b**2 - 2*b*d*f - a*c*g)/((b**2-a*c)* (np.sqrt((a-c)**2 + 4*b**2) -(a+c))))
bFit = np.sqrt(2*(a*f**2 + c*d**2 + g*b**2 - 2*b*d*f - a*c*g)/((b**2-a*c)*(-np.sqrt((a-c)**2 + 4*b**2) - (a+c))))
thetaFit = np.pi/2 + 0.5*np.arctan2(2*b, a-c)
print('a={0:3.2f}, b={1:3.2f}, theta = {2}'.format(aFit, bFit, np.rad2deg(thetaFit)))
'''
# Test fit_exp
t = np.arange(10)
tau = 2.
amp = 1.
offset = 2.
x = offset + amp*np.exp(-t/float(tau))
print(fit_exp(t,x))
import doctest
    doctest.testmod()

    # Test fit_line
print('Fit line ------------')
x = np.array([0, 0, 10, 10])
y = np.array([1, 3, 11, 13])
out = fit_line(x,y)
print(out)
# Test regress - 1dim
print('Plain -----------------')
x = np.array([0, 0, 10, 10])
y = np.array([1, 3, 11, 13])
(fit,ci) = regress(y,x)
print(fit)
print(ci)
print('99% -----------------')
(fit,ci) = regress(y,x, alpha=0.01)
print(fit)
print(ci)
print('No intercept -------')
(fit,ci) = regress(y,x, intercept=False)
print(fit)
print(ci)
# Test regress - 2dim
print('2-dim -----------')
X = np.array([[1,1,1,1],[0,0,10,10]]).T
y = np.array([1, 3, 11, 13])
(fit,ci) = regress(y, X, intercept=False)
print(fit)
print(ci)
# Test regress - 2dim
X = np.random.randn(10,2)
mat = np.hstack( (np.ones( (len(X),1) ), X) )
pars = np.c_[[2, 4, 5]]
y = mat.dot(pars)
y += 0.1*np.random.randn(*y.shape)
(fit,ci) = regress(y, mat)
print(fit)
print(ci)
'''