Source code for spint.gravity

# coding=utf-8
"""
 Wilsonian (1967) family of gravity-type spatial interaction models

References
----------

Fotheringham, A. S. and O'Kelly, M. E. (1989). Spatial Interaction Models: Formulations
 and Applications. London: Kluwer Academic Publishers.

Wilson, A. G. (1967). A statistical theory of spatial distribution models.
 Transportation Research, 1, 253–269.

"""

__author__ = "Taylor Oshan tayoshan@gmail.com"

from types import FunctionType
import numpy as np
from scipy import sparse as sp
from spreg import user_output as User
from spreg.utils import sphstack
from spglm.utils import cache_readonly
from .count_model import CountModel
from .utils import sorensen, srmse, spcategorical


[docs]class BaseGravity(CountModel):
    """
    Base class to set up gravity-type spatial interaction models and dispatch
    estimaton technqiues.

    Parameters
    ----------
    flows           : array of integers
                      n x 1; observed flows between O origins and D destinations
    origins         : array of strings
                      n x 1; unique identifiers of origins of n flows
    destinations    : array of strings
                      n x 1; unique identifiers of destinations of n flows
    cost            : array
                      n x 1; cost to overcome separation between each origin and
                      destination associated with a flow; typically distance or time
    cost_func       : string or function that has scalar input and output
                      functional form of the cost function;
                      'exp' | 'pow' | custom function
    o_vars          : array (optional)
                      n x p; p attributes for each origin of  n flows; default
                      is None
    d_vars          : array (optional)
                      n x p; p attributes for each destination of n flows;
                      default is None
    constant        : boolean
                      True to include intercept in model; True by default
    framework       : string
                      estimation technique; currently only 'GLM' is avaialble
    Quasi           : boolean
                      True to estimate QuasiPoisson model; should result in same
                      parameters as Poisson but with altered covariance; default
                      to true which estimates Poisson model
    SF              : array
                      n x 1; eigenvector spatial filter to include in the model;
                      default to None which does not include a filter; not yet
                      implemented
    CD              : array
                      n x 1; competing destination term that accounts for the
                      likelihood that alternative destinations are considered
                      along with each destination under consideration for every
                      OD pair; defaults to None which does not include a CD
                      term; not yet implemented
    Lag             : W object
                      spatial weight for n observations (OD pairs) used to
                      construct a spatial autoregressive model and estimator;
                      defaults to None which does not include an autoregressive
                      term; not yet implemented


    Attributes
    ----------
    f               : array
                      n x 1; observed flows; dependent variable; y
    n               : integer
                      number of observations
    k               : integer
                      number of parameters
    c               : array
                      n x 1; cost to overcome separation between each origin and
                      destination associated with a flow; typically distance or time
    cf              : function
                      cost function; used to transform cost variable
    ov              : array
                      n x p(o); p attributes for each origin of n flows
    dv              : array
                      n x p(d); p attributes for each destination of n flows
    constant        : boolean
                      True to include intercept in model; True by default
    y               : array
                      n x 1; dependent variable used in estimation including any
                      transformations
    X               : array
                      n x k, design matrix used in estimation
    params          : array
                      n x k, k estimated beta coefficients; k = p(o) + p(d) + 1
    yhat            : array
                      n x 1, predicted value of y (i.e., fittedvalues)
    cov_params      : array
                      Variance covariance matrix (k x k) of betas
    std_err         : array
                      k x 1, standard errors of betas
    pvalues         : array
                      k x 1, two-tailed pvalues of parameters
    tvalues         : array
                      k x 1, the tvalues of the standard errors
    deviance        : float
                      value of the deviance function evalued at params;
                      see family.py for distribution-specific deviance
    resid_dev       : array
                      n x 1, residual deviance of model
    llf             : float
                      value of the loglikelihood function evalued at params;
                      see family.py for distribution-specific loglikelihoods
    llnull          : float
                      value of the loglikelihood function evaluated with only an
                      intercept; see family.py for distribution-specific
                      loglikelihoods
    AIC             : float
                      Akaike information criterion
    D2              : float
                      percentage of explained deviance
    adj_D2          : float
                      adjusted percentage of explained deviance
    pseudo_R2       : float
                      McFadden's pseudo R2  (coefficient of determination)
    adj_pseudoR2    : float
                      adjusted McFadden's pseudo R2
    SRMSE           : float
                      standardized root mean square error
    SSI             : float
                      Sorensen similarity index
    results         : object
                      full results from estimated model. May contain addtional
                      diagnostics
    Example
    -------
    >>> import numpy as np
    >>> import libpysal
    >>> from spint.gravity import BaseGravity
    >>> db = libpysal.io.open(libpysal.examples.get_path('nyc_bikes_ct.csv'))
    >>> cost = np.array(db.by_col('tripduration')).reshape((-1,1))
    >>> flows = np.array(db.by_col('count')).reshape((-1,1))
    >>> model = BaseGravity(flows, cost)
    >>> model.params
    array([17.84839637, -1.68325787])

    """

[docs]    def __init__(
            self,
            flows,
            cost,
            cost_func='pow',
            o_vars=None,
            d_vars=None,
            origins=None,
            destinations=None,
            constant=True,
            framework='GLM',
            SF=None,
            CD=None,
            Lag=None,
            Quasi=False):
        n = User.check_arrays(flows, cost)
        #User.check_y(flows, n)
        self.n = n
        self.f = flows
        self.c = cost
        self.ov = o_vars
        self.dv = d_vars
        if isinstance(cost_func, str):
            if cost_func.lower() == 'pow':
                self.cf = np.log
                if (self.c == 0).any():
                    raise ValueError(
                        "Zero values detected: cost function 'pow'"
                        "requires the logarithm of the cost variable which"
                        "is undefined at 0")
            elif cost_func.lower() == 'exp':
                self.cf = lambda x: x * 1.0
        elif (isinstance(cost_func, FunctionType)) | (isinstance(cost_func, np.ufunc)):
            self.cf = cost_func
        else:
            raise ValueError(
                "cost_func must be 'exp', 'pow' or a valid "
                " function that has a scalar as a input and output")

        y = np.reshape(self.f, (-1, 1))
        if isinstance(self, Gravity):
            X = np.empty((self.n, 0))
        else:
            X = sp.csr_matrix((self.n, 1))
        if isinstance(self, Production) | isinstance(self, Doubly):
            o_dummies = spcategorical(origins.flatten())
            if constant:
                o_dummies = o_dummies[:, 1:]
            X = sphstack(X, o_dummies, array_out=False)
        if isinstance(self, Attraction) | isinstance(self, Doubly):
            d_dummies = spcategorical(destinations.flatten())
            if constant | isinstance(self, Doubly):
                d_dummies = d_dummies[:, 1:]
            X = sphstack(X, d_dummies, array_out=False)
        if self.ov is not None:
            if isinstance(self, Gravity):
                for each in range(self.ov.shape[1]):
                    if (self.ov[:, each] == 0).any():
                        raise ValueError(
                            "Zero values detected in column %s "
                            "of origin variables, which are undefined for "
                            "Poisson log-linear spatial interaction models" %
                            each)
                    X = np.hstack(
                        (X, np.log(np.reshape(self.ov[:, each], (-1, 1)))))
            else:
                for each in range(self.ov.shape[1]):
                    if (self.ov[:, each] == 0).any():
                        raise ValueError(
                            "Zero values detected in column %s "
                            "of origin variables, which are undefined for "
                            "Poisson log-linear spatial interaction models" %
                            each)
                    ov = sp.csr_matrix(
                        np.log(np.reshape(self.ov[:, each], ((-1, 1)))))
                    X = sphstack(X, ov, array_out=False)
        if self.dv is not None:
            if isinstance(self, Gravity):
                for each in range(self.dv.shape[1]):
                    if (self.dv[:, each] == 0).any():
                        raise ValueError(
                            "Zero values detected in column %s "
                            "of destination variables, which are undefined for "
                            "Poisson log-linear spatial interaction models" %
                            each)
                    X = np.hstack(
                        (X, np.log(np.reshape(self.dv[:, each], (-1, 1)))))
            else:
                for each in range(self.dv.shape[1]):
                    if (self.dv[:, each] == 0).any():
                        raise ValueError(
                            "Zero values detected in column %s "
                            "of destination variables, which are undefined for "
                            "Poisson log-linear spatial interaction models" %
                            each)
                    dv = sp.csr_matrix(
                        np.log(np.reshape(self.dv[:, each], ((-1, 1)))))
                    X = sphstack(X, dv, array_out=False)
        if isinstance(self, Gravity):
            X = np.hstack((X, self.cf(np.reshape(self.c, (-1, 1)))))
        else:
            c = sp.csr_matrix(self.cf(np.reshape(self.c, (-1, 1))))
            X = sphstack(X, c, array_out=False)
            X = X[:, 1:]  # because empty array instantiated with extra column
        if not isinstance(self, (Gravity, Production, Attraction, Doubly)):
            X = self.cf(np.reshape(self.c, (-1, 1)))
        if SF:
            raise NotImplementedError(
                "Spatial filter model not yet implemented")
        if CD:
            raise NotImplementedError(
                "Competing destination model not yet implemented")
        if Lag:
            raise NotImplementedError(
                "Spatial Lag autoregressive model not yet implemented")

        CountModel.__init__(self, y, X, constant=constant)
        if (framework.lower() == 'glm'):
            if not Quasi:
                results = self.fit(framework='glm')
            else:
                results = self.fit(framework='glm', Quasi=True)
        else:
            raise NotImplementedError('Only GLM is currently implemented')

        self.params = results.params
        self.yhat = results.yhat
        self.cov_params = results.cov_params
        self.std_err = results.std_err
        self.pvalues = results.pvalues
        self.tvalues = results.tvalues
        self.deviance = results.deviance
        self.resid_dev = results.resid_dev
        self.llf = results.llf
        self.llnull = results.llnull
        self.AIC = results.AIC
        self.k = results.k
        self.D2 = results.D2
        self.adj_D2 = results.adj_D2
        self.pseudoR2 = results.pseudoR2
        self.adj_pseudoR2 = results.adj_pseudoR2
        self.results = results
        self._cache = {}

    @cache_readonly
    def SSI(self):
        return sorensen(self)

    @cache_readonly
    def SRMSE(self):
        return srmse(self)

    def reshape(self, array):
        if isinstance(array, np.ndarray):
            return array.reshape((-1, 1))
        elif isinstance(array, list):
            return np.array(array).reshape((-1, 1))
        else:
            raise TypeError(
                "input must be an numpy array or list that can be coerced"
                " into the dimensions n x 1")


[docs]class Gravity(BaseGravity):
    """
    Unconstrained (traditional gravity) gravity-type spatial interaction model

    Parameters
    ----------
    flows           : array of integers
                      n x 1; observed flows between O origins and D destinations
    cost            : array
                      n x 1; cost to overcome separation between each origin and
                      destination associated with a flow; typically distance or time
    cost_func       : string or function that has scalar input and output
                      functional form of the cost function;
                      'exp' | 'pow' | custom function
    o_vars          : array (optional)
                      n x p; p attributes for each origin of  n flows; default
                      is None
    d_vars          : array (optional)
                      n x p; p attributes for each destination of n flows;
                      default is None
    constant        : boolean
                      True to include intercept in model; True by default
    framework       : string
                      estimation technique; currently only 'GLM' is avaialble
    Quasi           : boolean
                      True to estimate QuasiPoisson model; should result in same
                      parameters as Poisson but with altered covariance; default
                      to true which estimates Poisson model
    SF              : array
                      n x 1; eigenvector spatial filter to include in the model;
                      default to None which does not include a filter; not yet
                      implemented
    CD              : array
                      n x 1; competing destination term that accounts for the
                      likelihood that alternative destinations are considered
                      along with each destination under consideration for every
                      OD pair; defaults to None which does not include a CD
                      term; not yet implemented
    Lag             : W object
                      spatial weight for n observations (OD pairs) used to
                      construct a spatial autoregressive model and estimator;
                      defaults to None which does not include an autoregressive
                      term; not yet implemented

    Attributes
    ----------
    f               : array
                      n x 1; observed flows; dependent variable; y
    n               : integer
                      number of observations
    k               : integer
                      number of parameters
    c               : array
                      n x 1; cost to overcome separation between each origin and
                      destination associated with a flow; typically distance or time
    cf              : function
                      cost function; used to transform cost variable
    ov              : array
                      n x p(o); p attributes for each origin of n flows
    dv              : array
                      n x p(d); p attributes for each destination of n flows
    constant        : boolean
                      True to include intercept in model; True by default
    y               : array
                      n x 1; dependent variable used in estimation including any
                      transformations
    X               : array
                      n x k, design matrix used in estimation
    params          : array
                      n x k, k estimated beta coefficients; k = p(o) + p(d) + 1
    yhat            : array
                      n x 1, predicted value of y (i.e., fittedvalues)
    cov_params      : array
                      Variance covariance matrix (kxk) of betas
    std_err         : array
                      k x 1, standard errors of betas
    pvalues         : array
                      k x 1, two-tailed pvalues of parameters
    tvalues         : array
                      k x 1, the tvalues of the standard errors
    deviance        : float
                      value of the deviance function evalued at params;
                      see family.py for distribution-specific deviance
    resid_dev       : array
                      n x 1, residual deviance of model
    llf             : float
                      value of the loglikelihood function evalued at params;
                      see family.py for distribution-specific loglikelihoods
    llnull          : float
                      value of the loglikelihood function evaluated with only an
                      intercept; see family.py for distribution-specific
                      loglikelihoods
    AIC             : float
                      Akaike information criterion
    D2              : float
                      percentage of explained deviance
    adj_D2          : float
                      adjusted percentage of explained deviance
    pseudo_R2       : float
                      McFadden's pseudo R2  (coefficient of determination)
    adj_pseudoR2    : float
                      adjusted McFadden's pseudo R2
    SRMSE           : float
                      standardized root mean square error
    SSI             : float
                      Sorensen similarity index
    results         : object
                      Full results from estimated model. May contain addtional
                      diagnostics
    Example
    -------
    >>> import numpy as np
    >>> import libpysal
    >>> from spint.gravity import Gravity
    >>> db = libpysal.io.open(libpysal.examples.get_path('nyc_bikes_ct.csv'))
    >>> cost = np.array(db.by_col('tripduration')).reshape((-1,1))
    >>> flows = np.array(db.by_col('count')).reshape((-1,1))
    >>> o_cap = np.array(db.by_col('o_cap')).reshape((-1,1))
    >>> d_cap = np.array(db.by_col('d_cap')).reshape((-1,1))
    >>> model = Gravity(flows, o_cap, d_cap, cost, 'exp')
    >>> model.params
    array([ 3.80050153e+00,  5.54103854e-01,  3.94282921e-01, -2.27091686e-03])

    """

[docs]    def __init__(self, flows, o_vars, d_vars, cost,
                 cost_func, constant=True, framework='GLM', SF=None, CD=None,
                 Lag=None, Quasi=False):
        self.f = np.reshape(flows, (-1, 1))
        if len(o_vars.shape) > 1:
            p = o_vars.shape[1]
        else:
            p = 1
        self.ov = np.reshape(o_vars, (-1, p))
        if len(d_vars.shape) > 1:
            p = d_vars.shape[1]
        else:
            p = 1
        self.dv = np.reshape(d_vars, (-1, p))
        self.c = np.reshape(cost, (-1, 1))
        #User.check_arrays(self.f, self.ov, self.dv, self.c)

        BaseGravity.__init__(
            self,
            self.f,
            self.c,
            cost_func=cost_func,
            o_vars=self.ov,
            d_vars=self.dv,
            constant=constant,
            framework=framework,
            SF=SF,
            CD=CD,
            Lag=Lag,
            Quasi=Quasi)

    def local(self, loc_index, locs):
        """
        Calibrate local models for subsets of data from a single location to all
        other locations

        Parameters
        ----------
        loc_index   : n x 1 array of either origin or destination id label for
                      flows; must be explicitly provided for local version of
                      basic gravity model since these are not passed to the
                      global model.

        locs        : iterable of either origin or destination labels for which
                      to calibrate local models; must also be explicitly
                      provided since local gravity models can be calibrated from origins
                      or destinations. If all origins are also destinations and
                      a local model is desired for each location then use
                      np.unique(loc_index)

        Returns
        -------
        results     : dict where keys are names of model outputs and diagnostics
                      and values are lists of location specific values.
        """
        results = {}
        covs = self.ov.shape[1] + self.dv.shape[1] + 1
        results['AIC'] = []
        results['deviance'] = []
        results['pseudoR2'] = []
        results['adj_pseudoR2'] = []
        results['D2'] = []
        results['adj_D2'] = []
        results['SSI'] = []
        results['SRMSE'] = []
        for cov in range(covs):
            results['param' + str(cov)] = []
            results['stde' + str(cov)] = []
            results['pvalue' + str(cov)] = []
            results['tvalue' + str(cov)] = []
        for loc in locs:
            subset = loc_index == loc
            f = self.reshape(self.f[subset])
            o_vars = self.ov[subset.reshape(self.ov.shape[0]), :]
            d_vars = self.dv[subset.reshape(self.dv.shape[0]), :]
            dij = self.reshape(self.c[subset])
            model = Gravity(f, o_vars, d_vars, dij, self.cf,
                            constant=False)
            results['AIC'].append(model.AIC)
            results['deviance'].append(model.deviance)
            results['pseudoR2'].append(model.pseudoR2)
            results['adj_pseudoR2'].append(model.adj_pseudoR2)
            results['D2'].append(model.D2)
            results['adj_D2'].append(model.adj_D2)
            results['SSI'].append(model.SSI)
            results['SRMSE'].append(model.SRMSE)
            for cov in range(covs):
                results['param' + str(cov)].append(model.params[cov])
                results['stde' + str(cov)].append(model.std_err[cov])
                results['pvalue' + str(cov)].append(model.pvalues[cov])
                results['tvalue' + str(cov)].append(model.tvalues[cov])
        return results


[docs]class Production(BaseGravity):
    """
    Production-constrained (origin-constrained) gravity-type spatial interaction model

    Parameters
    ----------
    flows           : array of integers
                      n x 1; observed flows between O origins and D destinations
    origins         : array of strings
                      n x 1; unique identifiers of origins of n flows; when
                      there are many origins it will be faster to use integers
                      rather than strings for id labels.
    cost            : array
                      n x 1; cost to overcome separation between each origin and
                      destination associated with a flow; typically distance or time
    cost_func       : string or function that has scalar input and output
                      functional form of the cost function;
                      'exp' | 'pow' | custom function
    d_vars          : array (optional)
                      n x p; p attributes for each destination of n flows;
                      default is None
    constant        : boolean
                      True to include intercept in model; True by default
    framework       : string
                      estimation technique; currently only 'GLM' is avaialble
    Quasi           : boolean
                      True to estimate QuasiPoisson model; should result in same
                      parameters as Poisson but with altered covariance; default
                      to true which estimates Poisson model
    SF              : array
                      n x 1; eigenvector spatial filter to include in the model;
                      default to None which does not include a filter; not yet
                      implemented
    CD              : array
                      n x 1; competing destination term that accounts for the
                      likelihood that alternative destinations are considered
                      along with each destination under consideration for every
                      OD pair; defaults to None which does not include a CD
                      term; not yet implemented
    Lag             : W object
                      spatial weight for n observations (OD pairs) used to
                      construct a spatial autoregressive model and estimator;
                      defaults to None which does not include an autoregressive
                      term; not yet implemented

    Attributes
    ----------
    f               : array
                      n x 1; observed flows; dependent variable; y
    n               : integer
                      number of observations
    k               : integer
                      number of parameters
    c               : array
                      n x 1; cost to overcome separation between each origin and
                      destination associated with a flow; typically distance or time
    cf              : function
                      cost function; used to transform cost variable
    o               : array
                      n x 1; index of origin id's
    dv              : array
                      n x p; p attributes for each destination of n flows
    constant        : boolean
                      True to include intercept in model; True by default
    y               : array
                      n x 1; dependent variable used in estimation including any
                      transformations
    X               : array
                      n x k, design matrix used in estimation
    params          : array
                      n x k, k estimated beta coefficients; k = # of origins + p + 1
    yhat            : array
                      n x 1, predicted value of y (i.e., fittedvalues)
    cov_params      : array
                      Variance covariance matrix (kxk) of betas
    std_err         : array
                      k x 1, standard errors of betas
    pvalues         : array
                      k x 1, two-tailed pvalues of parameters
    tvalues         : array
                      k x 1, the tvalues of the standard errors
    deviance        : float
                      value of the deviance function evalued at params;
                      see family.py for distribution-specific deviance
    resid_dev       : array
                      n x 1, residual deviance of model
    llf             : float
                      value of the loglikelihood function evalued at params;
                      see family.py for distribution-specific loglikelihoods
    llnull          : float
                      value of the loglikelihood function evaluated with only an
                      intercept; see family.py for distribution-specific
                      loglikelihoods
    AIC             : float
                      Akaike information criterion
    D2              : float
                      percentage of explained deviance
    adj_D2          : float
                      adjusted percentage of explained deviance
    pseudo_R2       : float
                      McFadden's pseudo R2  (coefficient of determination)
    adj_pseudoR2    : float
                      adjusted McFadden's pseudo R2
    SRMSE           : float
                      standardized root mean square error
    SSI             : float
                      Sorensen similarity index
    results         : object
                      Full results from estimated model. May contain addtional
                      diagnostics
    Example
    -------

    >>> import numpy as np
    >>> import libpysal
    >>> from spint.gravity import Production
    >>> db = libpysal.io.open(libpysal.examples.get_path('nyc_bikes_ct.csv'))
    >>> cost = np.array(db.by_col('tripduration')).reshape((-1,1))
    >>> flows = np.array(db.by_col('count')).reshape((-1,1))
    >>> o = np.array(db.by_col('o_tract')).reshape((-1,1))
    >>> d_cap = np.array(db.by_col('d_cap')).reshape((-1,1))
    >>> model = Production(flows, o, d_cap, cost, 'exp')
    >>> model.params[-4:]
    array([ 1.34721352,  0.96357345,  0.85535775, -0.00227444])

    """

[docs]    def __init__(self, flows, origins, d_vars, cost, cost_func, constant=True,
                 framework='GLM', SF=None, CD=None, Lag=None, Quasi=False):
        self.constant = constant
        self.f = self.reshape(flows)
        self.o = self.reshape(origins)

        try:
            if d_vars.shape[1]:
                p = d_vars.shape[1]
        except BaseException:
            p = 1
        self.dv = np.reshape(d_vars, (-1, p))
        self.c = self.reshape(cost)
        #User.check_arrays(self.f, self.o, self.dv, self.c)

        BaseGravity.__init__(
            self,
            self.f,
            self.c,
            cost_func=cost_func,
            d_vars=self.dv,
            origins=self.o,
            constant=constant,
            framework=framework,
            SF=SF,
            CD=CD,
            Lag=Lag,
            Quasi=Quasi)

    def local(self, locs=None):
        """
        Calibrate local models for subsets of data from a single location to all
        other locations

        Parameters
        ----------
        locs        : iterable of location (origins) labels; default is
                      None which calibrates a local model for each origin

        Returns
        -------
        results     : dict where keys are names of model outputs and diagnostics
                      and values are lists of location specific values
        """
        results = {}
        offset = 1
        covs = self.dv.shape[1] + 1
        results['AIC'] = []
        results['deviance'] = []
        results['pseudoR2'] = []
        results['adj_pseudoR2'] = []
        results['D2'] = []
        results['adj_D2'] = []
        results['SSI'] = []
        results['SRMSE'] = []
        for cov in range(covs):
            results['param' + str(cov)] = []
            results['stde' + str(cov)] = []
            results['pvalue' + str(cov)] = []
            results['tvalue' + str(cov)] = []
        if locs is None:
            locs = np.unique(self.o)
        for loc in np.unique(locs):
            subset = self.o == loc
            f = self.reshape(self.f[subset])
            o = self.reshape(self.o[subset])
            d_vars = self.dv[subset.reshape(self.dv.shape[0]), :]
            dij = self.reshape(self.c[subset])
            model = Production(f, o, d_vars, dij, self.cf, constant=False)
            results['AIC'].append(model.AIC)
            results['deviance'].append(model.deviance)
            results['pseudoR2'].append(model.pseudoR2)
            results['adj_pseudoR2'].append(model.adj_pseudoR2)
            results['D2'].append(model.D2)
            results['adj_D2'].append(model.adj_D2)
            results['SSI'].append(model.SSI)
            results['SRMSE'].append(model.SRMSE)
            for cov in range(covs):
                results['param' + str(cov)].append(model.params[offset + cov])
                results['stde' + str(cov)].append(model.std_err[offset + cov])
                results['pvalue' +
                        str(cov)].append(model.pvalues[offset + cov])
                results['tvalue' +
                        str(cov)].append(model.tvalues[offset + cov])
        return results


[docs]class Attraction(BaseGravity):
    """
    Attraction-constrained (destination-constrained) gravity-type spatial interaction model

    Parameters
    ----------
    flows           : array of integers
                      n x 1; observed flows between O origins and D destinations
    destinations    : array of strings
                      n x 1; unique identifiers of destinations of n flows; when
                      there are many destinations it will be faster to use
                      integers over strings for id labels.
    cost            : array
                      n x 1; cost to overcome separation between each origin and
                      destination associated with a flow; typically distance or time
    cost_func       : string or function that has scalar input and output
                      functional form of the cost function;
                      'exp' | 'pow' | custom function
    o_vars          : array (optional)
                      n x p; p attributes for each origin of  n flows; default
                      is None
    constant        : boolean
                      True to include intercept in model; True by default
    y               : array
                      n x 1; dependent variable used in estimation including any
                      transformations
    X               : array
                      n x k, design matrix used in estimation
    framework       : string
                      estimation technique; currently only 'GLM' is avaialble
    Quasi           : boolean
                      True to estimate QuasiPoisson model; should result in same
                      parameters as Poisson but with altered covariance; default
                      to true which estimates Poisson model
    SF              : array
                      n x 1; eigenvector spatial filter to include in the model;
                      default to None which does not include a filter; not yet
                      implemented
    CD              : array
                      n x 1; competing destination term that accounts for the
                      likelihood that alternative destinations are considered
                      along with each destination under consideration for every
                      OD pair; defaults to None which does not include a CD
                      term; not yet implemented
    Lag             : W object
                      spatial weight for n observations (OD pairs) used to
                      construct a spatial autoregressive model and estimator;
                      defaults to None which does not include an autoregressive
                      term; not yet implemented

    Attributes
    ----------
    f               : array
                      n x 1; observed flows; dependent variable; y
    n               : integer
                      number of observations
    k               : integer
                      number of parameters
    c               : array
                      n x 1; cost to overcome separation between each origin and
                      destination associated with a flow; typically distance or time
    cf              : function
                      cost function; used to transform cost variable
    d               : array
                      n x 1; index of destination id's
    ov              : array
                      n x p; p attributes for each origin of n flows
    constant        : boolean
                      True to include intercept in model; True by default
    params          : array
                      n x k, k estimated beta coefficients; k = # of
                      destinations + p + 1
    yhat            : array
                      n x 1, predicted value of y (i.e., fittedvalues)
    cov_params      : array
                      Variance covariance matrix (kxk) of betas
    std_err         : array
                      k x 1, standard errors of betas
    pvalues         : array
                      k x 1, two-tailed pvalues of parameters
    tvalues         : array
                      k x 1, the tvalues of the standard errors
    deviance        : float
                      value of the deviance function evalued at params;
                      see family.py for distribution-specific deviance
    resid_dev       : array
                      n x 1, residual deviance of model
    llf             : float
                      value of the loglikelihood function evalued at params;
                      see family.py for distribution-specific loglikelihoods
    llnull          : float
                      value of the loglikelihood function evaluated with only an
                      intercept; see family.py for distribution-specific
                      loglikelihoods
    AIC             : float
                      Akaike information criterion
    D2              : float
                      percentage of explained deviance
    adj_D2          : float
                      adjusted percentage of explained deviance
    pseudo_R2       : float
                      McFadden's pseudo R2  (coefficient of determination)
    adj_pseudoR2    : float
                      adjusted McFadden's pseudo R2
    SRMSE           : float
                      standardized root mean square error
    SSI             : float
                      Sorensen similarity index
    results         : object
                      Full results from estimated model. May contain addtional
                      diagnostics
    Example
    -------
    >>> import numpy as np
    >>> import libpysal
    >>> from spint.gravity import Attraction
    >>> nyc_bikes = libpysal.examples.load_example('nyc_bikes')
    >>> db = libpysal.io.open(nyc_bikes.get_path('nyc_bikes_ct.csv'))
    >>> cost = np.array(db.by_col('tripduration')).reshape((-1,1))
    >>> flows = np.array(db.by_col('count')).reshape((-1,1))
    >>> d = np.array(db.by_col('d_tract')).reshape((-1,1))
    >>> o_cap = np.array(db.by_col('o_cap')).reshape((-1,1))
    >>> model = Attraction(flows, d, o_cap, cost, 'exp')
    >>> model.params[-4:]
    array([ 1.21962276,  0.87634028,  0.88290909, -0.00229081])

    """

[docs]    def __init__(self, flows, destinations, o_vars, cost, cost_func,
                 constant=True, framework='GLM', SF=None, CD=None, Lag=None,
                 Quasi=False):
        self.f = np.reshape(flows, (-1, 1))
        if len(o_vars.shape) > 1:
            p = o_vars.shape[1]
        else:
            p = 1
        self.ov = np.reshape(o_vars, (-1, p))
        self.d = np.reshape(destinations, (-1, 1))
        self.c = np.reshape(cost, (-1, 1))
        #User.check_arrays(self.f, self.d, self.ov, self.c)

        BaseGravity.__init__(
            self,
            self.f,
            self.c,
            cost_func=cost_func,
            o_vars=self.ov,
            destinations=self.d,
            constant=constant,
            framework=framework,
            SF=SF,
            CD=CD,
            Lag=Lag,
            Quasi=Quasi)

    def local(self, locs=None):
        """
        Calibrate local models for subsets of data from a single location to all
        other locations

        Parameters
        ----------
        locs        : iterable of location (destinations) labels; default is
                      None which calibrates a local model for each destination

        Returns
        -------
        results     : dict where keys are names of model outputs and diagnostics
                      and values are lists of location specific values
        """
        results = {}
        offset = 1
        covs = self.ov.shape[1] + 1
        results['AIC'] = []
        results['deviance'] = []
        results['pseudoR2'] = []
        results['adj_pseudoR2'] = []
        results['D2'] = []
        results['adj_D2'] = []
        results['SSI'] = []
        results['SRMSE'] = []
        for cov in range(covs):
            results['param' + str(cov)] = []
            results['stde' + str(cov)] = []
            results['pvalue' + str(cov)] = []
            results['tvalue' + str(cov)] = []
        if locs is None:
            locs = np.unique(self.d)
        for loc in np.unique(locs):
            subset = self.d == loc
            f = self.reshape(self.f[subset])
            d = self.reshape(self.d[subset])
            o_vars = self.ov[subset.reshape(self.ov.shape[0]), :]
            dij = self.reshape(self.c[subset])
            model = Attraction(f, d, o_vars, dij, self.cf, constant=False)
            results['AIC'].append(model.AIC)
            results['deviance'].append(model.deviance)
            results['pseudoR2'].append(model.pseudoR2)
            results['adj_pseudoR2'].append(model.adj_pseudoR2)
            results['D2'].append(model.D2)
            results['adj_D2'].append(model.adj_D2)
            results['SSI'].append(model.SSI)
            results['SRMSE'].append(model.SRMSE)
            for cov in range(covs):
                results['param' + str(cov)].append(model.params[offset + cov])
                results['stde' + str(cov)].append(model.std_err[offset + cov])
                results['pvalue' +
                        str(cov)].append(model.pvalues[offset + cov])
                results['tvalue' +
                        str(cov)].append(model.tvalues[offset + cov])
        return results


[docs]class Doubly(BaseGravity):
    """
    Doubly-constrained gravity-type spatial interaction model

    Parameters
    ----------
    flows           : array of integers
                      n x 1; observed flows between O origins and D destinations
    origins         : array of strings
                      n x 1; unique identifiers of origins of n flows; when
                      there are many origins it will be faster to use integers
                      rather than strings for id labels.
    destinations    : array of strings
                      n x 1; unique identifiers of destinations of n flows; when
                      there are many destinations it will be faster to use
                      integers rather than strings for id labels
    cost            : array
                      n x 1; cost to overcome separation between each origin and
                      destination associated with a flow; typically distance or time
    cost_func       : string or function that has scalar input and output
                      functional form of the cost function;
                      'exp' | 'pow' | custom function
    constant        : boolean
                      True to include intercept in model; True by default
    y               : array
                      n x 1; dependent variable used in estimation including any
                      transformations
    X               : array
                      n x k, design matrix used in estimation
    framework       : string
                      estimation technique; currently only 'GLM' is avaialble
    Quasi           : boolean
                      True to estimate QuasiPoisson model; should result in same
                      parameters as Poisson but with altered covariance; default
                      to true which estimates Poisson model
    SF              : array
                      n x 1; eigenvector spatial filter to include in the model;
                      default to None which does not include a filter; not yet
                      implemented
    CD              : array
                      n x 1; competing destination term that accounts for the
                      likelihood that alternative destinations are considered
                      along with each destination under consideration for every
                      OD pair; defaults to None which does not include a CD
                      term; not yet implemented
    Lag             : W object
                      spatial weight for n observations (OD pairs) used to
                      construct a spatial autoregressive model and estimator;
                      defaults to None which does not include an autoregressive
                      term; not yet implemented

    Attributes
    ----------
    f               : array
                      n x 1; observed flows; dependent variable; y
    n               : integer
                      number of observations
    k               : integer
                      number of parameters
    c               : array
                      n x 1; cost to overcome separation between each origin and
                      destination associated with a flow; typically distance or time
    cf              : function
                      cost function; used to transform cost variable
    o               : array
                      n x 1; index of origin id's
    d               : array
                      n x 1; index of destination id's
    constant        : boolean
                      True to include intercept in model; True by default
    params          : array
                      n x k, estimated beta coefficients; k = # of origins + #
                      of destinations; the first x-1 values
                      pertain to the x destinations (leaving out the first
                      destination to avoid perfect collinearity; no fixed
                      effect), the next x values pertain to the x origins, and the
                      final value is the distance decay coefficient
    yhat            : array
                      n x 1, predicted value of y (i.e., fittedvalues)
    cov_params      : array
                      Variance covariance matrix (kxk) of betas
    std_err         : array
                      k x 1, standard errors of betas
    pvalues         : array
                      k x 1, two-tailed pvalues of parameters
    tvalues         : array
                      k x 1, the tvalues of the standard errors
    deviance        : float
                      value of the deviance function evalued at params;
                      see family.py for distribution-specific deviance
    resid_dev       : array
                      n x 1, residual deviance of model
    llf             : float
                      value of the loglikelihood function evalued at params;
                      see family.py for distribution-specific loglikelihoods
    llnull          : float
                      value of the loglikelihood function evaluated with only an
                      intercept; see family.py for distribution-specific
                      loglikelihoods
    AIC             : float
                      Akaike information criterion
    D2              : float
                      percentage of explained deviance
    adj_D2          : float
                      adjusted percentage of explained deviance
    pseudo_R2       : float
                      McFadden's pseudo R2  (coefficient of determination)
    adj_pseudoR2    : float
                      adjusted McFadden's pseudo R2
    SRMSE           : float
                      standardized root mean square error
    SSI             : float
                      Sorensen similarity index
    results         : object
                      Full results from estimated model. May contain addtional
                      diagnostics
    Example
    -------
    >>> import numpy as np
    >>> import libpysal
    >>> from spint.gravity import Doubly
    >>> db = libpysal.io.open(libpysal.examples.get_path('nyc_bikes_ct.csv'))
    >>> cost = np.array(db.by_col('tripduration')).reshape((-1,1))
    >>> flows = np.array(db.by_col('count')).reshape((-1,1))
    >>> d = np.array(db.by_col('d_tract')).reshape((-1,1))
    >>> o = np.array(db.by_col('o_tract')).reshape((-1,1))
    >>> model = Doubly(flows, o, d, cost, 'exp')
    >>> model.params[-1:]
    array([-0.00232112])

    """

[docs]    def __init__(self, flows, origins, destinations, cost, cost_func,
                 constant=True, framework='GLM', SF=None, CD=None, Lag=None,
                 Quasi=False):

        self.f = np.reshape(flows, (-1, 1))
        self.o = np.reshape(origins, (-1, 1))
        self.d = np.reshape(destinations, (-1, 1))
        self.c = np.reshape(cost, (-1, 1))
        #User.check_arrays(self.f, self.o, self.d, self.c)

        BaseGravity.__init__(
            self,
            self.f,
            self.c,
            cost_func=cost_func,
            origins=self.o,
            destinations=self.d,
            constant=constant,
            framework=framework,
            SF=SF,
            CD=CD,
            Lag=Lag,
            Quasi=Quasi)

    def local(self, locs=None):
        """
        **Not inmplemented for doubly-constrained models** Not possible due to
        insufficient degrees of freedom.

        Calibrate local models for subsets of data from a single location to all
        other locations
        """
        raise NotImplementedError(
            "Local models not possible for"
            " doubly-constrained model due to insufficient degrees of freedom.")