# coding=utf-8
"""
Wilsonian (1967) family of gravity-type spatial interaction models
References
----------
Fotheringham, A. S. and O'Kelly, M. E. (1989). Spatial Interaction Models: Formulations
and Applications. London: Kluwer Academic Publishers.
Wilson, A. G. (1967). A statistical theory of spatial distribution models.
Transportation Research, 1, 253–269.
"""
__author__ = "Taylor Oshan tayoshan@gmail.com"
from types import FunctionType
import numpy as np
from scipy import sparse as sp
from spreg import user_output as User
from spreg.utils import sphstack
from spglm.utils import cache_readonly
from .count_model import CountModel
from .utils import sorensen, srmse, spcategorical
[docs]class BaseGravity(CountModel):
"""
Base class to set up gravity-type spatial interaction models and dispatch
estimaton technqiues.
Parameters
----------
flows : array of integers
n x 1; observed flows between O origins and D destinations
origins : array of strings
n x 1; unique identifiers of origins of n flows
destinations : array of strings
n x 1; unique identifiers of destinations of n flows
cost : array
n x 1; cost to overcome separation between each origin and
destination associated with a flow; typically distance or time
cost_func : string or function that has scalar input and output
functional form of the cost function;
'exp' | 'pow' | custom function
o_vars : array (optional)
n x p; p attributes for each origin of n flows; default
is None
d_vars : array (optional)
n x p; p attributes for each destination of n flows;
default is None
constant : boolean
True to include intercept in model; True by default
framework : string
estimation technique; currently only 'GLM' is avaialble
Quasi : boolean
True to estimate QuasiPoisson model; should result in same
parameters as Poisson but with altered covariance; default
to true which estimates Poisson model
SF : array
n x 1; eigenvector spatial filter to include in the model;
default to None which does not include a filter; not yet
implemented
CD : array
n x 1; competing destination term that accounts for the
likelihood that alternative destinations are considered
along with each destination under consideration for every
OD pair; defaults to None which does not include a CD
term; not yet implemented
Lag : W object
spatial weight for n observations (OD pairs) used to
construct a spatial autoregressive model and estimator;
defaults to None which does not include an autoregressive
term; not yet implemented
Attributes
----------
f : array
n x 1; observed flows; dependent variable; y
n : integer
number of observations
k : integer
number of parameters
c : array
n x 1; cost to overcome separation between each origin and
destination associated with a flow; typically distance or time
cf : function
cost function; used to transform cost variable
ov : array
n x p(o); p attributes for each origin of n flows
dv : array
n x p(d); p attributes for each destination of n flows
constant : boolean
True to include intercept in model; True by default
y : array
n x 1; dependent variable used in estimation including any
transformations
X : array
n x k, design matrix used in estimation
params : array
n x k, k estimated beta coefficients; k = p(o) + p(d) + 1
yhat : array
n x 1, predicted value of y (i.e., fittedvalues)
cov_params : array
Variance covariance matrix (k x k) of betas
std_err : array
k x 1, standard errors of betas
pvalues : array
k x 1, two-tailed pvalues of parameters
tvalues : array
k x 1, the tvalues of the standard errors
deviance : float
value of the deviance function evalued at params;
see family.py for distribution-specific deviance
resid_dev : array
n x 1, residual deviance of model
llf : float
value of the loglikelihood function evalued at params;
see family.py for distribution-specific loglikelihoods
llnull : float
value of the loglikelihood function evaluated with only an
intercept; see family.py for distribution-specific
loglikelihoods
AIC : float
Akaike information criterion
D2 : float
percentage of explained deviance
adj_D2 : float
adjusted percentage of explained deviance
pseudo_R2 : float
McFadden's pseudo R2 (coefficient of determination)
adj_pseudoR2 : float
adjusted McFadden's pseudo R2
SRMSE : float
standardized root mean square error
SSI : float
Sorensen similarity index
results : object
full results from estimated model. May contain addtional
diagnostics
Example
-------
>>> import numpy as np
>>> import libpysal
>>> from spint.gravity import BaseGravity
>>> db = libpysal.io.open(libpysal.examples.get_path('nyc_bikes_ct.csv'))
>>> cost = np.array(db.by_col('tripduration')).reshape((-1,1))
>>> flows = np.array(db.by_col('count')).reshape((-1,1))
>>> model = BaseGravity(flows, cost)
>>> model.params
array([17.84839637, -1.68325787])
"""
[docs] def __init__(
self,
flows,
cost,
cost_func='pow',
o_vars=None,
d_vars=None,
origins=None,
destinations=None,
constant=True,
framework='GLM',
SF=None,
CD=None,
Lag=None,
Quasi=False):
n = User.check_arrays(flows, cost)
#User.check_y(flows, n)
self.n = n
self.f = flows
self.c = cost
self.ov = o_vars
self.dv = d_vars
if isinstance(cost_func, str):
if cost_func.lower() == 'pow':
self.cf = np.log
if (self.c == 0).any():
raise ValueError(
"Zero values detected: cost function 'pow'"
"requires the logarithm of the cost variable which"
"is undefined at 0")
elif cost_func.lower() == 'exp':
self.cf = lambda x: x * 1.0
elif (isinstance(cost_func, FunctionType)) | (isinstance(cost_func, np.ufunc)):
self.cf = cost_func
else:
raise ValueError(
"cost_func must be 'exp', 'pow' or a valid "
" function that has a scalar as a input and output")
y = np.reshape(self.f, (-1, 1))
if isinstance(self, Gravity):
X = np.empty((self.n, 0))
else:
X = sp.csr_matrix((self.n, 1))
if isinstance(self, Production) | isinstance(self, Doubly):
o_dummies = spcategorical(origins.flatten())
if constant:
o_dummies = o_dummies[:, 1:]
X = sphstack(X, o_dummies, array_out=False)
if isinstance(self, Attraction) | isinstance(self, Doubly):
d_dummies = spcategorical(destinations.flatten())
if constant | isinstance(self, Doubly):
d_dummies = d_dummies[:, 1:]
X = sphstack(X, d_dummies, array_out=False)
if self.ov is not None:
if isinstance(self, Gravity):
for each in range(self.ov.shape[1]):
if (self.ov[:, each] == 0).any():
raise ValueError(
"Zero values detected in column %s "
"of origin variables, which are undefined for "
"Poisson log-linear spatial interaction models" %
each)
X = np.hstack(
(X, np.log(np.reshape(self.ov[:, each], (-1, 1)))))
else:
for each in range(self.ov.shape[1]):
if (self.ov[:, each] == 0).any():
raise ValueError(
"Zero values detected in column %s "
"of origin variables, which are undefined for "
"Poisson log-linear spatial interaction models" %
each)
ov = sp.csr_matrix(
np.log(np.reshape(self.ov[:, each], ((-1, 1)))))
X = sphstack(X, ov, array_out=False)
if self.dv is not None:
if isinstance(self, Gravity):
for each in range(self.dv.shape[1]):
if (self.dv[:, each] == 0).any():
raise ValueError(
"Zero values detected in column %s "
"of destination variables, which are undefined for "
"Poisson log-linear spatial interaction models" %
each)
X = np.hstack(
(X, np.log(np.reshape(self.dv[:, each], (-1, 1)))))
else:
for each in range(self.dv.shape[1]):
if (self.dv[:, each] == 0).any():
raise ValueError(
"Zero values detected in column %s "
"of destination variables, which are undefined for "
"Poisson log-linear spatial interaction models" %
each)
dv = sp.csr_matrix(
np.log(np.reshape(self.dv[:, each], ((-1, 1)))))
X = sphstack(X, dv, array_out=False)
if isinstance(self, Gravity):
X = np.hstack((X, self.cf(np.reshape(self.c, (-1, 1)))))
else:
c = sp.csr_matrix(self.cf(np.reshape(self.c, (-1, 1))))
X = sphstack(X, c, array_out=False)
X = X[:, 1:] # because empty array instantiated with extra column
if not isinstance(self, (Gravity, Production, Attraction, Doubly)):
X = self.cf(np.reshape(self.c, (-1, 1)))
if SF:
raise NotImplementedError(
"Spatial filter model not yet implemented")
if CD:
raise NotImplementedError(
"Competing destination model not yet implemented")
if Lag:
raise NotImplementedError(
"Spatial Lag autoregressive model not yet implemented")
CountModel.__init__(self, y, X, constant=constant)
if (framework.lower() == 'glm'):
if not Quasi:
results = self.fit(framework='glm')
else:
results = self.fit(framework='glm', Quasi=True)
else:
raise NotImplementedError('Only GLM is currently implemented')
self.params = results.params
self.yhat = results.yhat
self.cov_params = results.cov_params
self.std_err = results.std_err
self.pvalues = results.pvalues
self.tvalues = results.tvalues
self.deviance = results.deviance
self.resid_dev = results.resid_dev
self.llf = results.llf
self.llnull = results.llnull
self.AIC = results.AIC
self.k = results.k
self.D2 = results.D2
self.adj_D2 = results.adj_D2
self.pseudoR2 = results.pseudoR2
self.adj_pseudoR2 = results.adj_pseudoR2
self.results = results
self._cache = {}
@cache_readonly
def SSI(self):
return sorensen(self)
@cache_readonly
def SRMSE(self):
return srmse(self)
def reshape(self, array):
if isinstance(array, np.ndarray):
return array.reshape((-1, 1))
elif isinstance(array, list):
return np.array(array).reshape((-1, 1))
else:
raise TypeError(
"input must be an numpy array or list that can be coerced"
" into the dimensions n x 1")
[docs]class Gravity(BaseGravity):
"""
Unconstrained (traditional gravity) gravity-type spatial interaction model
Parameters
----------
flows : array of integers
n x 1; observed flows between O origins and D destinations
cost : array
n x 1; cost to overcome separation between each origin and
destination associated with a flow; typically distance or time
cost_func : string or function that has scalar input and output
functional form of the cost function;
'exp' | 'pow' | custom function
o_vars : array (optional)
n x p; p attributes for each origin of n flows; default
is None
d_vars : array (optional)
n x p; p attributes for each destination of n flows;
default is None
constant : boolean
True to include intercept in model; True by default
framework : string
estimation technique; currently only 'GLM' is avaialble
Quasi : boolean
True to estimate QuasiPoisson model; should result in same
parameters as Poisson but with altered covariance; default
to true which estimates Poisson model
SF : array
n x 1; eigenvector spatial filter to include in the model;
default to None which does not include a filter; not yet
implemented
CD : array
n x 1; competing destination term that accounts for the
likelihood that alternative destinations are considered
along with each destination under consideration for every
OD pair; defaults to None which does not include a CD
term; not yet implemented
Lag : W object
spatial weight for n observations (OD pairs) used to
construct a spatial autoregressive model and estimator;
defaults to None which does not include an autoregressive
term; not yet implemented
Attributes
----------
f : array
n x 1; observed flows; dependent variable; y
n : integer
number of observations
k : integer
number of parameters
c : array
n x 1; cost to overcome separation between each origin and
destination associated with a flow; typically distance or time
cf : function
cost function; used to transform cost variable
ov : array
n x p(o); p attributes for each origin of n flows
dv : array
n x p(d); p attributes for each destination of n flows
constant : boolean
True to include intercept in model; True by default
y : array
n x 1; dependent variable used in estimation including any
transformations
X : array
n x k, design matrix used in estimation
params : array
n x k, k estimated beta coefficients; k = p(o) + p(d) + 1
yhat : array
n x 1, predicted value of y (i.e., fittedvalues)
cov_params : array
Variance covariance matrix (kxk) of betas
std_err : array
k x 1, standard errors of betas
pvalues : array
k x 1, two-tailed pvalues of parameters
tvalues : array
k x 1, the tvalues of the standard errors
deviance : float
value of the deviance function evalued at params;
see family.py for distribution-specific deviance
resid_dev : array
n x 1, residual deviance of model
llf : float
value of the loglikelihood function evalued at params;
see family.py for distribution-specific loglikelihoods
llnull : float
value of the loglikelihood function evaluated with only an
intercept; see family.py for distribution-specific
loglikelihoods
AIC : float
Akaike information criterion
D2 : float
percentage of explained deviance
adj_D2 : float
adjusted percentage of explained deviance
pseudo_R2 : float
McFadden's pseudo R2 (coefficient of determination)
adj_pseudoR2 : float
adjusted McFadden's pseudo R2
SRMSE : float
standardized root mean square error
SSI : float
Sorensen similarity index
results : object
Full results from estimated model. May contain addtional
diagnostics
Example
-------
>>> import numpy as np
>>> import libpysal
>>> from spint.gravity import Gravity
>>> db = libpysal.io.open(libpysal.examples.get_path('nyc_bikes_ct.csv'))
>>> cost = np.array(db.by_col('tripduration')).reshape((-1,1))
>>> flows = np.array(db.by_col('count')).reshape((-1,1))
>>> o_cap = np.array(db.by_col('o_cap')).reshape((-1,1))
>>> d_cap = np.array(db.by_col('d_cap')).reshape((-1,1))
>>> model = Gravity(flows, o_cap, d_cap, cost, 'exp')
>>> model.params
array([ 3.80050153e+00, 5.54103854e-01, 3.94282921e-01, -2.27091686e-03])
"""
[docs] def __init__(self, flows, o_vars, d_vars, cost,
cost_func, constant=True, framework='GLM', SF=None, CD=None,
Lag=None, Quasi=False):
self.f = np.reshape(flows, (-1, 1))
if len(o_vars.shape) > 1:
p = o_vars.shape[1]
else:
p = 1
self.ov = np.reshape(o_vars, (-1, p))
if len(d_vars.shape) > 1:
p = d_vars.shape[1]
else:
p = 1
self.dv = np.reshape(d_vars, (-1, p))
self.c = np.reshape(cost, (-1, 1))
#User.check_arrays(self.f, self.ov, self.dv, self.c)
BaseGravity.__init__(
self,
self.f,
self.c,
cost_func=cost_func,
o_vars=self.ov,
d_vars=self.dv,
constant=constant,
framework=framework,
SF=SF,
CD=CD,
Lag=Lag,
Quasi=Quasi)
def local(self, loc_index, locs):
"""
Calibrate local models for subsets of data from a single location to all
other locations
Parameters
----------
loc_index : n x 1 array of either origin or destination id label for
flows; must be explicitly provided for local version of
basic gravity model since these are not passed to the
global model.
locs : iterable of either origin or destination labels for which
to calibrate local models; must also be explicitly
provided since local gravity models can be calibrated from origins
or destinations. If all origins are also destinations and
a local model is desired for each location then use
np.unique(loc_index)
Returns
-------
results : dict where keys are names of model outputs and diagnostics
and values are lists of location specific values.
"""
results = {}
covs = self.ov.shape[1] + self.dv.shape[1] + 1
results['AIC'] = []
results['deviance'] = []
results['pseudoR2'] = []
results['adj_pseudoR2'] = []
results['D2'] = []
results['adj_D2'] = []
results['SSI'] = []
results['SRMSE'] = []
for cov in range(covs):
results['param' + str(cov)] = []
results['stde' + str(cov)] = []
results['pvalue' + str(cov)] = []
results['tvalue' + str(cov)] = []
for loc in locs:
subset = loc_index == loc
f = self.reshape(self.f[subset])
o_vars = self.ov[subset.reshape(self.ov.shape[0]), :]
d_vars = self.dv[subset.reshape(self.dv.shape[0]), :]
dij = self.reshape(self.c[subset])
model = Gravity(f, o_vars, d_vars, dij, self.cf,
constant=False)
results['AIC'].append(model.AIC)
results['deviance'].append(model.deviance)
results['pseudoR2'].append(model.pseudoR2)
results['adj_pseudoR2'].append(model.adj_pseudoR2)
results['D2'].append(model.D2)
results['adj_D2'].append(model.adj_D2)
results['SSI'].append(model.SSI)
results['SRMSE'].append(model.SRMSE)
for cov in range(covs):
results['param' + str(cov)].append(model.params[cov])
results['stde' + str(cov)].append(model.std_err[cov])
results['pvalue' + str(cov)].append(model.pvalues[cov])
results['tvalue' + str(cov)].append(model.tvalues[cov])
return results
[docs]class Production(BaseGravity):
"""
Production-constrained (origin-constrained) gravity-type spatial interaction model
Parameters
----------
flows : array of integers
n x 1; observed flows between O origins and D destinations
origins : array of strings
n x 1; unique identifiers of origins of n flows; when
there are many origins it will be faster to use integers
rather than strings for id labels.
cost : array
n x 1; cost to overcome separation between each origin and
destination associated with a flow; typically distance or time
cost_func : string or function that has scalar input and output
functional form of the cost function;
'exp' | 'pow' | custom function
d_vars : array (optional)
n x p; p attributes for each destination of n flows;
default is None
constant : boolean
True to include intercept in model; True by default
framework : string
estimation technique; currently only 'GLM' is avaialble
Quasi : boolean
True to estimate QuasiPoisson model; should result in same
parameters as Poisson but with altered covariance; default
to true which estimates Poisson model
SF : array
n x 1; eigenvector spatial filter to include in the model;
default to None which does not include a filter; not yet
implemented
CD : array
n x 1; competing destination term that accounts for the
likelihood that alternative destinations are considered
along with each destination under consideration for every
OD pair; defaults to None which does not include a CD
term; not yet implemented
Lag : W object
spatial weight for n observations (OD pairs) used to
construct a spatial autoregressive model and estimator;
defaults to None which does not include an autoregressive
term; not yet implemented
Attributes
----------
f : array
n x 1; observed flows; dependent variable; y
n : integer
number of observations
k : integer
number of parameters
c : array
n x 1; cost to overcome separation between each origin and
destination associated with a flow; typically distance or time
cf : function
cost function; used to transform cost variable
o : array
n x 1; index of origin id's
dv : array
n x p; p attributes for each destination of n flows
constant : boolean
True to include intercept in model; True by default
y : array
n x 1; dependent variable used in estimation including any
transformations
X : array
n x k, design matrix used in estimation
params : array
n x k, k estimated beta coefficients; k = # of origins + p + 1
yhat : array
n x 1, predicted value of y (i.e., fittedvalues)
cov_params : array
Variance covariance matrix (kxk) of betas
std_err : array
k x 1, standard errors of betas
pvalues : array
k x 1, two-tailed pvalues of parameters
tvalues : array
k x 1, the tvalues of the standard errors
deviance : float
value of the deviance function evalued at params;
see family.py for distribution-specific deviance
resid_dev : array
n x 1, residual deviance of model
llf : float
value of the loglikelihood function evalued at params;
see family.py for distribution-specific loglikelihoods
llnull : float
value of the loglikelihood function evaluated with only an
intercept; see family.py for distribution-specific
loglikelihoods
AIC : float
Akaike information criterion
D2 : float
percentage of explained deviance
adj_D2 : float
adjusted percentage of explained deviance
pseudo_R2 : float
McFadden's pseudo R2 (coefficient of determination)
adj_pseudoR2 : float
adjusted McFadden's pseudo R2
SRMSE : float
standardized root mean square error
SSI : float
Sorensen similarity index
results : object
Full results from estimated model. May contain addtional
diagnostics
Example
-------
>>> import numpy as np
>>> import libpysal
>>> from spint.gravity import Production
>>> db = libpysal.io.open(libpysal.examples.get_path('nyc_bikes_ct.csv'))
>>> cost = np.array(db.by_col('tripduration')).reshape((-1,1))
>>> flows = np.array(db.by_col('count')).reshape((-1,1))
>>> o = np.array(db.by_col('o_tract')).reshape((-1,1))
>>> d_cap = np.array(db.by_col('d_cap')).reshape((-1,1))
>>> model = Production(flows, o, d_cap, cost, 'exp')
>>> model.params[-4:]
array([ 1.34721352, 0.96357345, 0.85535775, -0.00227444])
"""
[docs] def __init__(self, flows, origins, d_vars, cost, cost_func, constant=True,
framework='GLM', SF=None, CD=None, Lag=None, Quasi=False):
self.constant = constant
self.f = self.reshape(flows)
self.o = self.reshape(origins)
try:
if d_vars.shape[1]:
p = d_vars.shape[1]
except BaseException:
p = 1
self.dv = np.reshape(d_vars, (-1, p))
self.c = self.reshape(cost)
#User.check_arrays(self.f, self.o, self.dv, self.c)
BaseGravity.__init__(
self,
self.f,
self.c,
cost_func=cost_func,
d_vars=self.dv,
origins=self.o,
constant=constant,
framework=framework,
SF=SF,
CD=CD,
Lag=Lag,
Quasi=Quasi)
def local(self, locs=None):
"""
Calibrate local models for subsets of data from a single location to all
other locations
Parameters
----------
locs : iterable of location (origins) labels; default is
None which calibrates a local model for each origin
Returns
-------
results : dict where keys are names of model outputs and diagnostics
and values are lists of location specific values
"""
results = {}
offset = 1
covs = self.dv.shape[1] + 1
results['AIC'] = []
results['deviance'] = []
results['pseudoR2'] = []
results['adj_pseudoR2'] = []
results['D2'] = []
results['adj_D2'] = []
results['SSI'] = []
results['SRMSE'] = []
for cov in range(covs):
results['param' + str(cov)] = []
results['stde' + str(cov)] = []
results['pvalue' + str(cov)] = []
results['tvalue' + str(cov)] = []
if locs is None:
locs = np.unique(self.o)
for loc in np.unique(locs):
subset = self.o == loc
f = self.reshape(self.f[subset])
o = self.reshape(self.o[subset])
d_vars = self.dv[subset.reshape(self.dv.shape[0]), :]
dij = self.reshape(self.c[subset])
model = Production(f, o, d_vars, dij, self.cf, constant=False)
results['AIC'].append(model.AIC)
results['deviance'].append(model.deviance)
results['pseudoR2'].append(model.pseudoR2)
results['adj_pseudoR2'].append(model.adj_pseudoR2)
results['D2'].append(model.D2)
results['adj_D2'].append(model.adj_D2)
results['SSI'].append(model.SSI)
results['SRMSE'].append(model.SRMSE)
for cov in range(covs):
results['param' + str(cov)].append(model.params[offset + cov])
results['stde' + str(cov)].append(model.std_err[offset + cov])
results['pvalue' +
str(cov)].append(model.pvalues[offset + cov])
results['tvalue' +
str(cov)].append(model.tvalues[offset + cov])
return results
[docs]class Attraction(BaseGravity):
"""
Attraction-constrained (destination-constrained) gravity-type spatial interaction model
Parameters
----------
flows : array of integers
n x 1; observed flows between O origins and D destinations
destinations : array of strings
n x 1; unique identifiers of destinations of n flows; when
there are many destinations it will be faster to use
integers over strings for id labels.
cost : array
n x 1; cost to overcome separation between each origin and
destination associated with a flow; typically distance or time
cost_func : string or function that has scalar input and output
functional form of the cost function;
'exp' | 'pow' | custom function
o_vars : array (optional)
n x p; p attributes for each origin of n flows; default
is None
constant : boolean
True to include intercept in model; True by default
y : array
n x 1; dependent variable used in estimation including any
transformations
X : array
n x k, design matrix used in estimation
framework : string
estimation technique; currently only 'GLM' is avaialble
Quasi : boolean
True to estimate QuasiPoisson model; should result in same
parameters as Poisson but with altered covariance; default
to true which estimates Poisson model
SF : array
n x 1; eigenvector spatial filter to include in the model;
default to None which does not include a filter; not yet
implemented
CD : array
n x 1; competing destination term that accounts for the
likelihood that alternative destinations are considered
along with each destination under consideration for every
OD pair; defaults to None which does not include a CD
term; not yet implemented
Lag : W object
spatial weight for n observations (OD pairs) used to
construct a spatial autoregressive model and estimator;
defaults to None which does not include an autoregressive
term; not yet implemented
Attributes
----------
f : array
n x 1; observed flows; dependent variable; y
n : integer
number of observations
k : integer
number of parameters
c : array
n x 1; cost to overcome separation between each origin and
destination associated with a flow; typically distance or time
cf : function
cost function; used to transform cost variable
d : array
n x 1; index of destination id's
ov : array
n x p; p attributes for each origin of n flows
constant : boolean
True to include intercept in model; True by default
params : array
n x k, k estimated beta coefficients; k = # of
destinations + p + 1
yhat : array
n x 1, predicted value of y (i.e., fittedvalues)
cov_params : array
Variance covariance matrix (kxk) of betas
std_err : array
k x 1, standard errors of betas
pvalues : array
k x 1, two-tailed pvalues of parameters
tvalues : array
k x 1, the tvalues of the standard errors
deviance : float
value of the deviance function evalued at params;
see family.py for distribution-specific deviance
resid_dev : array
n x 1, residual deviance of model
llf : float
value of the loglikelihood function evalued at params;
see family.py for distribution-specific loglikelihoods
llnull : float
value of the loglikelihood function evaluated with only an
intercept; see family.py for distribution-specific
loglikelihoods
AIC : float
Akaike information criterion
D2 : float
percentage of explained deviance
adj_D2 : float
adjusted percentage of explained deviance
pseudo_R2 : float
McFadden's pseudo R2 (coefficient of determination)
adj_pseudoR2 : float
adjusted McFadden's pseudo R2
SRMSE : float
standardized root mean square error
SSI : float
Sorensen similarity index
results : object
Full results from estimated model. May contain addtional
diagnostics
Example
-------
>>> import numpy as np
>>> import libpysal
>>> from spint.gravity import Attraction
>>> nyc_bikes = libpysal.examples.load_example('nyc_bikes')
>>> db = libpysal.io.open(nyc_bikes.get_path('nyc_bikes_ct.csv'))
>>> cost = np.array(db.by_col('tripduration')).reshape((-1,1))
>>> flows = np.array(db.by_col('count')).reshape((-1,1))
>>> d = np.array(db.by_col('d_tract')).reshape((-1,1))
>>> o_cap = np.array(db.by_col('o_cap')).reshape((-1,1))
>>> model = Attraction(flows, d, o_cap, cost, 'exp')
>>> model.params[-4:]
array([ 1.21962276, 0.87634028, 0.88290909, -0.00229081])
"""
[docs] def __init__(self, flows, destinations, o_vars, cost, cost_func,
constant=True, framework='GLM', SF=None, CD=None, Lag=None,
Quasi=False):
self.f = np.reshape(flows, (-1, 1))
if len(o_vars.shape) > 1:
p = o_vars.shape[1]
else:
p = 1
self.ov = np.reshape(o_vars, (-1, p))
self.d = np.reshape(destinations, (-1, 1))
self.c = np.reshape(cost, (-1, 1))
#User.check_arrays(self.f, self.d, self.ov, self.c)
BaseGravity.__init__(
self,
self.f,
self.c,
cost_func=cost_func,
o_vars=self.ov,
destinations=self.d,
constant=constant,
framework=framework,
SF=SF,
CD=CD,
Lag=Lag,
Quasi=Quasi)
def local(self, locs=None):
"""
Calibrate local models for subsets of data from a single location to all
other locations
Parameters
----------
locs : iterable of location (destinations) labels; default is
None which calibrates a local model for each destination
Returns
-------
results : dict where keys are names of model outputs and diagnostics
and values are lists of location specific values
"""
results = {}
offset = 1
covs = self.ov.shape[1] + 1
results['AIC'] = []
results['deviance'] = []
results['pseudoR2'] = []
results['adj_pseudoR2'] = []
results['D2'] = []
results['adj_D2'] = []
results['SSI'] = []
results['SRMSE'] = []
for cov in range(covs):
results['param' + str(cov)] = []
results['stde' + str(cov)] = []
results['pvalue' + str(cov)] = []
results['tvalue' + str(cov)] = []
if locs is None:
locs = np.unique(self.d)
for loc in np.unique(locs):
subset = self.d == loc
f = self.reshape(self.f[subset])
d = self.reshape(self.d[subset])
o_vars = self.ov[subset.reshape(self.ov.shape[0]), :]
dij = self.reshape(self.c[subset])
model = Attraction(f, d, o_vars, dij, self.cf, constant=False)
results['AIC'].append(model.AIC)
results['deviance'].append(model.deviance)
results['pseudoR2'].append(model.pseudoR2)
results['adj_pseudoR2'].append(model.adj_pseudoR2)
results['D2'].append(model.D2)
results['adj_D2'].append(model.adj_D2)
results['SSI'].append(model.SSI)
results['SRMSE'].append(model.SRMSE)
for cov in range(covs):
results['param' + str(cov)].append(model.params[offset + cov])
results['stde' + str(cov)].append(model.std_err[offset + cov])
results['pvalue' +
str(cov)].append(model.pvalues[offset + cov])
results['tvalue' +
str(cov)].append(model.tvalues[offset + cov])
return results
[docs]class Doubly(BaseGravity):
"""
Doubly-constrained gravity-type spatial interaction model
Parameters
----------
flows : array of integers
n x 1; observed flows between O origins and D destinations
origins : array of strings
n x 1; unique identifiers of origins of n flows; when
there are many origins it will be faster to use integers
rather than strings for id labels.
destinations : array of strings
n x 1; unique identifiers of destinations of n flows; when
there are many destinations it will be faster to use
integers rather than strings for id labels
cost : array
n x 1; cost to overcome separation between each origin and
destination associated with a flow; typically distance or time
cost_func : string or function that has scalar input and output
functional form of the cost function;
'exp' | 'pow' | custom function
constant : boolean
True to include intercept in model; True by default
y : array
n x 1; dependent variable used in estimation including any
transformations
X : array
n x k, design matrix used in estimation
framework : string
estimation technique; currently only 'GLM' is avaialble
Quasi : boolean
True to estimate QuasiPoisson model; should result in same
parameters as Poisson but with altered covariance; default
to true which estimates Poisson model
SF : array
n x 1; eigenvector spatial filter to include in the model;
default to None which does not include a filter; not yet
implemented
CD : array
n x 1; competing destination term that accounts for the
likelihood that alternative destinations are considered
along with each destination under consideration for every
OD pair; defaults to None which does not include a CD
term; not yet implemented
Lag : W object
spatial weight for n observations (OD pairs) used to
construct a spatial autoregressive model and estimator;
defaults to None which does not include an autoregressive
term; not yet implemented
Attributes
----------
f : array
n x 1; observed flows; dependent variable; y
n : integer
number of observations
k : integer
number of parameters
c : array
n x 1; cost to overcome separation between each origin and
destination associated with a flow; typically distance or time
cf : function
cost function; used to transform cost variable
o : array
n x 1; index of origin id's
d : array
n x 1; index of destination id's
constant : boolean
True to include intercept in model; True by default
params : array
n x k, estimated beta coefficients; k = # of origins + #
of destinations; the first x-1 values
pertain to the x destinations (leaving out the first
destination to avoid perfect collinearity; no fixed
effect), the next x values pertain to the x origins, and the
final value is the distance decay coefficient
yhat : array
n x 1, predicted value of y (i.e., fittedvalues)
cov_params : array
Variance covariance matrix (kxk) of betas
std_err : array
k x 1, standard errors of betas
pvalues : array
k x 1, two-tailed pvalues of parameters
tvalues : array
k x 1, the tvalues of the standard errors
deviance : float
value of the deviance function evalued at params;
see family.py for distribution-specific deviance
resid_dev : array
n x 1, residual deviance of model
llf : float
value of the loglikelihood function evalued at params;
see family.py for distribution-specific loglikelihoods
llnull : float
value of the loglikelihood function evaluated with only an
intercept; see family.py for distribution-specific
loglikelihoods
AIC : float
Akaike information criterion
D2 : float
percentage of explained deviance
adj_D2 : float
adjusted percentage of explained deviance
pseudo_R2 : float
McFadden's pseudo R2 (coefficient of determination)
adj_pseudoR2 : float
adjusted McFadden's pseudo R2
SRMSE : float
standardized root mean square error
SSI : float
Sorensen similarity index
results : object
Full results from estimated model. May contain addtional
diagnostics
Example
-------
>>> import numpy as np
>>> import libpysal
>>> from spint.gravity import Doubly
>>> db = libpysal.io.open(libpysal.examples.get_path('nyc_bikes_ct.csv'))
>>> cost = np.array(db.by_col('tripduration')).reshape((-1,1))
>>> flows = np.array(db.by_col('count')).reshape((-1,1))
>>> d = np.array(db.by_col('d_tract')).reshape((-1,1))
>>> o = np.array(db.by_col('o_tract')).reshape((-1,1))
>>> model = Doubly(flows, o, d, cost, 'exp')
>>> model.params[-1:]
array([-0.00232112])
"""
[docs] def __init__(self, flows, origins, destinations, cost, cost_func,
constant=True, framework='GLM', SF=None, CD=None, Lag=None,
Quasi=False):
self.f = np.reshape(flows, (-1, 1))
self.o = np.reshape(origins, (-1, 1))
self.d = np.reshape(destinations, (-1, 1))
self.c = np.reshape(cost, (-1, 1))
#User.check_arrays(self.f, self.o, self.d, self.c)
BaseGravity.__init__(
self,
self.f,
self.c,
cost_func=cost_func,
origins=self.o,
destinations=self.d,
constant=constant,
framework=framework,
SF=SF,
CD=CD,
Lag=Lag,
Quasi=Quasi)
def local(self, locs=None):
"""
**Not inmplemented for doubly-constrained models** Not possible due to
insufficient degrees of freedom.
Calibrate local models for subsets of data from a single location to all
other locations
"""
raise NotImplementedError(
"Local models not possible for"
" doubly-constrained model due to insufficient degrees of freedom.")