/*---------------------------------------------------------------------------- Procedure: SUR.src Written by: Donald J Wyhowski Written: May 1, 2000 Last changed: June 7, 2000 Note........: This program estimates a system of equations using the iterated Seemingly Unrelated Regression (SUR) technique. -----------------------------------------------------------------------------*/ /* Format: Q = SUR(dataset,LHS_vars,RHS_vars,NUM_var,Restrict) Input: dataset -- string, name of GAUSS data set. LHS_vars -- character vector of all dependent variable names in the systems. Example: LHS_vars = { y1,y2,y3 }; RHS_vars -- character vector of all independent variable names in the systems. The order of the variable names must correspond to the order of the equations when they are stacked. Put "CONST" in the RHS_vars list if constant term is needed. Example: RHS_vars = { const,x1,x2,x3, @ 1st eqn. @ const,x2,x3,x4, @ 2nd eqn. @ const,x1,x3,x5 }; @ 3rd eqn. @ NUM_var -- numeric vector to determine the number of right- hand side variables in each equation. Following the above example: NUM_var = { 4,4,4 }; Restrict -- string, constrainted information on parameters to perform restricted estimation. The syntax of Restrict is as follows: Restrict="rest1, rest2,...., restN"; More than one restriction is allowed provided each is separated by commas. Each restriction must be written as a linear equation with all variables in the left-hand side and the constant in the right-hand side (i.e., x1:1+x1:2=1). Variables shown in each restriction must be variables in the regression model. Note that the numeric value following the (:) signifies which equation the variable comes from (i.e., X4:10 indicates the X4 variable comes from the 10th equation). Restrictions in the RESTRICT argument must be consistent and not redundant otherwise error messages will be given. Users should note that only the parameters associated with the variables are restricted, and not the variables in the model. Examples of some restrict arguments: 1) Restrict="x1:1 + x1:2 + x1:3 = 1"; 2) Restrict="const:1 + const:2 + const:3 = 1, trend:1 = 0, trend:2 = 0, trend:3 = 0"; Output: Q -- a "COMPACT" output vector containing all calculated statistics. See manual for more details on extracting information from it. Variables contained in Q are: nms -- name of the regressors. b -- regression coefficients. vc -- variance-covariance matrix of b. se -- standard error of b. s2 -- variance of the error. cx -- correlation matrix of b. rsq -- coefficient of determination. rbsq -- adjusted R-squared. dw -- Durbin-Watson statistic. nobs -- number of observations. sigma -- residual covariance matrix. sse -- residual sum of square. Globals: _lrdv -- scalar. Determines which divisor is used to compute the covariance matrix of the error. 0 T-(K/M) is used as divisor, where T is the number of observations, K is the number of all right-hand side variables in the systems, and M is the total number of equations. Hence, (K/M) is the average number of coefficients per equation. 1 T is used as divisor. Users are encouraged to use this, since it provides good asmptotic properties for the estimator. Default = 1. _lriter -- scalar. Sets the maximum number of iterations for the iterated seemingly unrelated regression. The iterative process is also subject to the convergence criterion _lrtol. Default = 1. __output -- scalar. If nonzero, results are printed. Default = 2. _lrpcor -- scalar. If 1, print the correlation matrix of all coefficients in the systems after convergence. Default = 0. _lrpcov -- scalar. If 1, print the covariance matrix of all coefficients in the systems after convergence. Default = 0. __range -- a 2 x 1 vector. Specifies the range of the data set to be used in estimation. The first element specifies the beginning observation while the second element specifies the ending observation. Example: __range = { 100,200 }. Default is { 0,0 } and uses the whole data set. _lrtol -- scalar. Specifies a convergence criterion to stop the iterative process. The iterative process will continue until either the iteration limit specified in _lriter is reached or the percentage change in the log of determinant of sigma is less than the convergence criterion. Default = 0.0001. __title -- string, message printed at the top of the results. Default =""; */ #include lr.ext; #include gauss.ext; proc(1) = sur(dataset,LHS_vars,RHS_vars,NUM_var,restrict); local oldtrap,start,counter,count1,lastobs,err,iter,maxiter,pcd, lnsig_o,lnsig_n,R,rank_R,z,invRCR,tobs,nobs,fp,nr,i,j,g,mk,lb,ub, lbi,ubi,lbj,ubj,sst,rsq,rbsq,readisk,what,vnames,names, indx,xzx,xzy,sig,isig,b,c,s,cm,se,se0,t1,t_temp,t2,df,dta, ixxtemp,xx,xy,e,yy,y,sse,ybar,sumy,tsumy,dw,tdw,ef,e1, errmsg,Y_index,X_index,Q,rr; if __output; call header("LINEAR SEEMINGLY UNRELATED REGRESSION", dataset,0); endif; dataset = "" $+ dataset; fp = -1; open fp = ^dataset; if fp == -1; goto errout("Data file: " $+ dataset,1); endif; if sumc(NUM_var) /= rows(RHS_vars); goto errout("# of RHS_vars = " $+ ftos(rows(RHS_vars),"%*.*lf",1,0) $+ " Total NUM_var = " $+ ftos(sumc(NUM_var),"%*.*lf",1,0),36); endif; { nr,start,counter,lastobs } = _rngchk(dataset,__range); nr=lastobs; if __output; print; if (start /= 1 or lastobs /= rowsf(fp)); print ("SAMPLE RANGE SET TO: " $+ ftos(__range[1],"%*.*lf",1,0) $+ " TO " $+ ftos(__range[2],"%*.*lf",1,0)); endif; if _lrdv == 1; print "DIVISOR USING N IN EFFECT"; endif; if type(restrict) == 13; print "RESTRICTIONS IN EFFECT "; endif; endif; tobs = lastobs-start+1; what = { const }; { vnames, indx } = indices(dataset,0); clear indx; if not what $/= RHS_vars; vnames = "CONST"|vnames; endif; if ismiss(indcv((LHS_vars|RHS_vars),vnames)); goto errout("Check the variable names carefully",2); endif; if type(restrict) == 13; restrict=chrs(packr(miss(miss(miss(vals(restrict),10),13),32))); { R,z }=SRMatrix(restrict,RHS_vars,NUM_var); if scalerr(R); goto errout("",scalerr(R)); endif; endif; Y_index = indcv(LHS_vars,vnames); X_index = indcv(RHS_vars,vnames); nobs = 0; xx=0; xy=0; call seekr(fp,start); count1=counter; do while count1 < lastobs; dta=readr(fp,nr); count1=count1+rows(dta); if count1 > lastobs; dta = trimr(dta,0,count1-lastobs); endif; dta=packr(dta); if ismiss(dta); continue; endif; nobs = nobs + rows(dta); if not what $/= RHS_vars; dta = ones(rows(dta),1)~dta; endif; xx = xx + dta[.,X_index]'*dta[.,X_index]; xy = xy + dta[.,X_index]'*dta[.,Y_index]; endo; g=rows(NUM_var); b=zeros(rows(RHS_vars),1); lb=1; ub=0; i=1; do while i <= g; ub = NUM_var[i] + ub; oldtrap = trapchk(65535); trap 1; ixxtemp=invpd(xx[lb:ub,lb:ub]); trap oldtrap; if scalerr(ixxtemp); goto errout("",30); endif; b[lb:ub]=ixxtemp*xy[lb:ub,i]; lb = ub + 1; i = i + 1; endo; if type(restrict) == 13; c=zeros(rows(RHS_vars),rows(RHS_vars)); lb=1; ub=0; i=1; do while i <= g; ub=NUM_var[i]+ub; c[lb:ub,lb:ub] = xx[lb:ub,lb:ub]; lb=ub+1; i=i+1; endo; c=invpd(c); oldtrap = trapchk(65535); trap 1; invRCR = invpd(R*c*R'); trap oldtrap; if scalerr(invRCR); goto errout("",30); endif; b = b - (c*R')*invRCR*(R*b-z); endif; { sig }=LRsse(dataset,LHS_vars,RHS_vars,NUM_var,b); if _lrdv == 1; sig = sig./nobs; else; sig = sig./(nobs-(rows(RHS_vars)/rows(NUM_var))); endif; lnsig_o = ln(det(sig)); if __output; print; print ftos(0," ITER. # = %*.*lf",4,0);; print ftos(lnsig_o," LOG OF DETERMINANT "\ "OF SIGMA = %*.*lf",14,8); endif; iter=1; pcd=abs(lnsig_o); maxiter = maxc(_lriter|1); do while ((iter <= maxiter) and (pcd >= _lrtol)); isig = invpd(sig); mk = sumc(NUM_var); xzx = zeros(mk,mk); xzy = zeros(mk,1); i = 1; lbi = 1; ubi = 0; do while i <= rows(NUM_var); ubi = NUM_var[i] + ubi; j = 1; lbj = 1; ubj = 0; do while j <= rows(NUM_var); ubj = NUM_var[j] + ubj; if i == j; xzx[lbi:ubi,lbj:ubj]=isig[i,j]*xx[lbi:ubi,lbj:ubj]; elseif j > i; xzx[lbi:ubi,lbj:ubj]=isig[i,j]*xx[lbi:ubi,lbj:ubj]; elseif j < i; xzx[lbi:ubi,lbj:ubj] = xzx[lbj:ubj,lbi:ubi]'; endif; xzy[lbi:ubi] = xzy[lbi:ubi]+isig[i,j]*xy[lbi:ubi,j]; j = j + 1; lbj = ubj + 1; endo; i = i + 1; lbi = ubi + 1; endo; oldtrap = trapchk(65535); trap 1; c = invpd(xzx); trap oldtrap; if scalerr(c); goto errout("",30); endif; b = c*xzy; if type(restrict) == 13; oldtrap = trapchk(65535); trap 1; invRCR = invpd(R*c*R'); trap oldtrap; if scalerr(invRCR); goto errout("",30); endif; b = b - (c*R')*invRCR*(R*b-z); c = c - (c*R')*invRCR*(R*c); endif; se = sqrtabs(diag(c)); se0 = se .==0; t_temp = b./(se+se0); t1 = t_temp + miss(se0,1); df = zeros(g,1); t2 = zeros(rows(t1),1); lb=1; ub=0; i=1; do while i <= g; ub=NUM_var[i]+ub; if type(restrict) == 13; df[i]=nobs-NUM_var[i]+rows(R); else; df[i]=nobs-NUM_var[i]; endif; t2[lb:ub] = 2*cdftc(abs(t_temp[lb:ub]),df[i]); t2[lb:ub] = t2[lb:ub]+miss(se0[lb:ub],1); lb=ub+1; i=i+1; endo; { sig }=LRsse(dataset,LHS_vars,RHS_vars,NUM_var,b); if _lrdv == 1; sig = sig./nobs; else; sig = sig./(nobs-(rows(RHS_vars)/rows(NUM_var))); endif; lnsig_n = ln(det(sig)); pcd = abs((lnsig_n - lnsig_o)/lnsig_o)*100; if __output; print ftos(iter," ITER. # = %*.*lf",4,0);; print ftos(lnsig_n," LOG OF DETERMINANT OF "\ "SIGMA = %*.*lf",14,8); endif; lnsig_o = lnsig_n; iter=iter+1; endo; yy=0; sse=0; sumy=0; ybar=0; dw=0; ef = zeros(1,g); readisk = 0; call seekr(fp,start); count1=counter; do while count1 < lastobs; dta=readr(fp,nr); count1=count1+rows(dta); if count1 > lastobs; dta = trimr(dta,0,count1-lastobs); endif; dta=packr(dta); if ismiss(dta); continue; endif; if not what $/= RHS_vars; dta=ones(rows(dta),1)~dta; endif; y=zeros(rows(dta),g); e=zeros(rows(dta),g); e1=e; tsumy=zeros(g,1); tdw=zeros(g,1); i=1; lb=1; ub=0; do while i <= g; ub=NUM_var[i]+ub; y[.,i]=dta[.,indcv(LHS_vars[i],vnames)]; e[.,i]=dta[.,indcv(LHS_vars[i],vnames)] - dta[.,indcv(RHS_vars[lb:ub],vnames)]*b[lb:ub]; tsumy[i]=tsumy[i]+sumc(y[.,i]); e1[.,i] = lag(e[.,i]); e1[1,i] = ef[.,i]; ef[.,i]=e[rows(y[.,i]),i]; if readisk == 0; tdw[i]=tdw[i]+sumc((e[.,i] - e1[.,i])^2) - e[1,i]^2; else; tdw[i]=tdw[i]+sumc((e[.,i] - e1[.,i])^2); endif; i=i+1; lb=ub+1; endo; sse=sse+diag(moment(e,0)); yy=yy+diag(moment(y,0)); sumy=sumy+tsumy; dw=dw+tdw; readisk = readisk + 1; endo; ybar=sumy./nobs; dw=dw./sse; i=1; lb=1; ub=0; sst=0; rsq=zeros(g,1); rbsq=0; do until i > g; ub = NUM_var[i] + ub; names = LHS_vars[i]|RHS_vars[lb:ub]; sst=yy[i] - nobs*(ybar[i]^2); rsq[i]=1-(sse[i]/sst); rbsq=1-((nobs-1)/df[i])*(1-rsq[i]); if __output; call LRprt(i,names,tobs,nobs,sst,df[i],rsq[i],rbsq,sse[i], sig[i,i],"nofstat",0,0,dw[i],b[lb:ub],se[lb:ub], t1[lb:ub],t2[lb:ub],_lrdv); endif; lb = ub + 1; i = i + 1; endo; s=1./sqrtabs(diag(c)); cm=(s.*c).*s'; if __output; if _lrpcov; print; matwrt("VARIANCE-COVARIANCE MATRIX OF ESTIMATES", c,RHS_vars,RHS_vars,4); endif; if _lrpcor; print; matwrt("CORRELATION MATRIX OF ESTIMATES", cm,RHS_vars,RHS_vars,4); endif; endif; Q = 0; /* initialize the the output vector */ Q = vput(Q,"LSUR","model"); Q = vput(Q,RHS_vars,"nms"); Q = vput(Q,NUM_var,"novars"); Q = vput(Q,b,"b"); Q = vput(Q,c,"vc"); Q = vput(Q,se,"se"); Q = vput(Q,diag(sig),"s2"); Q = vput(Q,cm,"cx"); Q = vput(Q,rsq,"rsq"); Q = vput(Q,dw,"dw"); Q = vput(Q,sse,"sse"); Q = vput(Q,nobs,"nobs"); Q = vput(Q,sig,"sigma"); if fp > 0; fp=close(fp); endif; retp(Q); errout: pop err; pop errmsg; if not trapchk(1); lrerror(errmsg,err); errorlog "LSUR estimation won't be done!"; end; endif; if fp > 0; fp=close(fp); endif; retp(error(err)); endp;