/*----------------------------------------------------------------------------

	Procedure: 		SUR.src
	Written by:   	Donald J Wyhowski
  	Written:	  	May 1, 2000 
	Last changed: 	June 7, 2000

  	Note........: 	This program estimates a system of equations using the
					iterated Seemingly Unrelated Regression (SUR) technique.  
							
                	
-----------------------------------------------------------------------------*/
 

/*
	Format:   Q = SUR(dataset,LHS_vars,RHS_vars,NUM_var,Restrict)

	Input:    dataset  -- string, name of GAUSS data set.

	          LHS_vars -- character vector of all dependent variable
                          names in the systems.  Example:

                             LHS_vars = { y1,y2,y3 };

             RHS_vars -- character vector of all independent variable
                         names in the systems.  The order of the
                         variable names must correspond to the order
                         of the equations when they are stacked.  Put
                         "CONST" in the RHS_vars list if constant term
                         is needed.  Example:
 
                             RHS_vars = { const,x1,x2,x3,     @ 1st eqn. @
                                          const,x2,x3,x4,     @ 2nd eqn. @
                                          const,x1,x3,x5 };   @ 3rd eqn. @

             NUM_var  -- numeric vector to determine the number of right-
                         hand side variables in each equation.  Following
                         the above example:

                             NUM_var = { 4,4,4 };

             Restrict -- string, constrainted information on parameters
                         to perform restricted estimation.  The syntax
                         of Restrict is as follows:
  
                             Restrict="rest1, rest2,...., restN";

                         More than one restriction is allowed provided
                         each is separated by commas.  Each restriction
                         must be written as a linear equation with all
                         variables in the left-hand side and the constant
                         in the right-hand side (i.e., x1:1+x1:2=1).
                         Variables shown in each restriction must be
                         variables in the regression model.  Note that
                         the numeric value following the (:) signifies
                         which equation the variable comes from (i.e.,
                         X4:10 indicates the X4 variable comes from the
                         10th equation).  Restrictions in the RESTRICT
                         argument must be consistent and not redundant
                         otherwise error messages will be given.  Users
                         should note that only the parameters associated
                         with the variables are restricted, and not the
                         variables in the model.
 
                         Examples of some restrict arguments:

                            1)  Restrict="x1:1 + x1:2 + x1:3 = 1";
                            2)  Restrict="const:1 + const:2 + const:3 = 1,
                                          trend:1 = 0,
                                          trend:2 = 0,
                                          trend:3 = 0";

   Output:
             Q -- a "COMPACT" output vector containing all calculated
                  statistics.  See manual for more details on extracting
                  information from it.  Variables contained in Q are:
 
                    nms   -- name of the regressors.
                    b     -- regression coefficients.
                    vc    -- variance-covariance matrix of b.
                    se    -- standard error of b.
                    s2    -- variance of the error.
                    cx    -- correlation matrix of b.
                    rsq   -- coefficient of determination.
                    rbsq  -- adjusted R-squared.
                    dw    -- Durbin-Watson statistic.
                    nobs  -- number of observations.
                    sigma -- residual covariance matrix.
                    sse   -- residual sum of square.
 
   Globals:

               _lrdv    -- scalar.  Determines which divisor is used to
                           compute the covariance matrix of the error.

                         0   T-(K/M) is used as divisor, where T is the
                             number of observations, K is the number of
                             all right-hand side variables in the systems,
                             and M is the total number of equations. Hence,
                             (K/M) is the average number of coefficients
                             per equation.
  
                         1   T is used as divisor.  Users are encouraged
                             to use this, since it provides good asmptotic
                             properties for the estimator.
  
                         Default = 1.
  
           _lriter    -- scalar.  Sets the maximum number of iterations
                         for the iterated seemingly unrelated regression.
                         The iterative process is also subject to the
                         convergence criterion _lrtol.  Default = 1.

             __output -- scalar.  If nonzero, results are printed.
                         Default = 2.

           _lrpcor    -- scalar.  If 1, print the correlation matrix of
                         all coefficients in the systems after convergence.
                         Default = 0.

           _lrpcov    -- scalar.  If 1, print the covariance matrix of
                         all coefficients in the systems after convergence.
                         Default = 0.

             __range  -- a 2 x 1 vector.  Specifies the range of the
                         data set to be used in estimation.  The first
                         element specifies the beginning observation
                         while the second element specifies the ending
                         observation.

                         Example: __range = { 100,200 }.

                         Default is { 0,0 } and uses the whole data set.

            _lrtol    -- scalar.  Specifies a convergence criterion to
                         stop the iterative process.  The iterative
                         process will continue until either the iteration
                         limit specified in _lriter is reached or the
                         percentage change in the log of determinant of
                         sigma is less than the convergence criterion.
                         Default = 0.0001.

             __title  -- string, message printed at the top of the
                         results.  Default ="";

*/

#include lr.ext;
#include gauss.ext;

proc(1) = sur(dataset,LHS_vars,RHS_vars,NUM_var,restrict);

    local oldtrap,start,counter,count1,lastobs,err,iter,maxiter,pcd,
          lnsig_o,lnsig_n,R,rank_R,z,invRCR,tobs,nobs,fp,nr,i,j,g,mk,lb,ub,
          lbi,ubi,lbj,ubj,sst,rsq,rbsq,readisk,what,vnames,names,
          indx,xzx,xzy,sig,isig,b,c,s,cm,se,se0,t1,t_temp,t2,df,dta,
          ixxtemp,xx,xy,e,yy,y,sse,ybar,sumy,tsumy,dw,tdw,ef,e1,
          errmsg,Y_index,X_index,Q,rr;

    if __output;
       call header("LINEAR SEEMINGLY UNRELATED REGRESSION",
                   dataset,0);
    endif;

    dataset = "" $+ dataset;
    fp = -1;
    open fp = ^dataset;
    if fp == -1;
        goto errout("Data file: " $+ dataset,1);
    endif;
    if sumc(NUM_var) /= rows(RHS_vars);
        goto errout("# of RHS_vars = " $+ ftos(rows(RHS_vars),"%*.*lf",1,0)
                     $+ "     Total NUM_var = " $+
                     ftos(sumc(NUM_var),"%*.*lf",1,0),36);
    endif;

    { nr,start,counter,lastobs } = _rngchk(dataset,__range); 

nr=lastobs; 

    if __output;
       print;
       if (start /= 1 or lastobs /= rowsf(fp));
          print ("SAMPLE RANGE SET TO: "
                  $+ ftos(__range[1],"%*.*lf",1,0)
                  $+ " TO " $+ ftos(__range[2],"%*.*lf",1,0));
       endif;
       if _lrdv == 1;
          print "DIVISOR USING N IN EFFECT";
       endif;
       if type(restrict) == 13;
          print "RESTRICTIONS IN EFFECT ";
       endif;
    endif;

    tobs = lastobs-start+1;
    what = { const };
    { vnames, indx } = indices(dataset,0);
    clear indx;

    if not what $/= RHS_vars;
       vnames = "CONST"|vnames;
    endif;

    if ismiss(indcv((LHS_vars|RHS_vars),vnames));
        goto errout("Check the variable names carefully",2);
    endif;

    if type(restrict) == 13;
       restrict=chrs(packr(miss(miss(miss(vals(restrict),10),13),32)));
       { R,z }=SRMatrix(restrict,RHS_vars,NUM_var);
       if scalerr(R);
          goto errout("",scalerr(R));
       endif;
    endif;

    Y_index = indcv(LHS_vars,vnames);
    X_index = indcv(RHS_vars,vnames);

    nobs = 0;
    xx=0;
    xy=0;
    call seekr(fp,start);
    count1=counter;
    do while count1 < lastobs;
       dta=readr(fp,nr);
       count1=count1+rows(dta);
       if count1 > lastobs;
          dta = trimr(dta,0,count1-lastobs);
       endif;
       dta=packr(dta); 
       if ismiss(dta);
          continue;
       endif;
       nobs = nobs + rows(dta);
       if not what $/= RHS_vars;
          dta = ones(rows(dta),1)~dta;
       endif;
       xx = xx + dta[.,X_index]'*dta[.,X_index];
       xy = xy + dta[.,X_index]'*dta[.,Y_index];
    endo;

    g=rows(NUM_var);
    b=zeros(rows(RHS_vars),1);
    lb=1;
    ub=0;
    i=1;
    do while i <= g;
       ub = NUM_var[i] + ub;
       oldtrap = trapchk(65535);
       trap 1;
       ixxtemp=invpd(xx[lb:ub,lb:ub]);
       trap oldtrap;
       if scalerr(ixxtemp);
          goto errout("",30);
       endif;
       b[lb:ub]=ixxtemp*xy[lb:ub,i];  
       lb = ub + 1;
       i = i + 1;
    endo;

    if type(restrict) == 13;
       c=zeros(rows(RHS_vars),rows(RHS_vars));
       lb=1;
       ub=0;
       i=1;
       do while i <= g;
          ub=NUM_var[i]+ub;
          c[lb:ub,lb:ub] = xx[lb:ub,lb:ub];
          lb=ub+1;
          i=i+1;
       endo;
       c=invpd(c);

       oldtrap = trapchk(65535);
       trap 1;
       invRCR = invpd(R*c*R');
       trap oldtrap;
       if scalerr(invRCR);
          goto errout("",30);
       endif;
       b = b - (c*R')*invRCR*(R*b-z);
    endif;

    { sig }=LRsse(dataset,LHS_vars,RHS_vars,NUM_var,b);

    if _lrdv == 1;   
       sig = sig./nobs;   
    else;   
       sig = sig./(nobs-(rows(RHS_vars)/rows(NUM_var)));
    endif;

    lnsig_o = ln(det(sig));
    if __output;
       print;
       print ftos(0,"        ITER. # = %*.*lf",4,0);;
       print ftos(lnsig_o,"    LOG OF DETERMINANT "\
                  "OF SIGMA = %*.*lf",14,8);
    endif;

    iter=1;
    pcd=abs(lnsig_o);
    maxiter = maxc(_lriter|1);
    do while ((iter <= maxiter) and (pcd >= _lrtol));
       isig = invpd(sig); 
       mk = sumc(NUM_var);
       xzx = zeros(mk,mk); 
       xzy = zeros(mk,1);
       i = 1;
       lbi = 1;
       ubi = 0;
       do while i <= rows(NUM_var);
          ubi = NUM_var[i] + ubi;
          j = 1;
          lbj = 1;
          ubj = 0;
          do while j <= rows(NUM_var);
             ubj = NUM_var[j] + ubj;
             if i == j;
                xzx[lbi:ubi,lbj:ubj]=isig[i,j]*xx[lbi:ubi,lbj:ubj];
             elseif j > i;
                xzx[lbi:ubi,lbj:ubj]=isig[i,j]*xx[lbi:ubi,lbj:ubj];
             elseif j < i;
                xzx[lbi:ubi,lbj:ubj] = xzx[lbj:ubj,lbi:ubi]';
             endif;
             xzy[lbi:ubi] = xzy[lbi:ubi]+isig[i,j]*xy[lbi:ubi,j];
             j = j + 1;
             lbj = ubj + 1;
          endo;
          i = i + 1;
          lbi = ubi + 1;
       endo;

       oldtrap = trapchk(65535);
       trap 1;
       c = invpd(xzx);
       trap oldtrap;
       if scalerr(c);
          goto errout("",30);
       endif;
       b = c*xzy;

       if type(restrict) == 13;
          oldtrap = trapchk(65535);
          trap 1;
          invRCR = invpd(R*c*R');
          trap oldtrap;
          if scalerr(invRCR);
             goto errout("",30);
          endif;
          b = b - (c*R')*invRCR*(R*b-z);
          c = c - (c*R')*invRCR*(R*c);
       endif;

       se = sqrtabs(diag(c));
       se0 = se .==0;
       t_temp = b./(se+se0);
       t1 = t_temp + miss(se0,1);
       df = zeros(g,1);
       t2 = zeros(rows(t1),1);
       lb=1;
       ub=0;
       i=1;
       do while i <= g;
          ub=NUM_var[i]+ub;
          if type(restrict) == 13;
             df[i]=nobs-NUM_var[i]+rows(R);
          else;
             df[i]=nobs-NUM_var[i];
          endif;
          t2[lb:ub] = 2*cdftc(abs(t_temp[lb:ub]),df[i]);
          t2[lb:ub] = t2[lb:ub]+miss(se0[lb:ub],1);
          lb=ub+1;
          i=i+1;
       endo;

       { sig }=LRsse(dataset,LHS_vars,RHS_vars,NUM_var,b);

       if _lrdv == 1;   
          sig = sig./nobs;   
       else;   
          sig = sig./(nobs-(rows(RHS_vars)/rows(NUM_var)));
       endif;


       lnsig_n = ln(det(sig));
       pcd = abs((lnsig_n - lnsig_o)/lnsig_o)*100;
       if __output;
          print ftos(iter,"        ITER. # = %*.*lf",4,0);;
          print ftos(lnsig_n,"    LOG OF DETERMINANT OF "\
                     "SIGMA = %*.*lf",14,8);
       endif;
       lnsig_o = lnsig_n; 
       iter=iter+1;

    endo;   

    yy=0;
    sse=0;
    sumy=0;
    ybar=0;
    dw=0;
    ef = zeros(1,g);
    readisk = 0;
    call seekr(fp,start);
    count1=counter;
    do while count1 < lastobs;
       dta=readr(fp,nr);
       count1=count1+rows(dta);
       if count1 > lastobs;
          dta = trimr(dta,0,count1-lastobs);
       endif;
       dta=packr(dta);
       if ismiss(dta);
          continue;
       endif;
       if not what $/= RHS_vars;
          dta=ones(rows(dta),1)~dta;
       endif;
       y=zeros(rows(dta),g); 
       e=zeros(rows(dta),g); 
       e1=e;
       tsumy=zeros(g,1);
       tdw=zeros(g,1);
       i=1;
       lb=1;
       ub=0;
       do while i <= g;
          ub=NUM_var[i]+ub;
          y[.,i]=dta[.,indcv(LHS_vars[i],vnames)];
          e[.,i]=dta[.,indcv(LHS_vars[i],vnames)] -
                   dta[.,indcv(RHS_vars[lb:ub],vnames)]*b[lb:ub]; 
          tsumy[i]=tsumy[i]+sumc(y[.,i]);
          e1[.,i] = lag(e[.,i]);
          e1[1,i] = ef[.,i];
          ef[.,i]=e[rows(y[.,i]),i];
          if readisk == 0;
               tdw[i]=tdw[i]+sumc((e[.,i] - e1[.,i])^2) - e[1,i]^2;
          else;
               tdw[i]=tdw[i]+sumc((e[.,i] - e1[.,i])^2);
          endif;
          i=i+1;
          lb=ub+1;
       endo;
       sse=sse+diag(moment(e,0));
       yy=yy+diag(moment(y,0));
       sumy=sumy+tsumy;
       dw=dw+tdw;
       readisk = readisk + 1;
    endo;

    ybar=sumy./nobs;
    dw=dw./sse;

    i=1;
    lb=1;
    ub=0;
    sst=0;
    rsq=zeros(g,1);
    rbsq=0;
    do until i > g;
       ub = NUM_var[i] + ub;
       names = LHS_vars[i]|RHS_vars[lb:ub];
       sst=yy[i] - nobs*(ybar[i]^2);
       rsq[i]=1-(sse[i]/sst);
       rbsq=1-((nobs-1)/df[i])*(1-rsq[i]);
       if __output;
          call LRprt(i,names,tobs,nobs,sst,df[i],rsq[i],rbsq,sse[i],
                     sig[i,i],"nofstat",0,0,dw[i],b[lb:ub],se[lb:ub],
                     t1[lb:ub],t2[lb:ub],_lrdv);
       endif;
       lb = ub + 1;
       i = i + 1;
    endo;

    s=1./sqrtabs(diag(c));
    cm=(s.*c).*s';

    if __output;
       if _lrpcov;
          print;
          matwrt("VARIANCE-COVARIANCE MATRIX OF ESTIMATES",
                   c,RHS_vars,RHS_vars,4);
       endif;
       if _lrpcor;
          print;
          matwrt("CORRELATION MATRIX OF ESTIMATES",
                   cm,RHS_vars,RHS_vars,4);
       endif;
    endif;

    Q = 0;     /* initialize the the output vector */
    Q = vput(Q,"LSUR","model");
    Q = vput(Q,RHS_vars,"nms");
    Q = vput(Q,NUM_var,"novars");
    Q = vput(Q,b,"b");
    Q = vput(Q,c,"vc");
    Q = vput(Q,se,"se");
    Q = vput(Q,diag(sig),"s2");
    Q = vput(Q,cm,"cx");
    Q = vput(Q,rsq,"rsq");
    Q = vput(Q,dw,"dw");
    Q = vput(Q,sse,"sse");
    Q = vput(Q,nobs,"nobs");
    Q = vput(Q,sig,"sigma");

    if fp > 0;
       fp=close(fp);
    endif;

    retp(Q);

errout:
    pop err;
    pop errmsg;

    if not trapchk(1);
       lrerror(errmsg,err);
       errorlog "LSUR estimation won't be done!";
       end;
    endif;
    if fp > 0;
       fp=close(fp);
    endif;
    retp(error(err));

endp;