/*-----------------------------------------------------------------------

Program file: 	PSE_UCEM.prg
Written by:   	Lullit Getachew
Written:        June 2013 

Note: 	This program carries out the UNIT toal cost benchmarking using 
		a single equation regression model of cost estimated with a 
	  	two-step procedure that corrects for heteroscedasticity.
  

---------------------------------------------------------------------*/
new; et = hsec; 

library dstat, lr, pgraph; 
lrset; graphset;
output file = C:\WORK\CLD\results\UCBM2.out reset;
outfile = "C:\\WORK\\CLD\\results\\UCBM2.out";
output on; screen on;

@Specify temporary files:@
temp_1 = "C:\\WORK\\CLD\\Temp_1.dat"; 
temp_2 = "C:\\WORK\\CLD\\Temp_2.dat"; 
temp_3 = "C:\\WORK\\CLD\\Temp_3.dat"; 

xls_file = "C:\\WORK\\CLD\\OEBwp.xlsx";
T_period = 10;
__trend = 1;
sheet = 6; 

? "";
?;"UNIT COST BENCHMARKING MODEL";?;

num_y = 3; num_z = 7;         
@ Definitions of variables for printed output: @
y1 = "average line length (km) per customer"; 
y2 = "annual peak demand (kW) per customer"; 
y3 = "Area (2011 service territory) per customer";

z1 = "% deliveries to general service & large customers";
z2 = "% customers added in last 10 years"; 
z3 = "sum of wind days with wind >= 10kts";
z4 = "% of single phase lines"; 
z5 = "system load factor"; 
z6 = "% lines underground";
z7 = "% underground * density (customers/area)";

? "  ";	
	? "********************************************************************************"; 
	? "  ";	
	? " Date: ";; ? Datestr(0);;
 	? " ****    ";; ? "MODEL ESTIMATION RESULTS";; 
   
	? "   ****   ";; ? "Time: ";; ? Timestr(0);
	? "  ";
	? "         		OUTPUT FILE:";; ? outfile;
	? "  ";	
    ? "         		DATA FILE:";; ? xls_file;	
	? " ";     
    ? "********************************************************************************"; 
    ? "  ";	? "  ";	
	? "        			DEFINITIONS OF OUTPUT VARIABLES:"; ? "   ";
	Format /rd 3,0;
    ? "				 		";; ? "Y1 is ";; ? Y1;	
	? " "; ? "						";; ? "Y2 is ";; ? Y2;
	? " "; ? "  						";; ? "Y3 is ";; ? Y3;
	? "  ";	? "  ";	
	? "        			DEFINITIONS OF BUSINESS CONDITION VARIABLES:"; ? "   ";
        ? "				 	";; ? "Z1 is ";; ? Z1;	
		? "				 	";; ? "Z2 is ";; ? Z2;	
		? "				 	";; ? "Z3 is ";; ? Z3;	
		? "				 	";; ? "Z4 is ";; ? Z4;	
		? "				 	";; ? "Z5 is ";; ? Z5;
		? "				 	";; ? "Z6 is ";; ? Z6;
	    ? "				 	";; ? "Z7 is ";; ? Z7;
    if __trend == 1; 
	? " "; ? "  					";; ? "Model includes time trend.";
	else;
	endif;
    if T_period == 10;	
	? " ";
	? " "; ? "                    ";; ? "Time period used: 2002";; 
                                        " through 2011";
    else;
    endif;

@ Read in data to temp_1: @

/*importf(xls_file,temp_1,0,0); */

 { nrows, ncols } = xlsGetSheetSize(xls_file,sheet);
last = 64 + ncols;
if last <= 90;
range_1 = "A1:" $+ chrs(last) $+ "1";
else;
range_1 = "A1:CD" @$+ chrs(last-26)@ $+ "1";
endif; 

vls = reshape(" ",9,1);
vls[4] = "Division by zero!";
vnames = xlsreadsa(xls_file,range_1,sheet,vls);

range = "A2"; 
vls = reshape(error(0),9,1);
vls[4] = 9999.99; 
Temp_Mtx = xlsreadm(xls_file,range,sheet,vls);

if not Saved(Temp_Mtx,Temp_1,vnames);
errorlog "An error occurred while writing Temp_1.dat";
end;
endif;

open f1 = ^Temp_1;
Temp_Mtx = readr(f1,rowsf(f1));
{ vnames,indx } = indices(Temp_1,0);
f1 = close(f1);

dataloop ^Temp_1 ^Temp_2; 

Select Year >= 2002 and Year <= 2011;

delete bad == 1;

@delete pegid == 33;@ @ Hydro One Network @
@delete pegid == 66;@ @ Toronto Hydro @

make y1 = kmaverage./yn; make y2 = peakann./yn; make y3 = area./yn; 

make z1 = pctlggs; make z2 = pctadd10;  make z3 = WIND; make z4 = pctsingle; 
make z5 = Loadfactor;  make z6 = pctug; make z7 = pctug.*(yn./area);  
 
make trend = year - 2007; make id_now = pegid; 
 
make gdppi = wo; make c = (ctotal./wtotal)./yn; 

Keep id_now year trend gdppi c y1 y2 y3 z1 z2 z3 z4 z5 z6 z7;  

endata;

/* log and mean scale explanatory vars */

open f1 = ^Temp_2;
data = readr(f1,rowsf(f1));

@ Demean input prices, outputs and any business condition variables. @
@ Log variables.  Write to temp_3.                                   @

{ vnames,indx } = indices(Temp_2,0);

f1=close(f1);
open f1 = ^Temp_2 for update; 
writer(f1,data);
f1 = close(f1);

/* initialize some variables */

id_now = Data[.,1];
grpnums = id_now[1,1]; j = 2;
do until j == rows(id_now); 
  if id_now[j,1] /= id_now[j-1,1]; grpnums = grpnums|id_now[j,1]; endif;
j = j + 1; 
endo;
dum = id_now[1:rows(id_now),.] .== grpnums'; T_period = sumc(dum); 
N_firms = rows(T_period); 

dataloop ^Temp_2 ^Temp_3;

c = ln(c);
y1 = ln(y1);  y2 = ln(y2);  y3 = ln(y3);
z1 = ln(z1); @z2 = ln(z2);@  z3 = ln(z3);
z4 = ln(z4);  z5 = ln(z5); @ z6 = ln(z6);@
@z7 = ln(z7);@ 

endata; 

open f1 = ^Temp_3;
data = readr(f1,rowsf(f1));
f1 = close(f1);
vnames = getname(Temp_3);  

/*   The model's final specification.   */

depvar =  { c }; 
rhs = { y1,y2,y3,z1,z2,z3,z4,z5,z6,z7,trend };
indvars = { const,y1,y2,y3,z1,z2,z3,z4,z5,z6,z7,trend }; 

/*  ===============================================  */

/* OLS Regression: */

__con = 0;
_olsres = 1;

nobs = rows(data);
df = nobs - rows(rhs)-1; 
x = ones(rows(data),1)~data[.,indcv(rhs,vnames)];
y = data[.,indcv("c",vnames)];

output off; screen off;
? "                           OLS REGRESSION "; ? "  ";
{ vnam, m, bols, stdbols, vcols, seols, sigols, cx, rsq, eols, dwstat } = ols(0,y,x);
? "  "; ? "  ";

/* for groupwise heteroscedasicity estimation */

gh_wt = zeros(rows(data),1);
firm = 1; lb = 1; ub = 0;
do until firm > N_firms;
ub = ub + T_period[firm,1];
gh_wt[lb:ub,.] = sqrt(meanc(eols[lb:ub,.]^2).*ones(rows(T_period[firm,1],1)));
lb = ub + 1;
firm = firm + 1;
endo;

__con = 0;
_olsres = 1;

x = (1/gh_wt).*(ones(rows(data),1)~data[.,indcv(rhs,vnames)]);
y = (1/gh_wt).*(data[.,indcv("c",vnames)]); 

@ heteroscedasticity regression @

{ vnam, m, bgh, stdbgh, vcgh, segh, siggh, cx, rsq, egh, dwstat } = ols(0,y,x);
? "  "; ? "  ";        

b = bgh; stderr = segh; t = b./stderr; estimatem = bgh;
pvt = 2*cdftc(abs(t),df);


nvar = cols(x); nvar1 = 1;
stdb = stdbgh;     
cor = cx[1:nvar,nvar+1];
sc = egh.*gh_wt;           
x = x.*gh_wt; y = y.*gh_wt;

Z = x*vcgh*x';
e = y-x*bgh;
sc = e;              
e2 = e'e/df; 
yhat = x*bgh;

ybar = meanc(y);
yy = moment(y,0);
rss = moment(sc,0);
tss = yy - nobs*(ybar^2);
rsq = 1 - (rss/tss);
rsqb = 1-((nobs-1)/df)*(1-rsq);
stdest = sqrt(rss/df);
fstat = (rsq/(1-rsq))*(df/nvar);
pvf = cdffc(fstat,nvar,df);

output on; screen on;
@***********************************************************************@
? "                    REGRESSION WITH GROUPWISE HETEROSKEDASTICITY "; ? "  ";

@ to print out heteroscedasticity corrected results - different from what the ols
  command produces in that the correct ss, rss & RSQRs printed  @

print ftos(nobs,"Valid cases:  %*.*lf",20,0);;
        print ftos(depvar,"      Dependent variable:%*.*s",20,8);
      
        print ftos(tss,"Total SS:     %*.*lf",20,3);;

        print ftos(df,"      Degrees of freedom:%*.*lf",20,0);

        print ftos(rsq,"R-squared:    %*.*lf",20,3);;
        print ftos(rsqb,"      Rbar-squared:      %*.*lf",20,3);
        print ftos(rss,"Residual SS:  %*.*lf",20,3);;
        print ftos(stdest,"      Std error of est:  %*.*lf",20,3);
        str = ftos(nvar,"F(%*.*lf,",1,0) $+ ftos(df,"%*.*lf):             "
            ,1,0);
        str = strsect(str,1,15) $+ ftos(fstat,"%*.*lf",19,3);
        print str;;
        print ftos(pvf,"      Probability of F:  %*.*lf",20,3);
     
        print ftos(dwstat,"Durbin-Watson:%*.*lf",20,3);;
        print ftos(N_firms,"      Number of Firms:   %*.*lf",20,3); 
        print;
        print "                         Standard                 Prob   Sta"\
            "ndardized  Cor with";
        print "Variable     Estimate      Error      t-value     >|t|     E"\
            "stimate    Dep Var"; 

        print "------------------------------------------------------------"\
            "-------------------";
        omat = indvars~b~stderr~t~pvt~stdb;
        omat = omat~cor;
        mask = 0~1~1~1~1~1~1;
        let fmt[7,3] = "-*.*s" 9 8 "*.*lf" 12 6 "*.*lf" 12 6 "*.*lf" 12 6 ""\
            "*.*lf" 10 3 "*.*lf" 12 6 "*.*lf" 12 6;
         call printfm(omat,mask,fmt);
@**********************************************************************************@

LevelLast3 = zeros(N_firms,8);
YearLevel = zeros(nobs,8);

@ for level total cost performance over the LAST five THREE YEARS @

lb = 1; ub = 0; firm = 1;
do until firm > N_firms;
ub = ub + T_period[firm,1];

pcost = zeros(T_period[firm,1],1);
acost = zeros(T_period[firm,1],1);
score = zeros(T_period[firm,1],1);

acost = y[lb:ub,.];
pcost = yhat[lb:ub,.];
score = sc[lb:ub,.];

df = nobs - cols(x) + 1;
T_p = T_period[firm,1]; 
one3 = (ones(1,3))./(3); 
one2 = (ones(1,2))./(2); 
sigc3 = e2/3;
sigc2 = e2/2;
if T_p == 2;
vsc3 = one2*(x[T_p-1:T_p,.]*vcgh*x[T_p-1:T_p,.]')*one2' + sigc2;
else;
vsc3 = one3*(x[T_p-2:T_p,.]*vcgh*x[T_p-2:T_p,.]')*one3' + sigc3;
endif;
setc3 = sqrt(vsc3);

if T_p == 2;
LevelLast3[firm,1] = data[lb,1];                 
LevelLast3[firm,2] = meanc(acost[T_p-1:T_p]);
LevelLast3[firm,3] = meanc(pcost[T_p-1:T_p]); 
LevelLast3[firm,4] = meanc(score[T_p-1:T_p]); 
LevelLast3[firm,5] = (meanc(score[T_p-1:T_p]))/setc3;                  
LevelLast3[firm,6] = cdftc(abs((meanc(score[T_p-1:T_p]))/setc3),df); 
LevelLast3[firm,7] = meanc(data[T_p-1:T_p,2]);
LevelLast3[firm,8] = setc3;
else;
LevelLast3[firm,1] = data[lb,1];                 
LevelLast3[firm,2] = meanc(acost[T_p-2:T_p]);
LevelLast3[firm,3] = meanc(pcost[T_p-2:T_p]); 
LevelLast3[firm,4] = meanc(score[T_p-2:T_p]); 
LevelLast3[firm,5] = (meanc(score[T_p-2:T_p]))/setc3;                  
LevelLast3[firm,6] = cdftc(abs((meanc(score[T_p-2:T_p]))/setc3),df); 
LevelLast3[firm,7] = meanc(data[T_p-2:T_p,2]);
LevelLast3[firm,8] = setc3;
endif;

@ for level total cost performance by year @

SEEOY = e2 + diag(Z);  @ standard error of the forecast error - yearly assessment @
setcY = sqrt(seeoY);
setcYY = setcY[lb:ub,.];

YearLevel[lb:ub,1] = data[lb:ub,1];     
YearLevel[lb:ub,2] = acost;
YearLevel[lb:ub,3] = pcost;             
YearLevel[lb:ub,4] = score; 
YearLevel[lb:ub,5] = score./setcYY;        
YearLevel[lb:ub,6] = cdftc(abs(score./setcYY),df); @ one-tailed test @
YearLevel[lb:ub,7] = data[lb:ub,2]; 
YearLevel[lb:ub,8] = setcYY; 

lb = ub + 1;
firm = firm + 1; 
endo;

/* load ids, sort by score and print results */

load idname[139,2] = c:\WORK\CLD\idOntario2.txt;
f1 = fopen("c:\\WORK\\CLD\\idsOntario2","r"); 

idsname = fgetsat(f1,139); 

? "   ";  ? "   "; 
? "   	IN-SAMPLE PREDICTION OF TOTAL COST LEVEL PERFORMANCE LAST THREE YEARS"; ? "   "; 

LevelLast3 = sortc(LevelLast3,4); ? "   ";
? "    Actual     Predicted Difference  t_ratio	 p_value    Utility    	 "; ? "  ";

firm = 1;
do until firm > N_firms;     
nameid = idsname[indnv(LevelLast3[firm,1],idname[.,1])];
Format /rd 10,6; 
print LevelLast3[firm,2:6];; ? "        ";; print nameid;
firm = firm + 1; 
endo;  

closeall; Output off;

