#This script contains a variety of functions for estimating total cost models (using iterated SUR) and benchmarking firms in a given sample
#Currently, no custom classes are included in this script. Model results are contained in lists, which can prove somewhat unwieldy.
#At some point in the future, it would be advisable to design a class for the model returned by hetsur as well as default summary and print methods
#Any number of factors of production may be used with all of these functions


#This function meanscales selected variables.
#Forecast values for certain companies may be included in the input dataset but excluded from the calculation of the means 
#by using the incfc and fcvar options.
#The function returns a list containing two dataframes, 
#the mean-scaled data and the list of means used to scale the data.
#The verbose option is provided for debugging purposes, changing it from its default to T will cause 
#the function to print the class of the each variable being mean scaled, its name, and its mean, 
#as the function loops over the variables specified by the user.

meanscale <- function(fulldata,
                      varlist,
                      verbose = F,
                      incfc = F,
                      fcvar = NULL) {
  
  #Create data frame in which to store the means of the variables.
  meanframe <-
    data.frame(
      varname = varlist,
      varmean = rep(0, length(varlist)),
      stringsAsFactors = F
    )
  
  #Initialize vector of means
  tempmeans <- NULL
  
  #Create temporary datasets. This distinction is only meaningful if the incfc and fcvar options are specified.
  newdata <- fulldata
  meandata <- fulldata
  
  #Check if forecast values have been included in the input data frame, and if so, drop them from the dataset used to calculate the sample means.
  
  if (incfc == T) {
    fcvarmark <- which(names(fulldata) == fcvar)
    tempdrop <- which(fulldata[, fcvarmark] == 1)
    meandata <- meandata[-tempdrop, ]
    rm(tempdrop)
  }
  
  #Loop over variables to be mean-scaled
  for (i in varlist) {
    #Get location of current variable in the data frame
    varmark <- which(names(meandata) == i)
    if (verbose == T) {
      print(class(meandata[, varmark]))
    }
    #Calculate the mean of the current variable, excluding missing values from this calculation.
    tempmean <- mean(meandata[, varmark], na.rm = T)
    if (verbose == T) {
      print(c(i, tempmean))
    }
    #Append the mean of the current variable to the vector of means
    tempmeans <- c(tempmeans, tempmean)
    
    #Scale all observations data for the current variable (not necessarily just the data used to calculate the mean) by the sample mean.
    newdata[, varmark] <- fulldata[, varmark] / tempmean
    #Remove temporary variables from memory before next cycle of loop.
    rm(tempmean, varmark)
  }
  
  #Fill the column of the data frame of variable means with the vector constructed during the above loop.
  meanframe$varmean <- tempmeans
  
  #Remove temporary objects from memory.
  rm(tempmeans, meandata)
  
  #Create list of outputs.
  outputmean <- list(newdata, meanframe)
  
  #Return outputs.
  return(outputmean)
  
}

#This function logs selected variables
#The verbose option is provided for debugging purposes. Changing it from its default to T will 
#cause the function to print the variable name and class as it loops over the variables specified by the user.
logvars <- function(fulldata, varlist, verbose = F) {
  #Create local data frame for new data.
  newdata <- fulldata
  varinfo <-
    data.frame(
      varname = varlist,
      check0 = rep(0, length(varlist)),
      stringsAsFactors = F
    )
  
  
  #Loop over variables to be logged
  for (i in 1:length(varlist)) {
    tempvar <- varlist[i]
    #Get location of current variable in input data frame
    varmark <- which(names(fulldata) == tempvar)
    
    #check if there are any zero values in the current variable
    check0 <- ifelse(length(which(fulldata[, varmark] == 0)) > 0, 1, 0)
    
    #print information on current variable if verbose was specified as true
    if (verbose == T) {
      print(c(tempvar, class(fulldata[, varmark]), check0))
    }
    #Replace current variable in the local data frame with the natural logarithm of that variable in the input data frame. 
    #If that variable has any zero values, add 1 to all values before logging. #NB: DO NEED TO ADD 1!! DOES IT FOR YOU!
    if (check0 == 0) {
      newdata[, varmark] <- log(fulldata[, varmark])
    }
    else{
      newdata[, varmark] <- log(1 + fulldata[, varmark])
    }
    #Remove temporary variables before next cycle of loop.
    varinfo$varname[i] <- tempvar
    varinfo$check0[i] <- check0
    rm(varmark, check0, tempvar)
  }
  #Return output data frame.
  outputlogs <- list(newdata, varinfo)
  return(outputlogs)
}




#hetsur is a regression routine that has been almost entirely translated directly from gauss to R
#The structure of the code follows the original gauss code found in SUR3HUP.src extremely closely
#Annotations are included to help the user understand the code, but not to explain why the routine is carried out in this way
#It is likely that the program could be improved somewhat by taking advantage of features of R not available in Gauss
#The arguments of this program are as follows:
# 1) lhs_vars: a vector containing the string names of the the dependent variables, in order (cost should always go first)
# 2) rhs_vars: a vector containing the string names of each set of independent variables 
#    (order of price variables in share equations should match order of factors in lhs_vars).
#    The sets should be ordered by equation (as defined by the order of lhs_vars) and variables should be repeated as necessary.
# 3) num_var: the number of independent variables in each equation (also ordered according to lhs_vars)
# 4) restrict.R: the left hand side of the matrix defining restrictions on the coefficients.
#    Columns in restrict.R correspond to the similarly indexed variable in rhs_vars.
#    For Total Cost Models, this is used to force coefficients in different equations to 
#    be equal to one another following shephard's lemma.
# 5) restrict.z: the right hand side of the matrix defining restrictions on the coefficients.
# 6) ac: whether an autocorrelation correction should be performed prior to estimation and if so, what kind.
#    "none" results in no autocorrelation correction.
#    "ar1" results in an autocorrelation correction using the same rho for all companies
#    "psar1" results in an autocorrelation correction using different rhos for each company
#    It should be noted that the Gauss code and this R code do not use the same estimation method for a 
#    single autocorrelation coefficient and the panel-specific autocorrelation coefficients
# 7) het: whether a heteroskedasticity correction should be applied prior to estimation (T = yes, F = no)
# 8) lrdv: Determines which divisor is used to compute the covariance matrix of the error
#    If 0, T-(K/M) is used as divisor, where T is thenumber of observations, K is the number of all 
#    right-hand side variables in the systems,and M is the total number of equations. 
#    Hence,(K/M) is the average number of coefficientsper equation.
#    If 1, T is used as divisor. Users are encouraged to use this, since it provides good asymptotic properties for the estimator.
#lriter: maximum number of iterations of the routine without convergence before returning estimates
#lrtol: the threshold for determining convergence
#verbose: prints progress of determinant of sigma matrix

#The object it returns contains a list of the following:
# 1) Dependent.Variables: a vector the strings of the dependent variables in the model
# 2) Models: a list of data frames containing the estimation results (coefficients, standard errors, t statistics, and p values)
# 3) Covariance.Matrix: the covariance matrix of the estimated parameters
# 4) Correlation.Matrix: the correlation matrix of the estimated parameters
# 5) Sigma.Matrix: the matrix of equation variances/covariances
# 6) Model.Statistics: a list of various model statistics
# 7) Transformed.Data: the data after the application of the heteroskedasticity and autocorrelation corrections
# 8) Estimation.Parameters: a list of parameters used in estimating the model 
#    (important for replication later during out of sample reestimation)
# 9) The following parameters are recorded: restrict.R, restrict.z, het, ac, lrdv, lriter, lrtol
# 10) Original.Data: the data before the application of the heteroskedasticity and autocorrelation corrections


hetsur <-
  function(lhs_vars,
           rhs_vars,
           num_var,
           data,
           idvar,
           tvar,
           restrict.R,
           restrict.z,
           het = T,
           ac = c("none", "ar1", "psar1"),
           lrdv = 1,
           lriter = 100,
           lrtol = .0001,
           verbose = T) {
    
    #add new variables, as necessary to the full data frame.
    #AV: function add interaction terms
    alldata <- addvars(data = data, rhs_vars = rhs_vars)
    
    #check which columns in the data are numeric and reduce the data set to only those
    #NEW 072016
    numtest <- sapply(alldata, is.numeric)
    nummark <- which(numtest == T)
    data <- alldata[, nummark]
    
    #convert data to matrix
    dta <- as.matrix(data)
    
    #set values of objects involved in applying the restrictions during estimation
    R <- restrict.R
    z <- restrict.z
    
    #find the locations (column indexes) of the dependent and independent variables in the full data frame.
    lhslocs <- match(lhs_vars, names(data))
    rhslocs <- match(rhs_vars, names(data))
    
    #find the locations (column indexes) of the id and time variables in the full data frame
    idvarloc <- match(idvar, names(data))
    tvarloc <- match(tvar, names(data))
    
    #order the full data by id, then time
    dta <- dta[order(dta[, idvarloc], dta[, tvarloc]), ]
    
    #create a table of how many observations appear for each id number, convert it to a dataframe, 
    #and rename its columns appropriately
    idtlist <- table(data[, idvarloc])
    idtframe <- as.data.frame(idtlist)
    names(idtframe) <- c(idvar, "apps") #apps for appearnces
    class(idtframe[, 2]) <- "numeric"
    
    #record how many firms are in the dataset
    N_firms <- dim(idtframe)[1]
    
    
    #calculate the values of the matrix x`x and x`y.
    #AV: %*% is matrix multiplication operator 
    xx <- t(dta[, rhslocs]) %*% dta[, rhslocs]
    xy <- t(dta[, rhslocs]) %*% dta[, lhslocs]
    
    #create empty matrices for old x data, new x data, and lagged x data
    dtax <- matrix(0, dim(dta)[1], dim(xx)[2])
    dtax1 <- matrix(0, dim(dta)[1], dim(xx)[2])
    dtaxL <- matrix(0, dim(dta)[1], dim(xx)[2])
    
    #create empty matrices for old y data, new y data, and lagged y data
    dtay <- matrix(0, dim(dta)[1], length(num_var))
    dtay1 <- matrix(0, dim(dta)[1], length(num_var))
    dtayL <- matrix(0, dim(dta)[1], length(num_var))
    
    #initialize hetloop (this variable tracks progress in the calculation and application of heteroskedasticity 
    #and/or autocorrelation corrections)
    hetloop <- 1
    
    #enter loop for calculation and application of heteroskedasticity and/or autocorrelation corrections
    while (hetloop <= 2) {
      #check whether loop has been completed once and therefore heteroskedsticity weights 
      #and/or autocorrelation coefficients are available
      #if so, enter a block of code which applies the corrections
      if (hetloop == 2) {
        #initialize variables marking the columns of the x data currently in use
        lb <- 1
        ub <- 0
        #initialize the variable marking the column of the y data currently in use
        i <- 1
        
        #loop over columns of y data, stop when column index exceeds the number of equations
        #AV:g is defined below since this part is not run during the first iteration of the loop.
        while (i <= g) { 
          #properly increment the upper bound of the columns currently in use for the x data (incremented by the number of variables in the equation at hand)
          ub <- num_var[i] + ub
          #initialize a variable tracking the firm currently in use
          k <- 1
          #initialize variables marking the rows of data corresponding to the firm currently in use
          lbk <- 1
          ubk <- 0
          
          while (k <= N_firms) {
            #properly increment the upper bound of the rows currently in use 
            #(incremented by the number of observations for the current firm in the data)
            ubk <- ubk + idtframe$apps[k]
            
            #divide each column of the rows of x data currently selected by the corresponding heteroskedasticity 
            #weights for the equation/firm in question
            dtax[lbk:ubk, lb:ub] <-
              sweep(dta[lbk:ubk, rhslocs[lb:ub]], 1, wts[lbk:ubk, i], `/`)
            
            #create lagged x data with a row of zeroes in the initial year
            dtaxL[lbk:ubk, lb:ub] <-
              rbind(rep(0, ub - lb + 1), dtax[lbk:(ubk - 1), lb:ub])
            #perform autocorrelation correction for the first year in which the firm appears in the data set
            dtax1[lbk, lb:ub] <- sqrt(1 - rhoi[lbk, i] ^ 2) * dtax[lbk, lb:ub]
            
            #check whether there is more than one year after the first year for the current firm 
            #and then use the appropriate function to carry out the autocorrelation correction in other years.
            #this distinction is necessary because sweep() does not work if only one row of data is given to it
            if (lbk + 1 < ubk) {
              dtax1[(lbk + 1):ubk, lb:ub] <-
                dtax[(lbk + 1):ubk, lb:ub] - sweep(dtaxL[(lbk + 1):ubk, lb:ub], 1, rhoi[(lbk +
                                                                                           1):ubk, i], `*`)
            }
            if (lbk + 1 == ubk) {
              dtax1[(lbk + 1):ubk, lb:ub] <-
                dtax[(lbk + 1):ubk, lb:ub] - dtaxL[(lbk + 1):ubk, lb:ub] * rhoi[(lbk + 1):ubk, i]
            }
            
            
            #dtay[,i] <- dta[,lhslocs[i]]/wts[,i] CHANGED AS BELOW
            
            #divide each the rows of y data currently selected by the corresponding heteroskedasticity weights 
            #for the equation/firm in question
            dtay[lbk:ubk, i] <- dta[lbk:ubk, lhslocs[i]] / wts[lbk:ubk, i]
            #create lagged y data with a zero for the first year in which the firm appears in the data set
            dtayL[lbk:ubk, i] <- c(0, dtay[lbk:(ubk - 1), i])
            #perform the autocorrelation correction for the first year in which the firm appears in the data set
            dtay1[lbk, i] <- sqrt(1 - rhoi[lbk, i] ^ 2) * dtay[lbk, i]
            #apply the autocorrelation correction in the remaining years
            dtay1[(lbk + 1):ubk, i] <-
              dtay[(lbk + 1):ubk, i] - (rhoi[(lbk + 1):ubk, i] * dtayL[(lbk + 1):ubk, i])
            
            #increment the variable tracking the firm currently in use
            k <- k + 1
            
            #increment the variable tracking the lower bound of the rows currently in use (incremented to the upper bound of the rows just corrected plus 1)
            lbk <- ubk + 1
          }
          
          #increment the variable tracking the lower bound of the columns of x data currently in use (incremented to the upper bound of the columns just corrected plus 1)
          lb <- ub + 1
          #increment the variable tracking the equation in use
          i <- i + 1
        }
        
        
        
        #DELIF EXPRESSIONS OMITTED????
        #calculate new x`x and x`y matrices using corrected data
        xx <- t(dtax1) %*% dtax1
        xy <- t(dtax1) %*% dtay1
        
        #exit section of code that corrects data, began with if(hetloop==2)
      }
      
      #since hetloop always starts off at 1, this will be the first section of code actually executed when the program runs
      #get the number of equations
      g <- length(num_var)
      #initialize the matrix of coefficients
      b <- matrix(0, length(rhs_vars), 1)
      #initialize variables marking the columns of the x data currently in use
      lb <- 1
      ub <- 0
      #initialize the variable marking the column of the y data currently in use
      i <- 1
      
      #loop over columns of y data, stop when column index exceeds the number of equations
      while (i <= g) {
        #properly increment the upper bound of the columns currently in use for the x data (incremented by the number of variables in the equation at hand)
        ub <- num_var[i] + ub
        #get inverse of x`x matrix
        ixxtemp <- solve(xx[lb:ub, lb:ub])
        #calculate coefficients for the equation currently in use
        b[lb:ub] <- ixxtemp %*% xy[lb:ub, i]
        #increment the lower bound of the columns of x data currently in use
        lb <- ub + 1
        #increment the column of y data currently in use
        i <- i + 1
      }
      
      #check whether heteroskedasticity and autocorrelation corrections have been performed and, 
      #if so, apply restrictions to estimated coefficients
      if (hetloop == 2) {
        c <- matrix(0, length(rhs_vars), length(rhs_vars))
        lb <- 1
        ub <- 0
        i <- 1
        while (i <= g) {
          ub <- num_var[i] + ub
          c[lb:ub, lb:ub] <- xx[lb:ub, lb:ub]
          lb <- ub + 1
          i <- i + 1
        }
        
        c <- solve(c)
        invRcR <- solve(R %*% c %*% t(R))
        b <- b - (c %*% t(R)) %*% invRcR %*% (R %*% b - z)
      }
      
      
      #check whether heteroskedasticity and autocorrelation corrections have been performed and, 
      #if not, calculate the groupwise heteroskedasticity weights and autocorrelation coefficient(s) to be used
      if (hetloop == 1) {
        #initialize matrix of residuals
        eols <- matrix(0, dim(dta)[1], g)
        #initialize matrix of lagged residuals
        eols1 <- matrix(0, dim(dta)[1], g)
        #initialize matrix of heteroskedasticity weights (nobs x neqs)
        gh_wt <- matrix(0, dim(dta)[1], g)
        #initialize matrix of heteroskedasticity weights (n_firms x neqs)
        gh_wt0 <- matrix(0, N_firms, g)
        #initialize matrix of firm durbin watson components
        ddwf <- matrix(0, N_firms, g)
        #initialize vector of durbin watson statistics
        ddw <- matrix(0, g, 1)
        #initialize matrices of autocorrelation coefficients
        rhoi <- matrix(0, dim(dta)[1], g)
        rho2a <- matrix(0, 1, g)
        rho1 <- matrix(0, dim(dta)[1], g)
        rho2 <- matrix(0, dim(dta)[1], g)
        
        #initialize a variable marking the equation (column of y data) currently in use
        i <- 1
        #initialize variables marking the columns of the x data currently in use
        lb <- 1
        ub <- 0
        #loop until index exceeds number of columns/equations it is indexing
        while (i <= g) {
          #properly increment upper bound of columns of x data currently in use
          ub <- num_var[i] + ub
          #initialize variable marking the firm currently in use
          k <- 1
          #initialize variables marking the rows corresponding to the firm currently in use
          #LBK AND UBK ARE NOT USED(?)
          lbk <- 1
          ubk <- 0
          lbkk <- 1
          ubkk <- 0
          while (k <= N_firms) {
            #properly increment upper bound of rows currently in use
            ubk <- ubk + idtframe$apps[k] - 1
            ubkk <- ubkk + idtframe$apps[k]
            #generate residuals for the current dependent variable and company
            eols[lbkk:ubkk, i] <-
              dta[lbkk:ubkk, lhslocs[i]] - dta[lbkk:ubkk, rhslocs[lb:ub]] %*% b[lb:ub]
            #TWO STEPS NECESSARY HERE?
            #calculate company specific variance and assign its square root to the relevant components of the group heteroskedasticity weight matrices
            gh_wt0[k, i] <-
              sqrt(mean(eols[lbkk:ubkk, i] ^ 2) - mean(eols[lbkk:ubkk, i]) ^ 2)
            gh_wt[lbkk:ubkk, i] <- gh_wt0[k, i]
            
            #generate lagged residuals with a 0 in the initial position
            eols1[lbkk:ubkk, i] <- c(0, eols[lbkk:(ubkk - 1), i])
            ##IS DW STAT INCORRECT??? why not sum((eols[(lbkk+1):ubkk,i]-eols1[lbkk:(ubkk-1),i])^2)-eols[lbkk:ubkk,i]^2
            #calculate company specific durbin watson component
            ddwf[k, i] <-
              sum((eols[lbkk:ubkk, i] - eols1[lbkk:ubkk, i]) ^ 2) - sum(eols[lbkk, i] ^
                                                                          2)
            #calculate company specific autocorrelation coefficient (corresponds to scorr in panelAR)
            rho1[lbkk:ubkk, i] <-
              sum(eols[lbkk:ubkk, i] * eols1[lbkk:ubkk, i]) / sum(eols[lbkk:ubkk, i] ^
                                                                    2)
            #increment index tracking the company currently in use
            k <- k + 1
            #increment lower bounds of rows that correspond to the company currently in use
            lbk <- ubk + 1
            lbkk <- ubkk + 1
            
            #calculate full sample common autocorrelation coefficient, 
            #this gets redone repeatedly until it is done properly at the very end of the loop
            ddw[i] <- sum(ddwf[, i])
            rho2a[, i] <- 1 - .5 * (ddw[i] / sum(eols[, i] ^ 2))
            rho2[, i] <- rho2a[, i]
          }
          #increment lower bound of columns of x data currently in use
          lb <- ub + 1
          #increment equation/dependent variable/column of y data currently in use
          i <- i + 1
        }
        
        #check input parameter het and set wts according to the result
        #if het is false, all observations will just be divided by one during the next loop cycle, changing nothing
        #if het is true, all observations will be divided by the appropriate weight
        if (het == T) {
          wts <- gh_wt
        }
        if (het == F) {
          wts <- matrix(1, dim(gh_wt)[1], dim(gh_wt)[2])
        }
        
        #check input parameter ac and set weight according to the result
        #if ac is "none", the the autocorrelation coefficient will be set to 0, 
        #which will result in no changes when the correction is applied in the enxt cycle of the loop
        if (ac == "psar1") {
          rhoi <- rho1
        }
        if (ac == "ar1") {
          rhoi <- rho2
        }
        if (ac == "none") {
          rhoi <- matrix(0, dim(dta)[1], g)
        }
      }
      #increment the variable tracking progress in applying the autocorrelation and heteroskedasticity corrections
      hetloop <- hetloop + 1
    }
    #create an object containing the transformed dataset
    hdataset <- cbind(dtay1, dtax1)
    
    #initialize a variable tracking the current iteration
    iter <- 0
    #get the sigma matrix for the current parameter values
    sig <-
      LRsseh(
        lhs_vars = lhs_vars,
        rhs_vars = rhs_vars,
        num_var = num_var,
        b = b,
        data = hdataset,
        g = g
      )
    #calculate and display the log of the determinant of the sigma matrix (divided by the number of observations)
    if (lrdv == 1) {
      sig <- sig / dim(hdataset)[1]
    }
    lnsig_o <- log(det(sig))
    if (verbose == T) {
      print(paste("ITERATION ", iter, sep = ""))
      print(paste("LOG OF DETERMINANT OF SIGMA = ", lnsig_o, sep = ""))
    }
    #store the absolute value of the log of the determinant of sigma
    pcd <- abs(lnsig_o)
    #copy lriter into a new variable that denotes the maximum number of iterations
    maxiter <- lriter
    #initialize a loop that will end if either: 
    #1. the number of iterations exceeds the maximum specified by the user; or
    #2. the difference in the absolute value of the log of the determinant of the sigma matrix 
    #falls below the tolerance specified by the user
    while (iter <= maxiter & pcd >= lrtol) {
      #get the estimation results for the next iteration of the SUR regression
      isig <- solve(sig)
      mk <- sum(num_var)
      xzx <- matrix(0, mk, mk)
      xzy <- matrix(0, mk, 1)
      i <- 1
      lbi <- 1
      ubi <- 0
      while (i <= length(num_var)) {
        ubi <- num_var[i] + ubi
        j <- 1
        lbj <- 1
        ubj <- 0
        while (j <= length(num_var)) {
          ubj <- num_var[j] + ubj
          xzx[lbi:ubi, lbj:ubj] <- isig[i, j] * xx[lbi:ubi, lbj:ubj]
          xzy[lbi:ubi] <- xzy[lbi:ubi] + isig[i, j] * xy[lbi:ubi, j]
          j <- j + 1
          lbj <- ubj + 1
        }
        i <- i + 1
        lbi <- ubi + 1
      }
      
      c <- solve(xzx)
      b <- c %*% xzy
      invRcR <- solve(R %*% c %*% t(R))
      b <- b - (c %*% t(R)) %*% invRcR %*% (R %*% b - z)
      c <- c - (c %*% t(R)) %*% invRcR %*% (R %*% c)
      
      #CHANGED AS BELOW
      #se<-sqrt(abs(diag(c)))
      se <- sqrt(diag(c))
      t_temp <- b / se
      t1 <- t_temp
      t2 <- matrix(0, length(t1), 1)
      df <- matrix(0, g, 1)
      lb <- 1
      ub <- 0
      i <- 1
      while (i <= g) {
        ub <- num_var[i] + ub
        df[i] <- dim(hdataset)[1] - num_var[i] - dim(R)[1]
        for (j in lb:ub) {
          t2[j] <- 2 * pt(abs(t1[j]), df[i], lower.tail = F)
        }
        lb <- ub + 1
        i <- i + 1
      }
      
      #get sigma matrix for updated parameters and divide by proper divisor
      sig <-
        LRsseh(
          lhs_vars = lhs_vars,
          rhs_vars = rhs_vars,
          num_var = num_var,
          b = b,
          data = hdataset,
          g = g
        )
      if (lrdv == 1) {
        sig <- sig / dim(hdataset)[1]
      }
      #calculate the log of the determinant of sigma and the percentage change in it
      lnsig_n <- log(det(sig))
      pcd <- abs((lnsig_n - lnsig_o) / lnsig_o) * 100
      #increment the iteration number
      iter <- iter + 1
      #if verbose is true, display the iteration number and the current log of the determinant of the sigma matrix
      if (verbose == T) {
        print(paste("ITERATION ", iter, sep = ""))
        print(paste("LOG OF DETERMINANT OF SIGMA = ", lnsig_n, sep = ""))
      }
      
      #replace the old log of the determinant of sigma with the new one
      lnsig_o <- lnsig_n
      
    }
    
    #initialize a variety of variables for use in calculating model statistics model
    #many of these are not used in the R code
    yy <- 0
    sse <- 0
    sumy <- 0
    ybar <- 0
    dw <- 0
    ef <- matrix(0, 1, g)
    #readisk<-0
    tdwf <- matrix(0, N_firms, g)
    e2 <- matrix(0, dim(dtax1), g)
    ef2 <- matrix(0, 1, g)
    e3 <- matrix(0, dim(dta)[1], g)
    #tdw3<-matrix(0,g,1)
    y <- matrix(0, dim(dta)[1], g)
    e <- matrix(0, dim(dtax1), g)
    e1 <- e3
    tsumy <- matrix(0, g, 1)
    tdw <- matrix(0, g, 1)
    
    #initialize the variable tracking the current equation
    i <- 1
    #initialize variables marking the columns of x data that correspond to the current equation
    lb <- 1
    ub <- 0
    #loop until the variable indexing equations exceeds the number of equations
    while (i <= g) {
      #properly increment the upper bound of the columns of x data that correspond to the current equation
      ub <- num_var[i] + ub
      
      #assign dependent variable original values to the current column of y
      y[, i] <- dta[, lhslocs[i]]
      #assign residuals from transformed data for the current equation to the current column of e
      e[, i] <- dtay1[, i] - dtax1[, lb:ub] %*% b[lb:ub]
      #assign residuals from the original data for the current equation to the current column of e3
      e3[, i] <- dta[, lhslocs[i]] - dta[, rhslocs[lb:ub]] %*% b[lb:ub]
      
      #record the sum of the current y variable
      tsumy[i] <- tsumy[i] + sum(y[, i])
      
      #initialize variable tracking the firm currently in use
      k <- 1
      #initialize variables marking the rows that correspond to the firm currently in use
      lbk <- 1
      ubk <- 0
      lbkk <- 1
      ubkk <- 0
      #loop until the variable indexing firms exceeds the number of firms
      while (k <= N_firms) {
        #properly increment the upper bound of the rows that correspond to firm currently in use
        ubk <- ubk + idtframe$apps[k] - 1
        ubkk <- ubkk + idtframe$apps[k]
        
        #USES KK INSTEAD OF K AS IN ORIGINAL
        #properly assign lagged residuals with 0 in the initial position
        e2[lbkk:ubkk, i] <- c(0, e[lbkk:(ubkk - 1), i])
        #calculate the component of the durbin watson statistic for the current firm
        tdwf[k, i] <- sum((e[lbkk:ubkk, i] - e2[lbkk:ubkk]) ^ 2) - e[lbk, i] ^ 2
        #calculate the sum of the current durbin watson components and store it, this will be redone repeatedly until the very end of the loop, when it will be calculated properly
        tdw[i] <- sum(tdwf[, i])
        #increment the variable marking the firm currently in use
        k <- k + 1
        #increment the variabes marking the lower bound of the rows currently in use, lbk LOOKS VERY WRONG(?)
        lbk <- ubk + 1
        lbkk <- ubkk + 1
      }
      #increment the equation currently in use
      i <- i + 1
      #properly increment the lower bound of the columns of x data currently in use
      lb <- ub + 1
    }
    
    #calculate sum square errors based on residuals produced using transformed data
    sse <- sse + diag(t(e) %*% e)
    #calculate sum square errors based on residuals produced using original data
    sse3 <- diag(t(e3) %*% e3)
    #calculate final sigma matrix
    sig <- t(e3) %*% e3 / dim(hdataset)[1]
    #calculate y`y matrix
    yy <- yy + t(y) %*% y
    sumy <- sumy + tsumy
    #store components of durbin watson
    dw <- dw + tdw
    #store means of y
    ybar <- sumy / dim(hdataset)[1]
    #finish calculation of durbin watson coefficients
    dw <- dw / sse
    
    
    #initialize the variable marking the equation currently in use
    i <- 1
    #initialize variables marking the columns of x data currently in use
    lb <- 1
    ub <- 0
    #initialize vectors for various model statistics
    sst <- matrix(0, g, 1)
    rsq <- matrix(0, g, 1)
    rbsq <- matrix(0, g, 1)
    
    #loop until the variable indexing the equations exceeds the number of equations
    while (i <= g) {
      #properly increment the variable marking the upper bound of the columns of x data currently in use
      ub <- num_var[i] + ub
      #calculate model statistics
      sst[i] <- yy[i, i] - dim(hdataset)[1] * ybar[i] ^ 2
      rsq[i] <- 1 - sse3[i] / sst[i]
      rbsq[i] <- 1 - ((dim(hdataset)[1] - 1) / df[i]) * (1 - rsq[i])
      #properly increment the variable marking the lower bound of the columns off x data currently in use
      lb <- ub + 1
      #increment the variable tracking the equation currently in use
      i <- i + 1
    }
    #calculate the correlation matrix of the estimates
    s <- diag(1 / sqrt(diag(c)))
    cm <- s %*% c %*% t(s)
    #store the number of observations
    nobs <- dim(hdataset)[1]
    
    #initialize the object containing the models to be returned
    modelsout <- list(NULL)
    #initialize the variable tracking the equation currently in use
    i <- 1
    #initialize the variables marking the right hand side variables currently in use
    lb <- 1
    ub <- 0
    #loop until the variable indexing the equations exceeds the number of equations
    while (i <= g) {
      #properly increment the upper bound of the right hand side variables currently in use
      ub <- num_var[i] + ub
      #create a table containing the estimation results
      tempmodel <- cbind(b, se, t1, t2)[lb:ub, ]
      #properly name the rows of the table
      row.names(tempmodel) <- rhs_vars[lb:ub]
      tempmodel <- as.data.frame(tempmodel)
      #properly name columns of the table
      names(tempmodel) <-
        c("Coefficient", "Standard.Error", "T.Statistic", "P.Value")
      #store the current table in the list of model results to be returned
      modelsout[[i]] <- tempmodel
      #increment the variable tracking the equation currently in use
      i <- i + 1
      #properly increment the lower bound of the right hand side variables currently in use
      lb <- ub + 1
    }
    
    #create new objects containing the covariance and correlation matrices
    covmatout <- c
    cormatout <- cm
    
    #create new object containing the sigma matrix
    sigmaout <- sig
    
    #create list of objects containing model statistics and properly name its components
    modelstatsout <- list(nobs, sst, sse3, df, rsq, rbsq, sig, dw, N_firms)
    names(modelstatsout) <-
      c(
        "Observations",
        "Total.Sum.of.Squares",
        "Residual.Sum.of.Squares",
        "Degrees.of.Freedom",
        "R.Squared",
        "R.Bar.Squared",
        "Sigma",
        "Durbin.Watson.Statistic",
        "Number.of.Firms"
      )
    
    #create list of parameters used during estimation and name them appropriately
    estparsout <- list(restrict.R, restrict.z, het, ac, lrdv, lriter, lrtol)
    names(estparsout) <-
      c("rmat", "zmat", "het", "ac", "lrdv", "lriter", "lrtol")
    
    #get original data set
    origdata <- data
    
    
    #compile full list of objects to be output and name them appropriately
    fulloutput <-
      list(
        lhs_vars,
        modelsout,
        covmatout,
        cormatout,
        sigmaout,
        modelstatsout,
        hdataset,
        estparsout,
        origdata
      )
    names(fulloutput) <-
      c(
        "Dependent.Variables",
        "Models",
        "Covariance.Matrix",
        "Correlation.Matrix",
        "Sigma.Matrix",
        "Model.Statistics",
        "Transformed.Data",
        "Estimation.Parameters",
        "Original.Data"
      )
    
    #return estimation results
    return(fulloutput)
  }



#This function gets sum of squared errors for groups in each equation and in system
#The arguments are as follows:
# 1) lhs_vars: the dependent variables in the system of equations
# 2) rhs_vars: the vector containing each equation's independent variables, 
#    concatenated in the order of the corresponding dependent variables
# 3) num_var: the number of independent variables in each equation
# 4) b: the current parameter estimates in use
# 5) data: the dataset to be used for the regression
# 6) g: the number of equations in the system.
#This function returns a gxg matrix called sig, which is the sigma matrix for the system of equations.

LRsseh <- function(lhs_vars, rhs_vars, num_var, b, data, g) {
  #get number of observations
  #THIS IS PROBABLY UNNEEDED, COPIED DIRECT FROM GAUSS CODE
  nobs <- dim(data)[1]
  #initialize matrices of dependent variables and errors
  e <- matrix(0, dim(data)[1], g)
  y <- matrix(0, dim(data)[1], g)
  #intialize loop variables
  i <- 1
  lb <- 1 + g
  ub <- g
  #initialize results variables
  #WHY IS yy HERE? NOT USED IN ANY CALCULATIONS OR RETURNED
  sig <- 0
  yy <- 0
  #begin loop over equations
  #AV: i indexes in the equation
  while (i <= g) {
    #set proper upper bound for data matrix columns for the current equation's X data
    ub <- num_var[i] + ub
    #calculate errors for current equation
    e[, i] <- data[, i] - data[, lb:ub] %*% b[(lb - g):(ub - g)]
    #set y for current equation
    #WHY IS THIS HERE? NOT USED IN ANYTHING RETURNED BY THE PROGRAM
    y[, i] <- data[, i]
    #increment equation number
    i <- i + 1
    #set proper lower bound for the data matrix columns for the next equation's X data
    lb <- ub + 1
  }
  #calculate the sigma matrix
  sig <- t(e) %*% e + sig
  #calculate the covariance matrix for the dependent variables
  #WHY IS THIS DONE? NOT RETURNED
  yy <- t(y) %*% y + yy
  #return sigma matrix
  return(sig)
}








#This function produces an additional explicit share equation based on the initial results.
#AV: This is the share equation that was left out of the initial estimation because of colinearity (shares add up to 1)
#Its arguments are as follows:
# 1) hetest: the object returned by the subroutine hetsur, containing the results of a seemingly unrelated regression 
#    with heteroskedasticity and/or autocorrelation
# 2) newsharename: the name of the dependent variable for which a new equation should be produced
# 3) trendvar: the name, if any, of the trend variable interacted with the input price variables in the main equation

#This function returns an augmented version of the hetsur result
#It adds a new dependent variable to the list of dependent variables, 
#a new model results frame to the list of models, and the original dataset to the full hetsur results object
#If necessary, it also adds a column containing the omitted share to the original dataset
#Different methods are used for 2 factor models and 3+ factor models due to the way single column objects are handled
#It augments the Dependent.Variables and Models components of the list returned by hetsur

est_share_new <- function(hetest, newsharename, trendvar = "trend") {
  #get the names of the dependent variables for which equations have already been estimated
  tempdepvars <- hetest$Dependent.Variables
  
  #get the original data
  origdata <- hetest$Original.Data
  
  #locate the current dependent variables within the original dataset
  tempdepvarlocs <- match(tempdepvars, names(origdata))
  
  #get the number of equations already estimated
  numeqs <- length(tempdepvars)
  
  if (numeqs == 2) {
    #create a duplicate of the result for the first share equation
    tempneweq <- hetest$Models[[2]]
    
    #check whether data for the second share already exists in the dataset, and, if not, create a column for 
    #the second share and name it appropriately
    if ((newsharename %in% names(origdata)) == F) {
      origdata$tempnew <- 1 - origdata[, tempdepvarlocs[2]]
      names(origdata)[dim(origdata)[2]] <- newsharename
    }
    
    #calculate the intercept of the second share equation and recalculate the parameter's t-statistic and p-value appropriately
    tempneweq[1, 1] <- 1 - tempneweq[1, 1]
    tempneweq[1, 3] <- tempneweq[1, 1] / tempneweq[1, 2]
    tempneweq[1, 4] <-
      2 * pt(abs(tempneweq[1, 3]),
             hetest$Model.Statistics$Degrees.of.Freedom[2],
             lower.tail = F)
    
    #calculate the coefficients for non-intercept coefficients of the second share equation as well as their t-statistics
    tempneweq[2:dim(tempneweq)[1], c(1, 3)] <-
      tempneweq[2:dim(tempneweq)[1], c(1, 3)] * -1
    
    
    
    #initialize a new object for the augmented hetsur results to be returned
    hetestaug <- hetest
    hetestaug$Dependent.Variables[3] <- newsharename
    hetestaug$Models[[3]] <- tempneweq
  }
  
  
  #the following block of code generalizes the methods found in gauss files called EST_SLM3, EST_SLM4, and EST_SLM5.
  if (numeqs > 2) {
    #check whether data for the new share already exists in the dataset, and, if not, create a column for the additional share and name it appropriately
    if ((newsharename %in% names(origdata)) == F) {
      #get the locations of the share variables in the original dataset for which equations already exist
      currentshares <- origdata[, tempdepvarlocs[-1]]
      
      #sum the share variables for every observation
      currentsharetots <- apply(origdata, MARGIN = 1, FUN = sum)
      
      #store the additional share in a new column and name it appropriately
      origdata$tempnew <- 1 - currentsharetots
      names(origdata)[dim(origdata)[2]] <- newsharename
    }
    
    #create a placeholder for the new equation
    tempneweq <- hetest$Models[[2]]
    
    #create a matrix in which to store the coefficients of the already estimated share equations and name its rows appropriately
    sharemodcoefs <- matrix(0, dim(tempneweq)[1], numeqs)
    row.names(sharemodcoefs) <- row.names(tempneweq)
    
    #recreate the num_var variable from the original hetsur call and fill its first element appropriately
    tempnumvar <- rep(0, numeqs)
    tempnumvar[1] <- dim(hetest$Models[[1]])[1]
    
    #fill the matrix of share equation coefficients and the tempnumvar variable
    for (i in 2:length(hetest$Models)) {
      sharemodcoefs[, (i - 1)] <- hetest$Models[[i]][, 1]
      tempnumvar[i] <- dim(hetest$Models[[i]])[1]
    }
    
    #store the covariance matrix for the existing model
    covmat <- hetest$Covariance.Matrix
    
    #sum the coefficients for each variable on the right hand side of the share equations
    sharemodcoeftots <- apply(sharemodcoefs, MARGIN = 1, FUN = sum)
    
    #calculate the value of the intercept in the new share equation
    tempneweq[1, 1] <- 1 - sharemodcoeftots[1]
    
    #calculate the value of the remaining coefficients in the new share equation
    tempneweq[-1, 1] <- -1 * sharemodcoeftots[-1]
    
    #calculate the locations of each of the constant terms in the covariance matrix
    constindx <- cumsum(tempnumvar)[-numeqs] + 1
    
    #create a temporary variable in which to store the variance of the intercept term in the new equation
    tempconstvar <- 0
    
    #loop twice over the locations of the constants in the covariance matrix (once for rows, once for columns)
    for (i in 1:length(constindx)) {
      for (j in 1:length(constindx)) {
        #store the current row and column
        tempr <- constindx[i]
        tempc <- constindx[j]
        #add the covariance matrix value in the cell corrseponding to the current row and column to the temporary variable for the variance of the intercept term in the new share equation
        tempconstvar <- tempconstvar + covmat[tempr, tempc]
      }
    }
    
    #calculate the standard error of the intercept term in the new equation
    tempneweq[1, 2] <- sqrt(tempconstvar)
    
    #calculate the upper and lower bounds denoting the locations of non-intercept right hand side variables for the share equations in the covariance matrix
    varindx1 <- cumsum(tempnumvar)[-numeqs] + 2
    varindx2 <- cumsum(tempnumvar)[-1]
    
    #DELETE THIS?
    testrhs <-
      c(
        row.names(hetest$Models[[1]]),
        row.names(hetest$Models[[2]]),
        row.names(hetest$Models[[3]])
      )
    
    #create a temporary variable in which to store the variances of the non-intercept terms of the new share equation
    tempvarvar <- rep(0, tempnumvar[2] - 1)
    
    #loop twice over the number of equations (once for rows, once for columns)
    for (i in 1:length(varindx1)) {
      for (j in 1:length(varindx1)) {
        #create the current row and column ranges
        temprs <- c(varindx1[i]:varindx2[i])
        tempcs <- c(varindx1[j]:varindx2[j])
        #add the diagonal of the block of the covariance corresponding to the current row and column ranges to the temporary variable for the variance of the non-intercept terms in the new share equation
        tempvarvar <- tempvarvar + diag(covmat[temprs, tempcs])
      }
    }
    
    #calculate the standard errors of the non-intercept terms in the new equation
    tempneweq[(2:tempnumvar[2]), 2] <- sqrt(tempvarvar)
    
    #calculate the t-statistics for all terms in the new equation
    tempneweq[, 3] <- tempneweq[, 1] / tempneweq[, 2]
    
    #calculate the p-values for all terms in the new equation
    for (i in 1:tempnumvar[2]) {
      tempneweq[i, 4] <-
        2 * pt(abs(tempneweq[i, 3]),
               hetest$Model.Statistics$Degrees.of.Freedom[2],
               lower.tail = F)
    }
    
    #create the augmented version of hetest to be modified and returned
    hetestaug <- hetest
    #add the new dependent variable to the Dependent Variables component of the object to be returned
    hetestaug$Dependent.Variables[numeqs + 1] <- newsharename
    
    #check if the trend variable was included in the share equations and, if so, remove it from the final share equation
    if (trendvar %in% row.names(tempneweq)) {
      trendrow <- match(trendvar, row.names(tempneweq))
      tempneweq <- tempneweq[-trendrow, ]
    }
    
    #add the new share equation to the Models component of the object to be returned
    hetestaug$Models[[numeqs + 1]] <- tempneweq
    
  }
  
  #add the modified original data to the object to be returned and name it appropriately.
  hetestaug[[length(hetestaug)]] <- origdata
  
  #return the output
  return(hetestaug)
  
}




#This function provides predicted values for all estimated variables and adds them to the Original.Data 
#component of the hetest object returned by est_share_new()
#The arguments are as follow:
# 1) hetest: the object returned by hetsur(), after having been augmented by est_share_new()
# 2) newdata: if a dataset other than hetest$Original.Data should be used for prediction, provide it here
#if newdata is not null, then the Original.Data component of hetest will be replaced with the contents of newdata

prehetsur <- function(hetest, newdata = NULL) {
  #check whether alternative data has been supplied
  #if so, replace the original data component of the supplied hetest object with the contents of new data and initialize an object called predata to be used in calculating the predicted values of dependent variables
  #if not, simply initialize an object called predata to be used in calculating the predicted values of dependent variables
  if (length(newdata) == 0) {
    predata <- hetest$Original.Data
  }
  if (length(newdata) > 0) {
    predata <- newdata
    hetest$Original.Data <- predata
  }
  
  #loop over models
  for (i in 1:length(hetest$Models)) {
    #get the string name of the dependent variable for the equation currently under consideration
    tempdepvar <- hetest$Dependent.Variables[i]
    
    #initialize a vector for the predicted values
    temppred <- rep(0, dim(predata)[1])
    
    #get the string names of the independent variables for the equations currently under consideration
    temprhs <- row.names(hetest$Models[[i]])
    
    #get the locations of the columns containing the independent variables in the original data set
    temprhslocs <- match(temprhs, names(hetest$Original.Data))
    
    #create a matrix of the independent variables in the same order that the coefficients appear
    tempxmat <- as.matrix(hetest$Original.Data[, temprhslocs])
    
    #get the coefficients for the current model
    tempbeta <- hetest$Models[[i]][, 1]
    
    #calculate the predicted values for the current dependent variables
    temppred <- tempxmat %*% tempbeta
    
    #store the predicted values of the current dependent variable in a new column of the original dataset and name it appropriately
    hetest$Original.Data$tempnew <- temppred
    
    names(hetest$Original.Data)[dim(hetest$Original.Data)[2]] <-
      paste(tempdepvar, "hat", sep = "_")
    
  }
  
  #return the augmented hetest object
  return(hetest)
}


#This function calculates several measures of goodness of fit.
#Its only argument is hetest, which should be the object returned by hetest(), 
#after being augmented by est_share_new() and prehetsur(), in that order

#It  returns of list the following four components:
# 1) Uncentered.System.R.Square
# 2) Centered.System.R.Square
# 3) LR.Test.Statistic
# 4) P-Value

systemR2 <- function(hetest) {
  templhs <-
    hetest$Dependent.Variables[-length(hetest$Dependent.Variables)]
  
  templhsalocs <- match(templhs, names(hetest$Original.Data))
  templhsplocs <-
    match(paste(templhs, "hat", sep = "_"), names(hetest$Original.Data))
  
  templhsa <- hetest$Original.Data[, templhsalocs]
  templhsp <- hetest$Original.Data[, templhsplocs]
  
  tempdf <- dim(hetest$Models[[1]])[1] - 1
  
  res <- matrix(0, dim(hetest$Original.Data)[1], length(templhs))
  res2 <- matrix(0, dim(hetest$Original.Data)[1], length(templhs))
  dev <- matrix(0, dim(hetest$Original.Data)[1], length(templhs))
  
  for (i in 1:length(templhs)) {
    res[, i] <- templhsa[, i] - templhsp[, i]
    dev[, i] <- templhsa[, i] - mean(templhsa[, i])
    res2[, i] <- res[, i] - mean(res[, i])
  }
  ucr2 <- 1 - det(t(res) %*% res) / det(t(dev) %*% dev)
  
  cr2 <- 1 - det(t(res2) %*% res2) / det(t(dev) %*% dev)
  
  lrstat <- -1 * dim(hetest$Original.Data)[1] * log(1 - cr2)
  
  pvalue <- pchisq(q = lrstat,
                   df = tempdf,
                   lower.tail = F)
  
  gofoutput <- list(ucr2, cr2, lrstat, pvalue)
  names(gofoutput) <-
    c(
      "Uncentered.System.R.Square",
      "Centered.System.R.Square",
      "LR.Test.Statistic",
      "P-Value"
    )
  return(gofoutput)
}







surcostRmat <-
  function(lhs_vars,
           rhs_vars,
           num_var,
           factors,
           constantvar = "intercept",
           verbose = T) {
    #create the vectors of share and price variable names
    sharevars <- paste("s", factors, sep = "")
    pricevars <- paste("w", factors, sep = "")
    
    #check that the order of shares matches exactly their order in the vector of the dependent variables
    for (i in 1:length(sharevars)) {
      if (sharevars[i] != lhs_vars[i + 1]) {
        print("BAD ORDER OF SHARES")
        return("BAD ORDER OF SHARES")
      }
    }
    
    #initialize a list of the variables in each equation
    eqvarlist <- list(NULL)
    
    #fill the list of variables in each equation
    lb <- 1
    ub <- 0
    i <- 1
    while (i <= length(lhs_vars)) {
      ub <- ub + num_var[i]
      eqvarlist[[i]] <- rhs_vars[lb:ub]
      lb <- ub + 1
      i <- i + 1
    }
    
    
    outputR <- matrix(0, length(rhs_vars) - num_var[1], length(rhs_vars))
    outputz <- matrix(0, length(rhs_vars) - num_var[1], 1)
    
    
    #get the right hand side variables of the cost equation
    ceqrhs <- rhs_vars[1:num_var[1]]
    
    #get the position within rhs_vars where each equation's (not counting the first equation) variables begin
    shcolinit <- cumsum(num_var)[-length(num_var)] + 1
    
    #initialize a variable tracking the current row of the R matrix
    rowR <- 1
    #loop over factors
    for (i in 1:length(factors)) {
      #store the current factor, price, share equation, and column index
      tempfact <- factors[i]
      tempprice <- pricevars[i]
      tempsheq <- eqvarlist[[i + 1]]
      tempcolinit <- shcolinit[i]
      
      #loop over the variables in the current share equation
      for (j in 1:length(tempsheq)) {
        #get the current variable
        temprhsvar <- tempsheq[j]
        
        #if verbose is true, print the current variable for debugging purposes
        if (verbose == T) {
          print(temprhsvar)
        }
        
        #check whether the current variable is the intercept term, the price for the current cost share, or another variable and fill in the current row of the R matrix appropriately
        if (temprhsvar == constantvar) {
          tempcloc <- which(ceqrhs == tempprice)
          outputR[rowR, tempcloc] <- -1
          outputR[rowR, tempcolinit + j - 1] <- 1
        }
        if (temprhsvar != constantvar) {
          if (temprhsvar == tempprice) {
            #get the location of the current price variable squared term in the cost equation
            tempcloc <-
              which(ceqrhs == paste(temprhsvar, "*", temprhsvar, "/2", sep = ""))
            
            outputR[rowR, tempcloc] <- -1
            
            outputR[rowR, tempcolinit + j - 1] <- 1
          }
          if (temprhsvar != tempprice) {
            #create the two possible ways of writing the interaction in the cost equation
            tempceqnames <-
              c(
                paste(temprhsvar, "*", tempprice, sep = ""),
                paste(tempprice, "*", temprhsvar, sep = "")
              )
            #determine which way was used
            tempceqnamecheck <- which(tempceqnames %in% ceqrhs)
            #get the location of the current variable's interaction term in the cost equation
            tempcloc <- which(ceqrhs == tempceqnames[tempceqnamecheck])
            
            
            outputR[rowR, tempcloc] <- -1
            
            outputR[rowR, tempcolinit + j - 1] <- 1
          }
        }
        #increment the variable tracking the row of the R matrix currently in use
        rowR <- rowR + 1
      }
      
    }
    
    
    #construct an output list and return the results
    fulloutput <- list(outputR, outputz)
    return(fulloutput)
  }






#this function adds interactive terms to a data frame
addvars <- function(data, rhs_vars) {
  #get a unique listing of the independent variable terms
  redvars <- unique(rhs_vars)
  
  #get the old column names
  oldvars <- names(data)
  
  #check which independent variable terms are already in the data set and drop them to create a list of variables to be created
  oldvarlocs <- which(redvars %in% oldvars)
  
  newvars <- redvars[-oldvarlocs]
  
  if (length(newvars) > 0) {
    #loop over the variables to be added
    for (i in 1:length(newvars)) {
      #create the new variable and name it appropriately
      data$tempnew <- with(data, eval(parse(text = newvars[i])))
      names(data)[dim(data)[2]] <- newvars[i]
      
    }
  }
  #return the augmented data set
  return(data)
  
}