// Code for 1-cut
#include <oxstd.h>
#include <oxprob.h>
#include <oxdraw.h>

// uses PcGive in case Autometrics is used for the selection
// (otherwise could also use the PcFiml class)
#import <packages/PcGive/pcgive_ects>

#import "simutils"


/**
Resets the model object and formulates a new OLS regression of "y" on the given regressors.
@param model	model object (a PcGive object in this file)
@param asModel	array of names of the free regressors (selected as X_VAR)
@param asFixed	array of names of the regressors selected as U_VAR (here: the forced Constant, or empty)
@param cT		sample size: the selection sample is set with SetSelSample(1, 1, cT, 1)
**/
static FormulateModel(model, asModel, asFixed, cT)
{
	// the parentheses are required: without them the member function
	// is only referenced, not called
	model.ClearEstimation();
	model.DeSelect(); 							// drop the previous variable selection
	model.Select(Y_VAR, {"y", 0, 0});
	model.Select(X_VAR, asModel);
	model.Select(U_VAR, asFixed);
	model.SetSelSample(1, 1, cT, 1);
	model.SetMethod(M_OLS);
}

/**
Runs the 1-cut experiments.
@param cRep 	no of replications M
@param vBeta	coefficients of relevant variables (also determines number of relevant variables)
@param cN		total number of variables N
@param cT		sample size
@param dP_a	significance level for selection
@param bForcedConstant TRUE: force Constant in all models, FALSE: Constant is free (default is TRUE)
@param bUseAutometrics TRUE: use Autometrics, else use 1-cut (default is FALSE)
@param bFixedX TRUE: use fixed regressors (default is FALSE)
@param dSigma	>0: sigma for N[0,1] errors, < 0: use student-t(3) instead of normal (default is 1)
@param dMu		mean (default is 0); currently only reported in the printed header, not used when generating y
@param bDiagnostics TRUE:use diagnostic testing with Autometrics (default TRUE)
@param sOutliers string defining the outlier mode (default is "none")
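@return an array with two matrices with M columns: beta and beta after two-step bias correction.
During the simulation the matrices have N rows ((N+T) rows when IIS is used), but the returned
matrices are cut to the first N rows when IIS is used, and to the relevant variables only
when there are more than 10 irrelevant variables.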
**/
Run_1cut(cRep, vBeta, cN, cT, dP_a, bForcedConstant, bUseAutometrics, bFixedX, dSigma, dMu,
	bDiagnostics, sOutliers)
{
	// Outline: build the variable names, then for each replication draw data,
	// estimate the DGP as a baseline, estimate the GUM, select (1-cut or
	// Autometrics), and store coefficients, |t-values| and bias corrections.
	// Afterwards the Monte Carlo summaries are printed.
	// NOTE(review): dMu is not used when generating y below; it is only echoed
	// in the printed header.

	// DGP parameters
	decl beta_dgp = vec(vBeta)';				// coefficients as a row vector

	decl cnrel = sizerc(beta_dgp);				// no of relevant variables
	if (cN < cnrel)
		oxrunerror("Need cN >= dim(beta)");
	decl beta_all_dgp = beta_dgp ~ zeros(1, cN - cnrel);	// irrelevant variables have zero coefficient

	decl asx = {};								// create names of all X's
	for (decl k = 1; k <= cN; ++k)  
	{
		asx ~= { sprint("x", k) };				// Contemporaneous x's
	}
	
	// with IIS one impulse name per observation is added, each with zero DGP coefficient
	decl ciis = sOutliers == "IIS" ? cT : 0;
	beta_all_dgp ~= zeros(1, ciis);
	for (decl k = 1; k <= ciis; ++k)  
	{
		asx ~= { sprint("I:", k) };				// impulses
	}

	decl asx_dgp = asx[ : cnrel - 1];			// names of the relevant variables

	// formulate the GUM
	decl asfixed_gum = bForcedConstant ? {"Constant"} : {}, cfixed = sizeof(asfixed_gum);
	decl asx_gum = asx[ : cN - 1];				// the N regressors, without impulses
		
	// storage, first of DGP
	decl mbeta_dgp = zeros(cnrel, cRep);
	decl mtstat_dgp = mbeta_dgp;
	decl msigma_dgp = zeros(1, cRep);
	
	// then of selected model (one row per name in asx, one column per replication)
	decl mbeta_sel = zeros(cN + ciis, cRep);
	decl mtstat_sel = mbeta_sel;
	decl msigma_sel = zeros(1, cRep);
	decl mbeta_bc1step_sel = mbeta_sel;
	decl mbeta_bc2step_sel = mbeta_sel;
	
	// two-sided critical value at level dP_a, using the GUM degrees of freedom
	decl i, mx, y, c_alpha_gum = quant(1 - (dP_a / 2), cT - cN - cfixed);

	PcGive::ShowBanner(FALSE);					// no banner everytime object is created
	
	decl time = timer();
	ranseed(-1);								// set the seed once, before any draws

	if (bFixedX)
		mx = rann(cT, cN);						// regressors drawn once, kept over replications

	for (i = 0; i < cRep; i++)	  			
	{
		decl csel, vcoefs, vtstat, idx_in, bc1step, bc2step;

//		println("============== rep=", i);
		if (!bFixedX)
			mx = rann(cT, cN);					// draw new regressors

		y = mx[][ : cnrel - 1] * beta_dgp';
												// compute new Y
		if (dSigma > 0)
			y += dSigma * rann(cT, 1);
		else
			y += -dSigma * rant(cT, 1, 3);			// negative sigma: use student-t(3) instead of normal

		// create the database for modelling
		decl model = new PcGive();
		model.Create(1/*freq*/, 1, 1, 1, cT);		
		model.Append(y, {"y"});
		model.Append(mx, asx_gum);
		model.Deterministic(-1);					// Create Constant
		model.SetPrint(FALSE);						// switch printing off
	
		//==== First formulate and estimate DGP as baseline
		FormulateModel(model, asx_dgp, asfixed_gum, cT);
		model.Estimate();

		// store DGP estimates (fixed Constant at end automatically ignored in assignment)
		mbeta_dgp[][i]  = vcoefs = model.GetPar();
		msigma_dgp[][i] = sqrt(model.GetResVar());
		mtstat_dgp[][i] = fabs(vcoefs ./ model.GetStdErr());

		//==== Formulate the GUM
		FormulateModel(model, asx_gum, asfixed_gum, cT);
		if (bUseAutometrics)
		{
			model.Autometrics(dP_a, sOutliers, 1);
			if (!bDiagnostics)
				model.AutometricsSet("pvalue_tests", 0);   // no diagnostic tests
//			model.AutometricsSet("print", 3);			   // print search
		}
//		model.SetPrint(1);
		model.Estimate();

		// 1-cut: |t-value| to determine which ones stay in
		if (!bUseAutometrics)
		{
			vtstat = fabs(model.GetPar() ./ model.GetStdErr())[ : cN - 1];
			// NB: Select needs an array, just a string won't work
			// NOTE(review): if nothing passes the threshold the selection is empty;
			// behaviour then depends on Select/Estimate - confirm
			FormulateModel(model, array(asx_gum[vecindex(vtstat .>= c_alpha_gum)]), asfixed_gum, cT);
			model.Estimate();
		}		
		// get position of selected coeffs, except last one: fixed Constant
		csel = model.GetParCount() - cfixed;		   // no selected
		// idx_in: indices (rows in the storage matrices) of the selected vars
		idx_in = csel == 0 ? <> : strfind(asx, model.GetParNames()[ : csel - 1]);

		msigma_sel[][i] = sqrt(model.GetResVar());
		// store the results, if any selected (otherwise all betas are zero)
		if (csel)
		{
			mbeta_sel[idx_in][i] = vcoefs = model.GetPar()[ : csel - 1];
			mtstat_sel[idx_in][i] = vtstat = fabs(vcoefs ./ model.GetStdErr()[ : csel - 1]);
			
			// compute the bias correction, with the critical value based on
			// the degrees of freedom of the selected model
			[bc1step, bc2step] = GetBiasCorrections(vcoefs, vtstat,  quant(1 - (dP_a / 2), cT - model.GetParCount()));
			mbeta_bc1step_sel[idx_in][i] = bc1step;
			mbeta_bc2step_sel[idx_in][i] = bc2step;
		}
		delete model;
	}

	println("\n============= ", " M=", cRep, " alpha=", dP_a, " c_alpha(t_N)=", c_alpha_gum);
	println("  Selection by: ", bUseAutometrics ? "Autometrics" : "1-cut",
		bUseAutometrics && !bDiagnostics ? " diagnostics OFF" : "",
		bUseAutometrics ? (" outliers:" ~ sOutliers) : "");
	println("  1-cut DGP/GUM: N=", cN, " T=", cT,
		" Constant=", bForcedConstant ? "Forced" : "none", bFixedX ? " X fixed" : " X not fixed",
		dSigma > 0 ? sprint(" sigma=", dSigma) : sprint(" student-t", -dSigma), " mu=", dMu, " beta=", beta_dgp);

	println("\nOriginal regression coefficient estimates", "%c", {"DGP", "beta","|t-values|", "bias", "RMSE"}, "%r", asx_dgp,
		beta_dgp' ~ meanr(mbeta_dgp) ~ meanr(mtstat_dgp) ~ meanr(mbeta_dgp - beta_dgp') ~ URMSE(mbeta_dgp, beta_dgp));
	println("Original regression equation standard error",
		meanr(msigma_dgp));

	println("\nSelection results");
	// Gauge: average retention frequency over the rows with zero DGP coefficient;
	// Potency: average retention frequency over the relevant variables
	if (cnrel < cN)
	{
		println("Gauge:               ", double(meanc(meanr(mbeta_sel[cnrel : ][] .!= 0))) );
		if (ciis)
			println("Gauge (not dummies): ", double(meanc(meanr(mbeta_sel[cnrel : cN - 1][] .!= 0))) );
	}
	if (cnrel)
		println("Potency:             ", double(meanc(meanr(mbeta_sel[ : cnrel - 1][] .!= 0))) );
	
	// don't print all irrelevant variables if there are too many
	decl ccut = cN - cnrel > 10 ? cnrel : cN;
	println("Selection coefficient estimates and t-stats: conditional averages", ccut < cN ? " (irrelevant not printed)" : "",
		"%c", {"retention","coefficient", "|t-value|", "URMSE", "CRMSE", "coefBC1step", "coefBC2step"}, "%r", asx_gum,
		meanr(mbeta_sel[ : ccut - 1][] .!= 0) ~ CMEAN(mbeta_sel[ : ccut - 1][]) ~ CMEAN(mtstat_sel[ : ccut - 1][]) ~
			URMSE(mbeta_sel[ : ccut - 1][], beta_all_dgp[ : ccut - 1]) ~ CRMSE(mbeta_sel[ : ccut - 1][], beta_all_dgp[ : ccut - 1]) ~ 
			CMEAN(mbeta_bc1step_sel[ : ccut - 1][]) ~ CMEAN(mbeta_bc2step_sel[ : ccut - 1][]));
	println("Selection regression equation standard error", meanr(msigma_sel));

	// exclude IIS dummies here
	PrintSelectionAverages(beta_all_dgp[ : cN - 1], mbeta_sel[ : cN - 1][], mbeta_bc1step_sel[ : cN - 1][], mbeta_bc2step_sel[ : cN - 1][]);
	
	// NB: the truncation below is applied to the stored matrices themselves,
	// so it also affects the matrices returned at the end
	if (ccut < cN || ciis)
	{
		// remove irrelevant variables if there are too many
		mbeta_sel = mbeta_sel[ : min(ccut, cN) - 1][];
		beta_all_dgp = beta_all_dgp[ : min(ccut, cN) - 1];
		mbeta_bc1step_sel = mbeta_bc1step_sel[ : min(ccut, cN) - 1][];
		mbeta_bc2step_sel = mbeta_bc2step_sel[ : min(ccut, cN) - 1][];
	}
	println("\nUnconditional bias of selection regression", "%c", {"unc.coeff", "unc.bias", "BC 1-step", "BC 2-step"}, "%r", asx,
		meanr(mbeta_sel) ~ UBIAS(mbeta_sel, beta_all_dgp) ~ UBIAS(mbeta_bc1step_sel, beta_all_dgp) ~ UBIAS(mbeta_bc2step_sel, beta_all_dgp));
	println("Unconditional RMSE of selection regression", "%c", {"URMSE", "BC 1-step", "BC 2-step"}, "%r", asx,
		URMSE(mbeta_sel, beta_all_dgp) ~ URMSE(mbeta_bc1step_sel, beta_all_dgp) ~ URMSE(mbeta_bc2step_sel, beta_all_dgp));
	println("Unconditional MSE of selection regression", "%c", {"UMSE", "BC 1-step", "BC 2-step"}, "%r", asx,
		sqr(URMSE(mbeta_sel, beta_all_dgp) ~ URMSE(mbeta_bc1step_sel, beta_all_dgp) ~ URMSE(mbeta_bc2step_sel, beta_all_dgp)));

	println("Conditional bias of selection regression", "%c", {"cond.coeff", "con.bias", "BC 1-step", "BC 2-step"}, "%r", asx,
		CMEAN(mbeta_sel) ~ CBIAS(mbeta_sel, beta_all_dgp) ~ CBIAS(mbeta_bc1step_sel, beta_all_dgp) ~ CBIAS(mbeta_bc2step_sel, beta_all_dgp));
	println("Conditional RMSE of selection regression", "%c", {"CRMSE", "BC 1-step", "BC 2-step"}, "%r", asx,
		CRMSE(mbeta_sel, beta_all_dgp) ~ CRMSE(mbeta_bc1step_sel, beta_all_dgp) ~ CRMSE(mbeta_bc2step_sel, beta_all_dgp));
	println("Conditional MSE of selection regression", "%c", {"CMSE", "BC 1-step", "BC 2-step"}, "%r", asx,
		sqr(CRMSE(mbeta_sel, beta_all_dgp) ~ CRMSE(mbeta_bc1step_sel, beta_all_dgp) ~ CRMSE(mbeta_bc2step_sel, beta_all_dgp)));

	println("finished in ", timespan(time));

	return {mbeta_sel, mbeta_bc2step_sel};
}
