Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
rightaway006 committed Sep 21, 2021
1 parent 3d91ccd commit 0d49567
Show file tree
Hide file tree
Showing 7 changed files with 1,309 additions and 0 deletions.
917 changes: 917 additions & 0 deletions Latex File of Research Lasso, Ridge, Elastic Net.tex

Large diffs are not rendered by default.

22 changes: 22 additions & 0 deletions OscarExample.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
% Read Soil Dataset
data = xlsread('C:\Users\sarmad\Desktop\soil.xlsx');

% -------------- Separating features in separate variable ---------------
X = data(:, 1:15);

% -------------- Separating magnitude in separate variable --------------
y = data(:,16);




% Choose grid of parameter values to use
cvalues = [0; .01; .05; .1;.25;.5;.75;.9;1];
propvalues = [.0001; .001; .002; .00225; .0025; .00275; .0028; .0029; .003; .004; .005; .0075; .01;.025; .05; .1; .15;.2;.3;.4;.5;.6];

%%%% method = 2, chooses the sequential algorithm.

method = 2;
initcoef = [];
[CoefMatrix dfMatrix SSMatrix] = OscarSelect(X, y, cvalues, propvalues, initcoef, method); % Calls function to perform optimization on the grid.

140 changes: 140 additions & 0 deletions OscarReg.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@


% X should be a matrix whose rows are the observations and columns are the
% predictors (n by p). The intercept is omitted (so the response and each
% predictor should have mean zero).

function [CoefMatrix dfMatrix SSMatrix] = OscarReg(X, y, cvalues, propvalues, initcoef)

p = length(X(1,:));
q=p*(p-1)/2;


% Standardize predictors to mean zero, variance 1, and center response to
% mean zero.

for i = 1:p
X(:,i) = (X(:,i)-mean(X(:,i)))/std(X(:,i));
end;
y = y-mean(y);



corrmat=X'*X;
[ivec,jvec,svec]=find(corrmat);
Sparseavec=[ivec;ivec+p;ivec;ivec+p];
Sparsebvec=[jvec;jvec+p;jvec+p;jvec];
Element1vec=[svec;svec;-svec;-svec];

F=sparse(Sparseavec,Sparsebvec,Element1vec,2*p+q,2*p+q);

clear corrmat ivec jvec svec Sparseavec Sparsebvec Element1vec;

c=[-X'*y;X'*y;zeros(q,1)];
lowbound=[zeros(2*p,1);-inf(q,1)];

Sparse1vec=[ones(2*p+q,1);2;2;2;3;3;3];
Sparse2vec=[(1:(2*p+q))';1;p+1;2*p+1;2;p+2;2*p+1];
Elementvec=[ones(2*p,1);ones(q,1);1;1;-1;1;1;-1];
rowcount=2;
paircount=2*p+1;

initcoef1=[max(initcoef,0);-min(initcoef,0)];

for i=1:p
for j=(i+1):p
initcoef1=[initcoef1;max(abs(initcoef(i)),abs(initcoef(j)))];
if rowcount>2
Sparse1vec=[Sparse1vec;rowcount;rowcount;rowcount;rowcount+1;rowcount+1;rowcount+1];
Sparse2vec=[Sparse2vec;i;i+p;paircount;j;j+p;paircount];
Elementvec=[Elementvec;1;1;-1;1;1;-1];
end;
rowcount=rowcount+2;
paircount=paircount+1;
end;
end;


% Grid search over c values, then by proportion of bound. For each c, the
% constraint matrix is updated.

CoefMatrix = zeros(p,length(propvalues),length(cvalues));
SSMatrix = zeros(1,length(propvalues),length(cvalues));
dfMatrix = zeros(1,length(propvalues),length(cvalues));

for ccount = 1:length(cvalues)
cvalue = cvalues(ccount);
tempvec=[(1-cvalue)*ones(2*p,1);cvalue*ones(q,1)];
Elementvec(1:(2*p+q))=tempvec;
clear tempvec;

A=sparse(Sparse1vec,Sparse2vec,Elementvec,2*q+1,2*p+q);
initialnorm=A(1,:)*initcoef1;
for propcount = 1:length(propvalues)
tbound = propvalues(propcount)*initialnorm;
b=[tbound; zeros(2*q,1)];

if (ccount == 1)
if (propcount == 1)
start=zeros(2*p+q,1);
end;
elseif (propcount > 1)
start=[max(CoefMatrix(:,propcount-1,ccount),0);-min(CoefMatrix(:,propcount-1,ccount),0)];
for i=1:p
for j=(i+1):p
start=[start;max(abs(start(i)),abs(start(j)))];
end;
end;
else
start=[max(CoefMatrix(:,propcount,ccount-1),0);-min(CoefMatrix(:,propcount,ccount-1),0)];
for i=1:p
for j=(i+1):p
start=[start;max(abs(start(i)),abs(start(j)))];
end;
end;
end;


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%
%%%%%%
%%%%%% The TOMLAB package is now used as a quadratic programming solver.

%%%%%% IF AN ALTERNATIVE QUADRATIC PROGRAMMING SOLVER IS AVAILABLE,
%%%%%% REPLACE THIS SECTION WITH A CALL TO THAT SOLVER BASED ON
%%%%%% THE FORMULATION

% minimize (over x) : 0.5 x' F x + c' x
% subject to A x <= b and x >= lowbound
%

Prob = qpAssign(F, c, A, [], b, lowbound, [], start, [], [], [], [], [], [], [], []);
Result = tomRun('sqopt', Prob, 0);

x=Result.x_k;
exitflag = Result.ExitFlag;
conv = (exitflag == 0);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


CoefMatrix(:,propcount,ccount) = round((x(1:p)-x(p+1:2*p))*10^7)*10^(-7);
SSMatrix(:,propcount,ccount) = sumsqr(y-X*CoefMatrix(:,propcount,ccount));

EffParVec=unique(abs(CoefMatrix(:,propcount,ccount)));
EffParVec=EffParVec(EffParVec>0);
dfMatrix(:,propcount,ccount) = length(EffParVec);

if conv == 0
fprintf('Optimization may not have converged properly for c = %g and prop = %g.\n', cvalue, propvalues(propcount));
else
fprintf('Optimization complete for c = %g and prop = %g.\n', cvalue, propvalues(propcount));
end;

end;

end;


48 changes: 48 additions & 0 deletions OscarSelect.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@

function [CoefMatrix dfMatrix SSMatrix] = OscarSelect(X, y, cvalues, propvalues, initcoef, method)

p = length(X(1,:));

% Standardize predictors to mean zero, variance 1, and center response to
% mean zero.

for i = 1:p
X(:,i) = (X(:,i)-mean(X(:,i)))/std(X(:,i));
end;
y = y-mean(y);

if nargin < 6
method = 2;
if nargin < 5
initcoef = [];
end;
end;

cvalues=sort(cvalues);
propvalues=sort(propvalues);

if isempty(initcoef)
[initcoef] = regress(y,X);
end;
if length(initcoef)<p
error('initial estimate must be of length p');
end;
if min(cvalues)<0
error('all values of c must be nonnegative');
end;
if max(cvalues)>1
error('values for c cannot exceed 1');
end;
if min(propvalues)<=0
error('values for proportion must be greater than 0');
end;
if max(propvalues)>=1
error('values for proportion must be smaller than 1');
end;

if method == 1
[CoefMatrix dfMatrix SSMatrix] = OscarReg(X, y, cvalues, propvalues, initcoef);
end;
if method ~= 1
[CoefMatrix dfMatrix SSMatrix] = OscarSeqReg(X, y, cvalues, propvalues, initcoef);
end;
83 changes: 83 additions & 0 deletions OscarSeqOpt.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@

function [coefs df ssquares OrderMatrix conv] = OscarSeqOpt (tbound, cvalue, Xmatrix, y, start, OrderMatrix)

p = length(Xmatrix(1,:))/2;

sdresponse = std(y);
y = sqrt(p)*y/sdresponse;
% The rescaling of the response is for computational purposes only (makes the overall variance of y to be p), the
% coefficients will be rescaled back.

start = sqrt(p)*start/sdresponse;
lowbound = zeros(2*p,1);

flagvalue = 0;
itervalue = 0;
SolCoef = start(1:p)-start((p+1):(2*p));

% The sequential constrained least squares problem is now solved by adding
% an additional constraint at each step and iterating until convergence.

while (flagvalue == 0) && (itervalue < p^2)
nconstraints = length(OrderMatrix(:,1));
A1 = (1-cvalue)*ones(nconstraints,p)+cvalue*(p*ones(nconstraints,p)-OrderMatrix);
Amatrix = [A1 A1];
Bbound = (sqrt(p)*tbound/sdresponse)*ones(1,length(Amatrix(:,1)));


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% A quadratic programming solver is now used to solve the constrained least
% squares problem at each iteration.
%
% The problem is given as a constrained least squares in the form
% Minimize with respect to u:
%
% || y - Xmatrix u ||^2
%
% subject to:
% Amatrix u <= Bbound and u >= lowbound
%
% The following call to the least squares solver can be changed to use any solver if a more efficient one is
% available. This is the only thing that needs to be changed.

options = optimset('Display', 'off', 'LargeScale', 'off');
[x ssquare resid exitflag] = lsqlin(Xmatrix, y, Amatrix, Bbound, [], [], lowbound, [], start, options);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


SolCoef1 = round((x(1:p)-x((p+1):(2*p)))*10^7)*10^(-7);
[currcoef, neworder] = sort(-abs(SolCoef1));
sameaslast = [0; (currcoef(2:p) == currcoef(1:(p-1)))];
startblocksame = [((sameaslast(2:p) - sameaslast(1:(p-1))) > 0); 0];
endblocksame = [((sameaslast(2:p) - sameaslast(1:(p-1))) < 0); sameaslast(p)];
nblocksame = sum(startblocksame);
vi = (1:p)';
visbs = vi(logical(startblocksame));
viebs = vi(logical(endblocksame));
for j = 1:nblocksame
blockmean = mean(vi(visbs(j):viebs(j)));
vi(visbs(j):viebs(j)) = blockmean * ones(viebs(j) - visbs(j) + 1,1);
end;
[tempinvsort,vind] = sort(neworder);
a1weights = vi(vind)';
currcoef = -currcoef;
OrderMatrix = unique([OrderMatrix; a1weights],'rows');
flagvalue = 1;
test = round(SolCoef*10^7)*10^(-7)-round(SolCoef1*10^7)*10^(-7);
SolCoef = SolCoef1;
flag2 = sqrt(sumsqr(test));
if (flag2>10^(-7))
flagvalue = 0;
end;
itervalue = itervalue+1;
start = x;
end;

coefs = sdresponse*SolCoef/sqrt(p);
df = length(unique(currcoef(currcoef>0)));
ssquares = (sdresponse^2)*ssquare/p;
conv = ((itervalue < p^2) && (exitflag > 0));
78 changes: 78 additions & 0 deletions OscarSeqReg.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@


% X should be a matrix whose rows are the observations and columns are the
% predictors (n by p). The intercept is omitted (so the response and each
% predictor should have mean zero).

function [CoefMatrix dfMatrix SSMatrix] = OscarSeqReg(X, y, cvalues, propvalues, initcoef)

p = length(X(1,:));

% Standardize predictors to mean zero, variance 1, and center response to
% mean zero.

for i = 1:p
X(:,i) = (X(:,i)-mean(X(:,i)))/std(X(:,i));
end;
Xmatrix = [X -X]; % Need for positive and negative parts of beta
y = y-mean(y);

% Order initial estimate by magnitude, including ties.
% Initial estimate is used for first guess at ordering of coefficients and also to set maximum value
% for the bound t used in the constraint. The bound is given as proportion
% of the initial value.

[initcoeford, currorder] = sort(-abs(initcoef));
sameaslast = [0; (initcoeford(2:p) == initcoeford(1:(p-1)))];
startblocksame = [((sameaslast(2:p) - sameaslast(1:(p-1))) > 0); 0];
endblocksame = [((sameaslast(2:p) - sameaslast(1:(p-1))) < 0); sameaslast(p)];
nblocksame = sum(startblocksame);
vi = (1:p)';
visbs = vi(logical(startblocksame));
viebs = vi(logical(endblocksame));
for i = 1:nblocksame;
blockmean = mean(vi(visbs(i):viebs(i)));
vi(visbs(i):viebs(i)) = blockmean * ones(viebs(i) - visbs(i) + 1,1);
end;
[tempinvsort,vind] = sort(currorder);
a1 = vi(vind)';
initcoeford = -initcoeford;

% Grid search over c values, then by proportion of bound. For each c, the
% weighting is recomputed.

CoefMatrix = zeros(p,length(propvalues),length(cvalues));
SSMatrix = zeros(1,length(propvalues),length(cvalues));
dfMatrix = zeros(1,length(propvalues),length(cvalues));

for ccount = 1:length(cvalues)
OrderMatrix = a1;
weighting = a1;
cvalue = cvalues(ccount);
for i=1:p
weighting(i) = (1-cvalue)+cvalue*(p-i);
end;
for propcount = 1:length(propvalues)
tbound = propvalues(propcount)*weighting*initcoeford;
if (ccount == 1)
if (propcount == 1)
start=zeros(2*p,1);
end;
elseif (propcount > 1)
start=[max(CoefMatrix(:,propcount-1,ccount),0);-min(CoefMatrix(:,propcount-1,ccount),0)];
else
start=[max(CoefMatrix(:,propcount,ccount-1),0);-min(CoefMatrix(:,propcount,ccount-1),0)];
end;
[coefs df ssquares conv] = OscarSeqOpt(tbound, cvalue, Xmatrix, y, start, OrderMatrix);
CoefMatrix(:,propcount,ccount) = coefs;
SSMatrix(:,propcount,ccount) = ssquares;
dfMatrix(:,propcount,ccount) = df;
if conv == 0
fprintf('Optimization may not have converged properly for c = %g and prop = %g.\n', cvalue, propvalues(propcount));
else
fprintf('Optimization complete for c = %g and prop = %g.\n', cvalue, propvalues(propcount));
end;
end;
end;


21 changes: 21 additions & 0 deletions soil.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
BaseSat,SumCation,CECbuffer,Ca,Mg,K,Na,P,Cu,Zn,Mn,HumicMatter,Density,pH,ExchAc,Diversity
2.34,0.1576,0.614,0.0892,0.0328,0.0256,0.01,0,0.08,0.184,3.2,0.122,0.0822,0.516,0.466,0.2765957
1.64,0.097,0.516,0.0454,0.0218,0.0198,0.01,0,0.064,0.112,2.734,0.0952,0.085,0.512,0.43,0.2613982
5.2,0.452,0.828,0.3306,0.0758,0.0336,0.012,0.24,0.136,0.35,4.148,0.1822,0.0746,0.554,0.388,0.2553191
4.1,0.3054,0.698,0.2118,0.0536,0.026,0.014,0.03,0.126,0.364,3.728,0.1646,0.0756,0.546,0.408,0.2401216
2.7,0.2476,0.858,0.1568,0.0444,0.0304,0.016,0.384,0.078,0.376,4.756,0.2472,0.0692,0.45,0.624,0.1884498
2.46,0.2256,0.828,0.1266,0.0478,0.0352,0.016,0.268,0.08,0.244,4.446,0.1984,0.0652,0.442,0.618,0.2370821
6.02,0.688,1.068,0.5366,0.0974,0.04,0.014,0,0.168,0.17,3.364,0.245,0.0804,0.532,0.394,0.1641337
5.54,0.6054,1.03,0.4684,0.0856,0.0354,0.016,0.038,0.198,0.146,3.46,0.2828,0.0778,0.52,0.442,0.2036474
1.96,0.136,0.612,0.075,0.0278,0.0212,0.012,0.016,0.086,0.196,5.288,0.2706,0.0732,0.506,0.49,0.2553191
2,0.1308,0.612,0.0686,0.0302,0.022,0.01,0.054,0.082,0.216,5.952,0.2662,0.075,0.504,0.49,0.2644377
1.3,0.093,0.64,0.047,0.0172,0.0168,0.012,0.004,0.07,0.134,2.938,0.3824,0.0664,0.474,0.558,0.1702128
1.22,0.0802,0.6,0.0382,0.0156,0.0164,0.01,0.026,0.07,0.124,2.178,0.3638,0.065,0.474,0.528,0.1854103
1.6,0.1214,0.688,0.0526,0.0306,0.0282,0.01,0.006,0.076,0.204,3.002,0.2466,0.0694,0.494,0.578,0.224924
1.28,0.0848,0.558,0.0304,0.0194,0.025,0.01,0,0.084,0.134,2.144,0.2264,0.0726,0.492,0.486,0.2340426
2.88,0.2526,0.812,0.1678,0.0464,0.0204,0.018,0.256,0.164,0.318,3.426,0.3036,0.0704,0.466,0.576,0.2492401
2.72,0.221,0.764,0.1452,0.042,0.0198,0.014,0.15,0.184,0.44,4.58,0.2394,0.0728,0.466,0.556,0.2492401
2.08,0.1344,0.586,0.0756,0.026,0.0228,0.01,0,0.15,0.144,3.752,0.3234,0.073,0.522,0.462,0.2340426
2.76,0.1788,0.59,0.1084,0.0382,0.0222,0.01,0,0.174,0.138,4.092,0.3048,0.0718,0.528,0.424,0.2462006
5.48,0.7074,1.198,0.5622,0.0988,0.0264,0.02,0.598,0.146,0.298,4.08,0.307,0.0722,0.516,0.512,0.2036474
5.6,0.659,1.142,0.5322,0.0856,0.0272,0.014,0.184,0.14,0.298,4.492,0.277,0.0796,0.516,0.498,0.2006079

0 comments on commit 0d49567

Please sign in to comment.