-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
3d91ccd
commit 0d49567
Showing
7 changed files
with
1,309 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
% Read Soil Dataset | ||
data = xlsread('C:\Users\sarmad\Desktop\soil.xlsx'); | ||
|
||
% -------------- Separating features in separate variable --------------- | ||
X = data(:, 1:15); | ||
|
||
% -------------- Separating magnitude in separate variable -------------- | ||
y = data(:,16); | ||
|
||
|
||
|
||
|
||
% Choose grid of parameter values to use | ||
cvalues = [0; .01; .05; .1;.25;.5;.75;.9;1]; | ||
propvalues = [.0001; .001; .002; .00225; .0025; .00275; .0028; .0029; .003; .004; .005; .0075; .01;.025; .05; .1; .15;.2;.3;.4;.5;.6]; | ||
|
||
%%%% method = 2, chooses the sequential algorithm. | ||
|
||
method = 2; | ||
initcoef = []; | ||
[CoefMatrix dfMatrix SSMatrix] = OscarSelect(X, y, cvalues, propvalues, initcoef, method); % Calls function to perform optimization on the grid. | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
|
||
|
||
% X should be a matrix whose rows are the observations and columns are the | ||
% predictors (n by p). The intercept is omitted (so the response and each | ||
% predictor should have mean zero). | ||
|
||
function [CoefMatrix dfMatrix SSMatrix] = OscarReg(X, y, cvalues, propvalues, initcoef) | ||
|
||
p = length(X(1,:)); | ||
q=p*(p-1)/2; | ||
|
||
|
||
% Standardize predictors to mean zero, variance 1, and center response to | ||
% mean zero. | ||
|
||
for i = 1:p | ||
X(:,i) = (X(:,i)-mean(X(:,i)))/std(X(:,i)); | ||
end; | ||
y = y-mean(y); | ||
|
||
|
||
|
||
corrmat=X'*X; | ||
[ivec,jvec,svec]=find(corrmat); | ||
Sparseavec=[ivec;ivec+p;ivec;ivec+p]; | ||
Sparsebvec=[jvec;jvec+p;jvec+p;jvec]; | ||
Element1vec=[svec;svec;-svec;-svec]; | ||
|
||
F=sparse(Sparseavec,Sparsebvec,Element1vec,2*p+q,2*p+q); | ||
|
||
clear corrmat ivec jvec svec Sparseavec Sparsebvec Element1vec; | ||
|
||
c=[-X'*y;X'*y;zeros(q,1)]; | ||
lowbound=[zeros(2*p,1);-inf(q,1)]; | ||
|
||
Sparse1vec=[ones(2*p+q,1);2;2;2;3;3;3]; | ||
Sparse2vec=[(1:(2*p+q))';1;p+1;2*p+1;2;p+2;2*p+1]; | ||
Elementvec=[ones(2*p,1);ones(q,1);1;1;-1;1;1;-1]; | ||
rowcount=2; | ||
paircount=2*p+1; | ||
|
||
initcoef1=[max(initcoef,0);-min(initcoef,0)]; | ||
|
||
for i=1:p | ||
for j=(i+1):p | ||
initcoef1=[initcoef1;max(abs(initcoef(i)),abs(initcoef(j)))]; | ||
if rowcount>2 | ||
Sparse1vec=[Sparse1vec;rowcount;rowcount;rowcount;rowcount+1;rowcount+1;rowcount+1]; | ||
Sparse2vec=[Sparse2vec;i;i+p;paircount;j;j+p;paircount]; | ||
Elementvec=[Elementvec;1;1;-1;1;1;-1]; | ||
end; | ||
rowcount=rowcount+2; | ||
paircount=paircount+1; | ||
end; | ||
end; | ||
|
||
|
||
% Grid search over c values, then by proportion of bound. For each c, the | ||
% constraint matrix is updated. | ||
|
||
CoefMatrix = zeros(p,length(propvalues),length(cvalues)); | ||
SSMatrix = zeros(1,length(propvalues),length(cvalues)); | ||
dfMatrix = zeros(1,length(propvalues),length(cvalues)); | ||
|
||
for ccount = 1:length(cvalues) | ||
cvalue = cvalues(ccount); | ||
tempvec=[(1-cvalue)*ones(2*p,1);cvalue*ones(q,1)]; | ||
Elementvec(1:(2*p+q))=tempvec; | ||
clear tempvec; | ||
|
||
A=sparse(Sparse1vec,Sparse2vec,Elementvec,2*q+1,2*p+q); | ||
initialnorm=A(1,:)*initcoef1; | ||
for propcount = 1:length(propvalues) | ||
tbound = propvalues(propcount)*initialnorm; | ||
b=[tbound; zeros(2*q,1)]; | ||
|
||
if (ccount == 1) | ||
if (propcount == 1) | ||
start=zeros(2*p+q,1); | ||
end; | ||
elseif (propcount > 1) | ||
start=[max(CoefMatrix(:,propcount-1,ccount),0);-min(CoefMatrix(:,propcount-1,ccount),0)]; | ||
for i=1:p | ||
for j=(i+1):p | ||
start=[start;max(abs(start(i)),abs(start(j)))]; | ||
end; | ||
end; | ||
else | ||
start=[max(CoefMatrix(:,propcount,ccount-1),0);-min(CoefMatrix(:,propcount,ccount-1),0)]; | ||
for i=1:p | ||
for j=(i+1):p | ||
start=[start;max(abs(start(i)),abs(start(j)))]; | ||
end; | ||
end; | ||
end; | ||
|
||
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | ||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | ||
%%%%%% | ||
%%%%%% | ||
%%%%%% The TOMLAB package is now used as a quadratic programming solver. | ||
|
||
%%%%%% IF AN ALTERNATIVE QUADRATIC PROGRAMMING SOLVER IS AVAILABLE, | ||
%%%%%% REPLACE THIS SECTION WITH A CALL TO THAT SOLVER BASED ON | ||
%%%%%% THE FORMULATION | ||
|
||
% minimize (over x) : 0.5 x' F x + c' x | ||
% subject to A x <= b and x >= lowbound | ||
% | ||
|
||
Prob = qpAssign(F, c, A, [], b, lowbound, [], start, [], [], [], [], [], [], [], []); | ||
Result = tomRun('sqopt', Prob, 0); | ||
|
||
x=Result.x_k; | ||
exitflag = Result.ExitFlag; | ||
conv = (exitflag == 0); | ||
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | ||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | ||
|
||
|
||
CoefMatrix(:,propcount,ccount) = round((x(1:p)-x(p+1:2*p))*10^7)*10^(-7); | ||
SSMatrix(:,propcount,ccount) = sumsqr(y-X*CoefMatrix(:,propcount,ccount)); | ||
|
||
EffParVec=unique(abs(CoefMatrix(:,propcount,ccount))); | ||
EffParVec=EffParVec(EffParVec>0); | ||
dfMatrix(:,propcount,ccount) = length(EffParVec); | ||
|
||
if conv == 0 | ||
fprintf('Optimization may not have converged properly for c = %g and prop = %g.\n', cvalue, propvalues(propcount)); | ||
else | ||
fprintf('Optimization complete for c = %g and prop = %g.\n', cvalue, propvalues(propcount)); | ||
end; | ||
|
||
end; | ||
|
||
end; | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
|
||
function [CoefMatrix dfMatrix SSMatrix] = OscarSelect(X, y, cvalues, propvalues, initcoef, method) | ||
|
||
p = length(X(1,:)); | ||
|
||
% Standardize predictors to mean zero, variance 1, and center response to | ||
% mean zero. | ||
|
||
for i = 1:p | ||
X(:,i) = (X(:,i)-mean(X(:,i)))/std(X(:,i)); | ||
end; | ||
y = y-mean(y); | ||
|
||
if nargin < 6 | ||
method = 2; | ||
if nargin < 5 | ||
initcoef = []; | ||
end; | ||
end; | ||
|
||
cvalues=sort(cvalues); | ||
propvalues=sort(propvalues); | ||
|
||
if isempty(initcoef) | ||
[initcoef] = regress(y,X); | ||
end; | ||
if length(initcoef)<p | ||
error('initial estimate must be of length p'); | ||
end; | ||
if min(cvalues)<0 | ||
error('all values of c must be nonnegative'); | ||
end; | ||
if max(cvalues)>1 | ||
error('values for c cannot exceed 1'); | ||
end; | ||
if min(propvalues)<=0 | ||
error('values for proportion must be greater than 0'); | ||
end; | ||
if max(propvalues)>=1 | ||
error('values for proportion must be smaller than 1'); | ||
end; | ||
|
||
if method == 1 | ||
[CoefMatrix dfMatrix SSMatrix] = OscarReg(X, y, cvalues, propvalues, initcoef); | ||
end; | ||
if method ~= 1 | ||
[CoefMatrix dfMatrix SSMatrix] = OscarSeqReg(X, y, cvalues, propvalues, initcoef); | ||
end; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
|
||
function [coefs df ssquares OrderMatrix conv] = OscarSeqOpt (tbound, cvalue, Xmatrix, y, start, OrderMatrix) | ||
|
||
p = length(Xmatrix(1,:))/2; | ||
|
||
sdresponse = std(y); | ||
y = sqrt(p)*y/sdresponse; | ||
% The rescaling of the response is for computational purposes only (makes the overall variance of y to be p), the | ||
% coefficients will be rescaled back. | ||
|
||
start = sqrt(p)*start/sdresponse; | ||
lowbound = zeros(2*p,1); | ||
|
||
flagvalue = 0; | ||
itervalue = 0; | ||
SolCoef = start(1:p)-start((p+1):(2*p)); | ||
|
||
% The sequential constrained least squares problem is now solved by adding | ||
% an additional constraint at each step and iterating until convergence. | ||
|
||
while (flagvalue == 0) && (itervalue < p^2) | ||
nconstraints = length(OrderMatrix(:,1)); | ||
A1 = (1-cvalue)*ones(nconstraints,p)+cvalue*(p*ones(nconstraints,p)-OrderMatrix); | ||
Amatrix = [A1 A1]; | ||
Bbound = (sqrt(p)*tbound/sdresponse)*ones(1,length(Amatrix(:,1))); | ||
|
||
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | ||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | ||
|
||
% A quadratic programming solver is now used to solve the constrained least | ||
% squares problem at each iteration. | ||
% | ||
% The problem is given as a constrained least squares in the form | ||
% Minimize with respect to u: | ||
% | ||
% || y - Xmatrix u ||^2 | ||
% | ||
% subject to: | ||
% Amatrix u <= Bbound and u >= lowbound | ||
% | ||
% The following call to the least squares solver can be changed to use any solver if a more efficient one is | ||
% available. This is the only thing that needs to be changed. | ||
|
||
options = optimset('Display', 'off', 'LargeScale', 'off'); | ||
[x ssquare resid exitflag] = lsqlin(Xmatrix, y, Amatrix, Bbound, [], [], lowbound, [], start, options); | ||
|
||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | ||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | ||
|
||
|
||
SolCoef1 = round((x(1:p)-x((p+1):(2*p)))*10^7)*10^(-7); | ||
[currcoef, neworder] = sort(-abs(SolCoef1)); | ||
sameaslast = [0; (currcoef(2:p) == currcoef(1:(p-1)))]; | ||
startblocksame = [((sameaslast(2:p) - sameaslast(1:(p-1))) > 0); 0]; | ||
endblocksame = [((sameaslast(2:p) - sameaslast(1:(p-1))) < 0); sameaslast(p)]; | ||
nblocksame = sum(startblocksame); | ||
vi = (1:p)'; | ||
visbs = vi(logical(startblocksame)); | ||
viebs = vi(logical(endblocksame)); | ||
for j = 1:nblocksame | ||
blockmean = mean(vi(visbs(j):viebs(j))); | ||
vi(visbs(j):viebs(j)) = blockmean * ones(viebs(j) - visbs(j) + 1,1); | ||
end; | ||
[tempinvsort,vind] = sort(neworder); | ||
a1weights = vi(vind)'; | ||
currcoef = -currcoef; | ||
OrderMatrix = unique([OrderMatrix; a1weights],'rows'); | ||
flagvalue = 1; | ||
test = round(SolCoef*10^7)*10^(-7)-round(SolCoef1*10^7)*10^(-7); | ||
SolCoef = SolCoef1; | ||
flag2 = sqrt(sumsqr(test)); | ||
if (flag2>10^(-7)) | ||
flagvalue = 0; | ||
end; | ||
itervalue = itervalue+1; | ||
start = x; | ||
end; | ||
|
||
coefs = sdresponse*SolCoef/sqrt(p); | ||
df = length(unique(currcoef(currcoef>0))); | ||
ssquares = (sdresponse^2)*ssquare/p; | ||
conv = ((itervalue < p^2) && (exitflag > 0)); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
|
||
|
||
% X should be a matrix whose rows are the observations and columns are the | ||
% predictors (n by p). The intercept is omitted (so the response and each | ||
% predictor should have mean zero). | ||
|
||
function [CoefMatrix dfMatrix SSMatrix] = OscarSeqReg(X, y, cvalues, propvalues, initcoef) | ||
|
||
p = length(X(1,:)); | ||
|
||
% Standardize predictors to mean zero, variance 1, and center response to | ||
% mean zero. | ||
|
||
for i = 1:p | ||
X(:,i) = (X(:,i)-mean(X(:,i)))/std(X(:,i)); | ||
end; | ||
Xmatrix = [X -X]; % Need for positive and negative parts of beta | ||
y = y-mean(y); | ||
|
||
% Order initial estimate by magnitude, including ties. | ||
% Initial estimate is used for first guess at ordering of coefficients and also to set maximum value | ||
% for the bound t used in the constraint. The bound is given as proportion | ||
% of the initial value. | ||
|
||
[initcoeford, currorder] = sort(-abs(initcoef)); | ||
sameaslast = [0; (initcoeford(2:p) == initcoeford(1:(p-1)))]; | ||
startblocksame = [((sameaslast(2:p) - sameaslast(1:(p-1))) > 0); 0]; | ||
endblocksame = [((sameaslast(2:p) - sameaslast(1:(p-1))) < 0); sameaslast(p)]; | ||
nblocksame = sum(startblocksame); | ||
vi = (1:p)'; | ||
visbs = vi(logical(startblocksame)); | ||
viebs = vi(logical(endblocksame)); | ||
for i = 1:nblocksame; | ||
blockmean = mean(vi(visbs(i):viebs(i))); | ||
vi(visbs(i):viebs(i)) = blockmean * ones(viebs(i) - visbs(i) + 1,1); | ||
end; | ||
[tempinvsort,vind] = sort(currorder); | ||
a1 = vi(vind)'; | ||
initcoeford = -initcoeford; | ||
|
||
% Grid search over c values, then by proportion of bound. For each c, the | ||
% weighting is recomputed. | ||
|
||
CoefMatrix = zeros(p,length(propvalues),length(cvalues)); | ||
SSMatrix = zeros(1,length(propvalues),length(cvalues)); | ||
dfMatrix = zeros(1,length(propvalues),length(cvalues)); | ||
|
||
for ccount = 1:length(cvalues) | ||
OrderMatrix = a1; | ||
weighting = a1; | ||
cvalue = cvalues(ccount); | ||
for i=1:p | ||
weighting(i) = (1-cvalue)+cvalue*(p-i); | ||
end; | ||
for propcount = 1:length(propvalues) | ||
tbound = propvalues(propcount)*weighting*initcoeford; | ||
if (ccount == 1) | ||
if (propcount == 1) | ||
start=zeros(2*p,1); | ||
end; | ||
elseif (propcount > 1) | ||
start=[max(CoefMatrix(:,propcount-1,ccount),0);-min(CoefMatrix(:,propcount-1,ccount),0)]; | ||
else | ||
start=[max(CoefMatrix(:,propcount,ccount-1),0);-min(CoefMatrix(:,propcount,ccount-1),0)]; | ||
end; | ||
[coefs df ssquares conv] = OscarSeqOpt(tbound, cvalue, Xmatrix, y, start, OrderMatrix); | ||
CoefMatrix(:,propcount,ccount) = coefs; | ||
SSMatrix(:,propcount,ccount) = ssquares; | ||
dfMatrix(:,propcount,ccount) = df; | ||
if conv == 0 | ||
fprintf('Optimization may not have converged properly for c = %g and prop = %g.\n', cvalue, propvalues(propcount)); | ||
else | ||
fprintf('Optimization complete for c = %g and prop = %g.\n', cvalue, propvalues(propcount)); | ||
end; | ||
end; | ||
end; | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
BaseSat,SumCation,CECbuffer,Ca,Mg,K,Na,P,Cu,Zn,Mn,HumicMatter,Density,pH,ExchAc,Diversity | ||
2.34,0.1576,0.614,0.0892,0.0328,0.0256,0.01,0,0.08,0.184,3.2,0.122,0.0822,0.516,0.466,0.2765957 | ||
1.64,0.097,0.516,0.0454,0.0218,0.0198,0.01,0,0.064,0.112,2.734,0.0952,0.085,0.512,0.43,0.2613982 | ||
5.2,0.452,0.828,0.3306,0.0758,0.0336,0.012,0.24,0.136,0.35,4.148,0.1822,0.0746,0.554,0.388,0.2553191 | ||
4.1,0.3054,0.698,0.2118,0.0536,0.026,0.014,0.03,0.126,0.364,3.728,0.1646,0.0756,0.546,0.408,0.2401216 | ||
2.7,0.2476,0.858,0.1568,0.0444,0.0304,0.016,0.384,0.078,0.376,4.756,0.2472,0.0692,0.45,0.624,0.1884498 | ||
2.46,0.2256,0.828,0.1266,0.0478,0.0352,0.016,0.268,0.08,0.244,4.446,0.1984,0.0652,0.442,0.618,0.2370821 | ||
6.02,0.688,1.068,0.5366,0.0974,0.04,0.014,0,0.168,0.17,3.364,0.245,0.0804,0.532,0.394,0.1641337 | ||
5.54,0.6054,1.03,0.4684,0.0856,0.0354,0.016,0.038,0.198,0.146,3.46,0.2828,0.0778,0.52,0.442,0.2036474 | ||
1.96,0.136,0.612,0.075,0.0278,0.0212,0.012,0.016,0.086,0.196,5.288,0.2706,0.0732,0.506,0.49,0.2553191 | ||
2,0.1308,0.612,0.0686,0.0302,0.022,0.01,0.054,0.082,0.216,5.952,0.2662,0.075,0.504,0.49,0.2644377 | ||
1.3,0.093,0.64,0.047,0.0172,0.0168,0.012,0.004,0.07,0.134,2.938,0.3824,0.0664,0.474,0.558,0.1702128 | ||
1.22,0.0802,0.6,0.0382,0.0156,0.0164,0.01,0.026,0.07,0.124,2.178,0.3638,0.065,0.474,0.528,0.1854103 | ||
1.6,0.1214,0.688,0.0526,0.0306,0.0282,0.01,0.006,0.076,0.204,3.002,0.2466,0.0694,0.494,0.578,0.224924 | ||
1.28,0.0848,0.558,0.0304,0.0194,0.025,0.01,0,0.084,0.134,2.144,0.2264,0.0726,0.492,0.486,0.2340426 | ||
2.88,0.2526,0.812,0.1678,0.0464,0.0204,0.018,0.256,0.164,0.318,3.426,0.3036,0.0704,0.466,0.576,0.2492401 | ||
2.72,0.221,0.764,0.1452,0.042,0.0198,0.014,0.15,0.184,0.44,4.58,0.2394,0.0728,0.466,0.556,0.2492401 | ||
2.08,0.1344,0.586,0.0756,0.026,0.0228,0.01,0,0.15,0.144,3.752,0.3234,0.073,0.522,0.462,0.2340426 | ||
2.76,0.1788,0.59,0.1084,0.0382,0.0222,0.01,0,0.174,0.138,4.092,0.3048,0.0718,0.528,0.424,0.2462006 | ||
5.48,0.7074,1.198,0.5622,0.0988,0.0264,0.02,0.598,0.146,0.298,4.08,0.307,0.0722,0.516,0.512,0.2036474 | ||
5.6,0.659,1.142,0.5322,0.0856,0.0272,0.014,0.184,0.14,0.298,4.492,0.277,0.0796,0.516,0.498,0.2006079 |