% J.C. Spall, Sept. 1998
% twoSGconstrained
% Code for evaluation of second-order SGSA (2SGSA) versus first-order
% SGSA (1SGSA, as in Chap. 5 of ISSO). Code is for comparative evaluation purposes; hence,
% it includes much that is not required for a basic implementation. Further, it is
% in no way "optimized" for efficiency or generality; this is strictly research
% code for the purpose of getting a basic idea of how 2SGSA works.
%
% Code includes the capability for initializing 2SGSA by running 1SGSA for
% N iterations. This code allows for averaging of the
% SG gradients and Hessian estimates at EACH iteration after the
% initial N iterations, where only 1SGSA is used for estimating theta. We use "thetaH"
% for the 2SGSA recursion and "theta" for the 1SGSA recursion.
%
% UPDATE MAR. 2006: Feedback and weighting versions of this code are available from the author;
% these can generally provide enhanced performance. (Reference: Spall, J. C. (2006), "Feedback and
% Weighting Mechanisms for Improving Jacobian (Hessian) Estimates in the Adaptive Simultaneous
% Perturbation Algorithm," Proceedings of the American Control Conference, 14-16 June 2006,
% Minneapolis, MN, paper ThB09.1 in CD-ROM.)
%
clear all
close all
global p z sigma          % globals shared with the loss/gradient measurement functions
p=10;                     % dimension of theta
% meas. noise standard deviation; multiplies all elements of N(0,I) noise vector
sigma=0;
% value of numerator in a_k sequence for all iterations of 1SGSA
% and first N iterations in the initialization of 2SGSA
aN=20*1.4;
% value of numerator in a_k in 2SGSA (beginning at N+1);
% the number 23.622 below is relevant to N=500 and A=50
% (i.e., it is such that 23.622/(N+1+A)^.501 = 1)
a=.1*23.622;
A=50;
c=.001;
alpha=1;
gamma=.5;
n=1000;                   % total number of 2SGSA iterations (including the N initialization iterations)
N=500;                    % number of initial 1SGSA iterations used to initialize 2SGSA
% number of cases (replications) of 2SGSA and 1SGSA
cases=1;
% number of individual gradient/Hessian estimates to be averaged at each iter.
gH_avg=1;
avg=1;                    % number of loss evaluations averaged in the "blocking" test below
rand('seed',31415297)
randn('seed',311113)
thetamin=-5*ones(p,1);    % Lower bounds on theta (in unconstrained case, set to large neg. no.)
thetamax=5*ones(p,1);     % Upper bounds on theta (in unconstrained case, set to large pos. no.)
%
% The loop 1:cases below is for doing multiple cases for use in averaging to
% evaluate the relative performance.
%
% The loop 1:N below uses the standard 1SG form to initialize 2SGSA.
%
% Lines below initialize various recursions for the gradient/Hess. averaging
% and for final error reporting based on the average of the solutions for
% "cases" replications.
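%
% Summary of the two recursions compared below (added note; Y_k(.) denotes a noisy gradient
% measurement from loss4thgrad, with gain sequences a_k, c_k as set above):
%   1SGSA:  theta  <- theta  - a_k*Y_k(theta)
%   2SGSA:  thetaH <- thetaH - a_k*(Hbarbar\Y_k(thetaH)), where Hbarbar is a positive-definite
%           transformation of Hbar, the running average of per-iteration Hessian estimates formed
%           from gradient measurements at thetaH +/- c_k*delta (delta a vector of Bernoulli +/-1
%           perturbations), and Y_k(thetaH) may be averaged over gH_avg measurements.
%           Both recursions are projected componentwise onto [thetamin, thetamax].
%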
meanHbar=0;
errtheta=0; errthetaIA=0; errthetaH=0;
losstheta=0; lossthetaIA=0; lossthetaH=0;
lossthetasq=0; lossthetaIAsq=0; lossthetaHsq=0;
tolerancetheta=10;
toleranceloss=0;
truetheta=zeros(p,1);
theta_0=ones(p,1);
loss4thorder(theta_0)     % this is a particular loss function used in some studies (with loss4thgrad)
% DUMMY STATEMENT FOR SETTING DIMENSIONS OF Hhat (AVOIDS OCCASIONAL ERROR MESSAGES)
Hhat=eye(p);
for j=1:cases
   caseiter=j
   % INITIALIZATION OF PARAMETER AND HESSIAN ESTIMATES
   theta=theta_0;
   thetaH=theta;
   Hbar=.1*eye(p);
   %*********2SG*********
   % INITIAL N ITERATIONS OF 1SG PRIOR TO 2SG ITERATIONS
   for k=1:N
      ak=aN/(k+A)^alpha;
      Gk=loss4thgrad(thetaH);
      thetaHlag=thetaH;
      thetaH=thetaH-ak*Gk;
      % Checking for constraints below
      thetaH=min(thetaH,thetamax);
      thetaH=max(thetaH,thetamin);
      if max(abs(thetaHlag-thetaH)) > tolerancetheta;
         thetaH=thetaHlag;
      end
   end
   lossold=0;
   for i=1:avg
      lossold=lossold+loss4thnoise(thetaH);
   end
   %
   % 2SG ITERATIONS FOLLOWING INITIALIZATION
   %
   for k=N+1:n
      ak=a/(k+A-N)^alpha;
      ck=c/k^gamma;
      ghatinput=0;
      Hhatinput=0;
      % GENERATION OF AVERAGED GRADIENT AND HESSIAN (NO AVERAGING IF gH_avg=1)
      for m=1:gH_avg
         delta=2*round(rand(p,1))-1;
         thetaplus=thetaH+ck*delta;
         thetaminus=thetaH-ck*delta;
         ghatplus=loss4thgrad(thetaplus);
         ghatminus=loss4thgrad(thetaminus);
         % STATEMENT PROVIDING AN AVERAGE OF GRAD. MEASUREMENTS (AT thetaH) PER ITERATION
         ghatinput=((m-1)/m)*ghatinput+loss4thgrad(thetaH)/m;
         deltaghat=ghatplus-ghatminus;
         % Column i of Hhat is deltaghat(i)./(2*ck*delta), i.e., Hhat(j,i)=deltaghat(i)/(2*ck*delta(j));
         % symmetrizing below gives the per-iteration Hessian estimate
         for i=1:p
            Hhat(:,i)=deltaghat(i)./(2*ck*delta);
         end
         Hhat=.5*(Hhat+Hhat');
         Hhatinput=((m-1)/m)*Hhatinput+Hhat/m;
      end
      Hbar=((k-N)/(k-N+1))*Hbar+Hhatinput/(k-N+1);
      % THE THETA UPDATE (FORM BELOW USES GAUSSIAN ELIMINATION TO AVOID
      % DIRECT COMPUTATION OF HESSIAN INVERSE)
      Hbarbar=sqrtm(Hbar*Hbar+.000001*eye(p)/k);
      thetaHlag=thetaH;
      thetaH=thetaH-ak*(Hbarbar\ghatinput);
      % Checking for constraints below
      thetaH=min(thetaH,thetamax);
      thetaH=max(thetaH,thetamin);
      % Steps below perform "blocking" step with "avg" no. of loss evaluations
      lossnew=0;
      for i=1:avg
         lossnew=lossnew+loss4thnoise(thetaH);
      end
      if lossnew/avg > lossold/avg-toleranceloss;
         thetaH=thetaHlag;
      else
         lossold1=lossold;
         lossold=lossnew;
      end
      if max(abs(thetaHlag-thetaH)) > tolerancetheta;
         lossold=lossold1;
         thetaH=thetaHlag;
      end
   end
   thetaH
   Hbar
   loss4thorder(thetaH)
   %
   %********1SGSA*************
   % The iterations below are the basic SG iterations, using the same aN/(k+A)^(.) form of
   % gain sequence as the 1:N block above (where 2SGSA is not fully engaged) and the same
   % number of loss gradient measurements. Each loss evaluation in 2SGSA is also counted
   % as having the same cost as a gradient evaluation (hence the multiplier "4" vs. "3" in
   % the "for" loops below). The overall loop is broken into two parts to accommodate a
   % sliding window of the last IA iterates for an iterate-averaging solution.
   %
   IA=200;                % amount of iterate averaging
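   % Accounting for the evaluation budget used below (added note): each pass of the inner m-loop
   % above makes 3 gradient measurements (at thetaH and at thetaH +/- ck*delta), and the blocking
   % loss evaluations are counted as one further gradient-equivalent per pass, giving
   % 4*(n-N)*gH_avg for the 2SG phase; the N initialization iterations contribute one gradient
   % measurement each. Hence the 1SGSA run below is allotted N + 4*(n-N)*gH_avg iterations
   % (one gradient measurement per iteration), split so the last IA iterates can be averaged.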
   for k=1:N+4*(n-N)*gH_avg-IA
      ak=aN/(k+A)^.602;
      Gk=loss4thgrad(theta);
      thetalag=theta;
      theta=theta-ak*Gk;
      % Checking for constraints below
      theta=min(theta,thetamax);
      theta=max(theta,thetamin);
      if max(abs(thetalag-theta)) > tolerancetheta;
         theta=thetalag;
      end
   end
   thetabar=0;
   % Final IA iterations, accumulating the iterates for the iterate-averaging solution
   for k=N+4*(n-N)*gH_avg-IA+1:N+4*(n-N)*gH_avg
      ak=aN/(k+A)^.602;
      Gk=loss4thgrad(theta);
      thetalag=theta;
      theta=theta-ak*Gk;
      % Checking for constraints below
      theta=min(theta,thetamax);
      theta=max(theta,thetamin);
      if max(abs(thetalag-theta)) > tolerancetheta;
         theta=thetalag;
      end
      thetabar=thetabar+theta;
   end
   theta
   thetabar=thetabar/IA;
   % ACCUMULATION OF TERMINAL ERROR AND LOSS STATISTICS FOR THIS REPLICATION
   meanHbar=meanHbar+Hbar;
   errtheta=errtheta+(theta-truetheta)'*(theta-truetheta);
   errthetaIA=errthetaIA+(thetabar-truetheta)'*(thetabar-truetheta);
   errthetaH=errthetaH+(thetaH-truetheta)'*(thetaH-truetheta);
   lossthetasq=lossthetasq+loss4thorder(theta)^2;
   lossthetaIAsq=lossthetaIAsq+loss4thorder(thetabar)^2;
   lossthetaHsq=lossthetaHsq+loss4thorder(thetaH)^2;
   losstheta=losstheta+loss4thorder(theta);
   lossthetaIA=lossthetaIA+loss4thorder(thetabar);
   lossthetaH=lossthetaH+loss4thorder(thetaH);
end
meanHbar/cases
% normalized results of 1SGSA and 2SGSA (RMS terminal error relative to the initial error norm)
norm_theta=((errtheta/cases)^.5)/((theta_0-truetheta)'*(theta_0-truetheta))^.5
norm_thetaIA=((errthetaIA/cases)^.5)/((theta_0-truetheta)'*(theta_0-truetheta))^.5
norm_thetaH=((errthetaH/cases)^.5)/((theta_0-truetheta)'*(theta_0-truetheta))^.5
% standard dev. of the mean of the normalized loss values; these are multiplied by
% (cases/(cases-1))^.5 to account for the loss of a degree of freedom in the standard
% deviation calculation before use with a t-test
if cases > 1;
   (cases^(-.5))*((cases/(cases-1))^.5)*(lossthetasq/(cases*loss4thorder(theta_0)^2)-(losstheta/(cases*loss4thorder(theta_0)))^2)^.5
   (cases^(-.5))*((cases/(cases-1))^.5)*(lossthetaIAsq/(cases*loss4thorder(theta_0)^2)-(lossthetaIA/(cases*loss4thorder(theta_0)))^2)^.5
   (cases^(-.5))*((cases/(cases-1))^.5)*(lossthetaHsq/(cases*loss4thorder(theta_0)^2)-(lossthetaH/(cases*loss4thorder(theta_0)))^2)^.5
end
norm_losstheta=losstheta/(cases*loss4thorder(theta_0))
norm_lossthetaIA=lossthetaIA/(cases*loss4thorder(theta_0))
norm_lossthetaH=lossthetaH/(cases*loss4thorder(theta_0))
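%
% The loss-related functions loss4thorder, loss4thgrad, and loss4thnoise are not defined in this
% file; in the original distribution they are separate .m files. The sketches below are
% placeholders so the script can be run stand-alone (as local functions, MATLAB R2016b or later):
% they ASSUME the skewed-quartic loss commonly used as a test function in ISSO and a simple
% additive Gaussian noise model scaled by the global sigma, which may differ from the functions
% used in the original studies (the global z declared above is not used by these placeholders).
%
function L=loss4thorder(theta)
% Noise-free skewed-quartic loss (assumed form): L = y'y + 0.1*sum(y.^3) + 0.01*sum(y.^4), y = B*theta
global p
B=triu(ones(p))/p;
y=B*theta;
L=y'*y+.1*sum(y.^3)+.01*sum(y.^4);
end
%
function g=loss4thgrad(theta)
% Noisy measurement of the gradient of the loss above; additive N(0,sigma^2*I) noise (assumed model)
global p sigma
B=triu(ones(p))/p;
y=B*theta;
g=2*(B'*y)+.3*(B'*(y.^2))+.04*(B'*(y.^3))+sigma*randn(p,1);
end
%
function L=loss4thnoise(theta)
% Noisy loss measurement used in the "blocking" test; additive N(0,sigma^2) noise (assumed model)
global sigma
L=loss4thorder(theta)+sigma*randn;
end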