Code for “Don’t Overfit!” competition

Here you can find the code for my entry in the “Don’t Overfit!” prediction competition. For this competition I used Bayesian inference, implemented using Gibbs sampling. For details see my earlier post.

Contents

Gibbs sampler

function [pred,sel]=doGibbs(ytrain,xtrain,xtest) %#codegen
% doGibbs  Gibbs sampler for a linear latent-index classifier with
% spike-and-slab variable selection ("Don't Overfit!" competition entry).
%
% Inputs:
%   ytrain - ntrain-by-1 vector of binary training labels (used as logical)
%   xtrain - ntrain-by-k matrix of regressors
%   xtest  - ntest-by-k matrix of test regressors, same encoding as xtrain
%
% Outputs:
%   pred - ntest-by-1 averaged probability that each test label is true
%   sel  - k-by-1 fraction of retained draws in which each regressor was
%          included (threshold these for a variable-selection submission)
%
% NOTE(review): the constants 1e-3 / 5e-4 / 25e-5 below treat each observed
% x as the midpoint of a rounding interval of width 1e-3 (half-width at the
% 0/1 endpoints), i.e. the data appear to be rounded to 3 decimals --
% confirm against the competition's data description.

% number of burn-in sweeps (discarded) and retained posterior sweeps
nrburnin=1e4;

nrsamp=1e5;

% dimensions: test rows, training rows, number of regressors
ntest=size(xtest,1);

ntrain=size(xtrain,1);

k=size(xtest,2);

% pre-allocate the accumulators that are averaged at the end
pred=zeros(ntest,1);

sel=zeros(k,1);

% column means over the pooled train+test x's, used to center the data
xmean=mean([xtrain; xtest]);

% rescale xtrain: center each column, then treat every entry as a point
% inside its rounding interval; entries exactly at 0 or 1 only have a
% half-width interval (pushed inward), so they are handled separately
xisdown=(xtrain==1);   % entries at the upper endpoint of the data range

xisup=(xtrain==0);     % entries at the lower endpoint

xtrain=repmat(xmean,ntrain,1)-xtrain;   % note the sign flip: mean - x

% per-entry interval width (rscale) and offset (addmat) used to redraw an
% EXCLUDED regressor's x's uniformly over their full rounding interval:
% width 1e-3 in the interior, 5e-4 at the endpoints
trainrscale=(1e-3)*ones(ntrain,k)-(5e-4)*(xisup | xisdown);

trainaddmat=(-5e-4)*ones(ntrain,k)+(25e-5)*(xisup | xisdown);

% move endpoint entries to the midpoint of their half-width interval
xtrain(xisdown)=xtrain(xisdown)+25e-5;

xtrain(xisup)=xtrain(xisup)-25e-5;

% reference midpoints; the truncation bounds below are anchored to these
xRef=xtrain;

% per-entry lower/upper bounds of each training x's rounding interval
xtrainlb=xRef-5e-4;

% NOTE(review): the hard -0.5/+0.5 limits for endpoint entries only match
% the exact rounding interval (xRef -/+ 25e-5) when the column mean is
% close to 0.5; presumably a deliberate simplification -- verify
xtrainlb(xisdown)=-0.5;

xtrainlb(xisup)=xRef(xisup)-25e-5;

xtrainub=xRef+5e-4;

xtrainub(xisup)=0.5;

xtrainub(xisdown)=xRef(xisdown)+25e-5;

% rescale xtest identically (test x's are never resampled, so no bounds)
xtisdown=(xtest==1);

xtisup=(xtest==0);

xtest=repmat(xmean,ntest,1)-xtest;

xtest(xtisdown)=xtest(xtisdown)+25e-5;

xtest(xtisup)=xtest(xtisup)-25e-5;

% start values: loose error margin, random coefficients in [0,1], every
% regressor included, and the implied latent index
err=10;

beta=rand(k,1);

isIncl=true(k,1);

ylat=xtrain*beta;

% main Gibbs loop (burn-in + retained sweeps)
for i=1:nrburnin+nrsamp

    % draw all uniforms for this sweep up front (consumed positionally)

    betarands=rand(k,2);

    xrands=rand(ntrain,k);

    % tighten the error margin: the sampler wants ylat>0 where ytrain is
    % true and ylat<0 where false; err is the current worst violation of
    % that sign constraint, floored at 0 -- once it hits 0 it stays there
    % and all subsequent draws satisfy the constraints exactly

    if err>0

        err=max(max(ylat.*~ytrain-ylat.*ytrain),0);

    end

    % loop over regressors: draw each beta_j given everything else

    for j=1:k

        % ////// subtract out the influence of this regressor /////

        ylat=ylat-xtrain(:,j)*beta(j);

        % //////// find upper and lower bound on beta_j ////////
        % beta_j is a priori uniform on [0,1]; each observation shrinks
        % the feasible interval (allowing an err-sized slack)

        betaUp=1;

        betaLow=0;

        % per-observation value of beta_j at which this row's latent
        % index changes sign
        rat=-ylat./xtrain(:,j);

        for q=1:ntrain

            % positive x: a true label pushes the lower bound up, a
            % false label pushes the upper bound down

            if xtrain(q,j)>0

                margin=err/xtrain(q,j);

                % y = true

                if ytrain(q)

                    if rat(q)-margin>betaLow

                        betaLow=rat(q)-margin;

                    end

                % y = false

                else

                    if rat(q)+margin<betaUp

                        betaUp=rat(q)+margin;

                    end

                end

            % negative x: the two labels' roles flip (rows with x==0
            % impose no constraint and are skipped)

            elseif xtrain(q,j)<0

                margin=-err/xtrain(q,j);

                % y = true

                if ytrain(q)

                    if rat(q)+margin<betaUp

                        betaUp=rat(q)+margin;

                    end

                % y = false

                else

                    if rat(q)-margin>betaLow

                        betaLow=rat(q)-margin;

                    end

                end

            end

        end

        % ///////////// sample new beta_j ////////////////////

        % length of the feasible "slab" interval

        prob=betaUp-betaLow;

        % can we eliminate this regressor?  zero is feasible only when
        % the lower bound never moved; the point mass at 0 then gets
        % conditional weight 1/(1+prob)

        if betaLow==0 && betarands(j,1)<=1/(1+prob);

            beta(j)=0;

            isIncl(j)=false;

        % otherwise sample beta_j uniformly on the feasible interval

        else

            beta(j)=betaLow+betarands(j,2)*(betaUp-betaLow);

            isIncl(j)=true;

        end

        % ///////// update latent: add the contribution back ///////////

        ylat=ylat+xtrain(:,j)*beta(j);

    end

    % /////////// sample new x's within their rounding intervals ////////

    for j=1:k

        % parameter not included: these x's do not affect the latent
        % index, so redraw the whole column uniformly over the full
        % rounding intervals in one vectorized step

        if ~isIncl(j)

            xtrain(:,j)=xRef(:,j)+trainrscale(:,j).*xrands(:,j)+trainaddmat(:,j);

        % parameter included: each x is uniform on its rounding interval
        % truncated so the label's sign constraint keeps holding

        else

            % slack in x units (beta(j)>0 whenever the regressor is included)
            margin=err/beta(j);

            % loop over data

            for q=1:ntrain

                % remove this observation's contribution to the latent

                ylat(q)=ylat(q)-xtrain(q,j)*beta(j);

                % start from the rounding-interval bounds

                xUp=xtrainub(q,j);

                xLow=xtrainlb(q,j);

                % x value at which this row's latent index changes sign

                xrat=-ylat(q)/beta(j);

                % y = true: raises the lower bound

                if ytrain(q)

                    if xrat-margin>xLow

                        xLow=xrat-margin;

                    end

                % y = false: lowers the upper bound

                else

                    if xrat+margin<xUp

                        xUp=xrat+margin;

                    end

                end

                % sample new x uniformly on the truncated interval

                xtrain(q,j)=xLow+xrands(q,j)*(xUp-xLow);

                % restore its contribution to the latent

                ylat(q)=ylat(q)+xtrain(q,j)*beta(j);

            end

        end

    end

    % ////////////// save results (after burn-in only) ////////////////

    if i>nrburnin

        % the rounding error of xtest*beta is a sum of independent
        % uniforms (each of width 1e-3, variance 1e-6/12), so it is
        % approximately normal with this standard deviation

        stdv=sqrt((1e-6)*sum(beta.^2)/12);

        % P(latent index > 0) under that normal approximation, via the
        % Gaussian CDF written with erf

        pred=pred+0.5+0.5*erf((xtest*beta)/(sqrt(2)*stdv));

        sel=sel+double(isIncl); % threshold these for submission

    end

end

% normalize the accumulated sums into averages over retained draws

pred=pred/nrsamp;

sel=sel/nrsamp;

end

Published with MATLAB® 7.12

 

Leave a Reply

Your email address will not be published. Required fields are marked *

*

You may use these HTML tags and attributes: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code lang=""> <del datetime=""> <em> <i> <q cite=""> <strike> <strong> <pre lang="" extra="">