Code for Deloitte/FIDE Chess Rating Challenge

Contents

The base model: code

%///////////// chessModel() ///////////////////

function [predictedWhiteScore,model]=chessModel(whiteId,blackId,realizedWhiteScore,whiteUnknown,weights,holdoutWhiteId,holdoutBlackId,model,nrIt)

% Purpose: set up the model and generate predictions.
%
% Inputs:
%   whiteId, blackId   - per-match player indices for the training matches
%   realizedWhiteScore - per-match score for the white player (0, 0.5 or 1)
%   whiteUnknown       - per-match flags: true when the colour assignment
%                        of the two players is not known
%   weights            - nMatches-by-2 matrix of per-player match weights
%   holdoutWhiteId/holdoutBlackId - player indices of the matches to predict
%   model (optional)   - previously built model struct; rebuilt when absent
%   nrIt (optional)    - number of EP passes; defaults to 500
%
% Outputs:
%   predictedWhiteScore - predicted expected score for white on the holdout set
%   model               - (updated) model struct

% size of data

nMatches=length(whiteId);

nPlayers=double(max(max(whiteId),max(blackId)));

% input

if nargin<9

    model=struct();

    % parameters (initial guesses; whiteAdv and errorVar are re-estimated
    % by EM inside EPupdate)

    model.whiteAdv=0.2;

    model.shrinkPrec=1;

    model.errorVar=4;

    % approximate Bayes sufficient statistics: per-player Gaussian skill
    % posteriors in natural (precision/precision-mean) parameterisation,
    % plus the per-match likelihood messages used by EP

    model.skillPrec=model.shrinkPrec*ones(nPlayers,1);

    model.skillTau=zeros(nPlayers,1);

    model.lhPrec=zeros(nMatches,2);

    model.lhTau=zeros(nMatches,2);

    % total shrinkage weight per player: 1 (prior) + sum of match weights

    model.muSumWeights=ones(nPlayers,1);

    for i=1:nMatches

        model.muSumWeights(whiteId(i))=model.muSumWeights(whiteId(i))+weights(i,1);

        model.muSumWeights(blackId(i))=model.muSumWeights(blackId(i))+weights(i,2);

    end

    % running weighted sums of opponent skills (for shrinking players
    % towards the mean of their opposition)

    model.muSumSkills=zeros(nPlayers,1);

    model.muOppSkill=zeros(nMatches,2);

end

if nargin<10

    % number of iterations

    nrIt=500; % number used to make predictions in contest

end

% do updates
% NOTE(review): verify that EPupdate's declaration accepts all nine
% arguments passed here, in this order.

model = EPupdate(model,whiteId,blackId,realizedWhiteScore,whiteUnknown,weights,holdoutWhiteId,holdoutBlackId,nrIt);

% make predictions: convert natural parameters back to mean/variance

postVar1=1./model.skillPrec(holdoutWhiteId);

postMean1=postVar1.*model.skillTau(holdoutWhiteId);

postVar2=1./model.skillPrec(holdoutBlackId);

postMean2=postVar2.*model.skillTau(holdoutBlackId);

% standardised boundaries of the draw region [-1,1] on the predicted
% performance difference (white skill + white advantage - black skill)

ratioLowerBoundary=(postMean1+model.whiteAdv-postMean2-1)./sqrt(postVar1+postVar2+model.errorVar);

ratioUpperBoundary=(postMean1+model.whiteAdv-postMean2+1)./sqrt(postVar1+postVar2+model.errorVar);

% expected score = P(win) + 0.5*P(draw), written via the normal CDF
% Phi(x) = 0.5*(1+erf(x/sqrt(2)))

predictedWhiteScore=0.5*(1+0.5*erf(ratioLowerBoundary/sqrt(2))+0.5*erf(ratioUpperBoundary/sqrt(2)));

end

%///////////// EPupdate() ///////////////////

function model = EPupdate(model,whiteId,blackId,realizedWhiteScore,whiteUnknown,weights,holdoutWhiteId,holdoutBlackId,nrIt) %#eml

% Purpose: update the approximate (EP) posterior over player skills and
% re-estimate model parameters (white advantage, error variance) by EM.
%
% Inputs:
%   model              - struct of EP sufficient statistics (see chessModel)
%   whiteId, blackId   - per-match player indices
%   realizedWhiteScore - per-match score for white (0, 0.5 or 1)
%   whiteUnknown       - logical flags: true when colour assignment is unknown
%   weights            - nMatches-by-2 per-player match weights
%   holdoutWhiteId, holdoutBlackId - holdout player indices; accepted so the
%       nine-argument call in chessModel() binds nrIt correctly, but unused
%       in the updates themselves
%   nrIt               - number of full passes over the data
%
% Output:
%   model - updated struct
%
% NOTE(review): the original declaration omitted holdoutWhiteId and
% holdoutBlackId, so the nine-argument call site would have bound nrIt to
% the wrong input; the signature now matches the call.

% compile to increase speed

eml.extrinsic('error');

% do nrIt passes over the data

for j=1:nrIt

    % pre-allocate EM statistics (moments of the prediction errors)

    sumErrors=0;

    sumSqErrors=0;

    sumWeights=0;

    % likelihood of matches

    for t=1:length(whiteId)

        % id's

        wi=whiteId(t);

        bi=blackId(t);

        % SHRINKAGE: pull each player's skill towards the (weighted)
        % average skill of the opponents they have faced

        % shrink player 1 to player 2: remove the old shrinkage message,
        % refresh this match's contribution to the opponent-skill sum,
        % then add the new message back

        model.skillTau(wi)= model.skillTau(wi)-model.shrinkPrec*model.muSumSkills(wi)/model.muSumWeights(wi);

        weightedOppSkill = weights(t,1)*model.skillTau(bi)/model.skillPrec(bi);

        model.muSumSkills(wi) = model.muSumSkills(wi)-model.muOppSkill(t,1)+weightedOppSkill;

        model.muOppSkill(t,1) = weightedOppSkill;

        model.skillTau(wi) = model.skillTau(wi)+model.shrinkPrec*model.muSumSkills(wi)/model.muSumWeights(wi);

        % shrink player 2 to player 1 (same scheme, colours swapped)

        model.skillTau(bi) = model.skillTau(bi)-model.shrinkPrec*model.muSumSkills(bi)/model.muSumWeights(bi);

        weightedOppSkill = weights(t,2)*model.skillTau(wi)/model.skillPrec(wi);

        model.muSumSkills(bi) = model.muSumSkills(bi)-model.muOppSkill(t,2)+weightedOppSkill;

        model.muOppSkill(t,2) = weightedOppSkill;

        model.skillTau(bi) = model.skillTau(bi)+model.shrinkPrec*model.muSumSkills(bi)/model.muSumWeights(bi);

        % PROCESS MATCH RESULT

        % cavity prior on skills: posterior with this match's old
        % likelihood message divided out

        priorPrec1=model.skillPrec(wi)-model.lhPrec(t,1);

        priorTau1=model.skillTau(wi)-model.lhTau(t,1);

        priorPrec2=model.skillPrec(bi)-model.lhPrec(t,2);

        priorTau2=model.skillTau(bi)-model.lhTau(t,2);

        % prior on the performance difference of the two players

        priorVar=1/priorPrec1 + 1/priorPrec2 + model.errorVar;

        priorMean=priorTau1/priorPrec1 - priorTau2/priorPrec2;

        % posterior of performance difference if the first player is

        % indeed playing white

        [postMean,postVar,prob1] = calcPost(priorMean+model.whiteAdv,priorVar,realizedWhiteScore(t));

        postMean = postMean-model.whiteAdv;

        postTau = postMean/postVar;

        postPrec = 1/postVar;

        % account for the possibility that the second player is white:
        % mix the two colour hypotheses, weighted by their probabilities

        if whiteUnknown(t)

            [postMean2,postVar2,prob2] = calcPost(priorMean-model.whiteAdv,priorVar,realizedWhiteScore(t));

            postMean2 = postMean2+model.whiteAdv;

            sumprob = prob1+prob2;

            postTau = (prob1*postTau+prob2*postMean2/postVar2)/sumprob;

            postPrec = (prob1/postVar+prob2/postVar2)/sumprob;

        end

        % likelihood terms

        if postPrec <= 1/priorVar

            % a valid EP message must add precision

            error('improper message!')

        else

            % likelihood message on the performance difference
            % (posterior divided by prior, in natural parameters)

            lhp=postPrec-1/priorVar;

            lht=postTau-priorMean/priorVar; % fixed: was undefined 'priorvar'

            % likelihood on skills

            model.lhPrec(t,1)=weights(t,1)/(1/lhp + 1/priorPrec2 + model.errorVar);

            model.lhTau(t,1)=model.lhPrec(t,1)*(priorTau2/priorPrec2+lht/lhp);

            model.lhPrec(t,2)=weights(t,2)/(1/lhp + 1/priorPrec1 + model.errorVar);

            model.lhTau(t,2)=model.lhPrec(t,2)*(priorTau1/priorPrec1-lht/lhp);

            % update posterior

            model.skillPrec(wi)=priorPrec1+model.lhPrec(t,1);

            model.skillTau(wi)=priorTau1+model.lhTau(t,1);

            model.skillPrec(bi)=priorPrec2+model.lhPrec(t,2);

            model.skillTau(bi)=priorTau2+model.lhTau(t,2);

            % EM statistics (only for matches with known colours)

            if ~whiteUnknown(t)

                wt=weights(t,1)+weights(t,2);

                sumv=1/lhp + 1/priorPrec1 + 1/priorPrec2;

                elhp=1/sumv+1/model.errorVar;

                elht=(lht/lhp - priorTau1/priorPrec1 + priorTau2/priorPrec2)/sumv;

                sumErrors=sumErrors+wt*elht/elhp;

                sumSqErrors=sumSqErrors+wt*((elht/elhp)^2+1/elhp);

                sumWeights=sumWeights+wt;

            end

        end

    end

    % do EM update model parameters

    model.whiteAdv = model.whiteAdv + sumErrors/sumWeights;

    model.errorVar = sumSqErrors/sumWeights;

end

end

Post-processing: code

A warning before reading on: the post-processing code below is rough and was never cleaned up, so don’t pay too much attention to its style.

function newWhiteScore = postProcess(matchMonth,predictedWhiteScore,whiteId,blackId,whiteSkill,blackSkill,...

    muWhite,muBlack,whiteSkillVar,blackSkillVar,matchesPerMonth,weightedOppSkillWhite,weightedOppSkillBlack,...

    weightedMatchCountWhite,weightedMatchCountBlack,isTrainData,realizedWhiteScore,treeCount)

% Purpose: post-process base-model predictions in three stages:
%   1) build per-match schedule features (expected points, match counts,
%      opponent-strength statistics, per month and per 3-month block),
%   2) recalibrate predictions with a global logistic regression,
%   3) refine with locally-weighted logistic regressions fit around
%      k-means cluster centers of the test-set feature space.
%
% Assumes matches are sorted by matchMonth (the month counter 'c' is only
% advanced when consecutive rows change month) -- TODO confirm at call site.
% Also assumes matchesPerMonth contains history rows before min(matchMonth),
% since it is indexed at d-5:d and d-11:d with d = min(matchMonth)-1.

% settings

nrClusters=5000; % <1000 for cross-validation

shrinkFactor=0.25; % L2 shrinkage strength in the local regressions

nrMonths=max(matchMonth)-min(matchMonth)+1;

n=length(predictedWhiteScore);

nrPlayers=max(max(whiteId),max(blackId));

% calculate predicted number of wins per player per month

opsum=zeros(nrMonths,nrPlayers);

c=1;

for i=1:n

    opsum(c,whiteId(i))=opsum(c,whiteId(i))+predictedWhiteScore(i);

    opsum(c,blackId(i))=opsum(c,blackId(i))+1-predictedWhiteScore(i);

    if i<n && matchMonth(i+1) ~= matchMonth(i)

        c=c+1;

    end

end

% distribute: per-match lookup of each player's predicted points this month

lpw=zeros(n,1);

lpb=zeros(n,1);

c=1;

for i=1:n

    lpw(i)=opsum(c,whiteId(i));

    lpb(i)=opsum(c,blackId(i));

    if i<n && matchMonth(i+1) ~= matchMonth(i)

        c=c+1;

    end

end

% nr of matches played in the past 6/12 months
% (d advances in steps of 3; presumably the data is organised in 3-month
% rating periods -- TODO confirm)

wmc6=zeros(n,1);

bmc6=zeros(n,1);

wmc12=zeros(n,1);

bmc12=zeros(n,1);

d=min(matchMonth)-1;

for i=1:n

    wmc6(i)=sum(matchesPerMonth(d-5:d,whiteId(i)));

    bmc6(i)=sum(matchesPerMonth(d-5:d,blackId(i)));

    wmc12(i)=sum(matchesPerMonth(d-11:d,whiteId(i)));

    bmc12(i)=sum(matchesPerMonth(d-11:d,blackId(i)));

    if i<n && matchMonth(i+1) ~= matchMonth(i)

        if mod(matchMonth(i+1)-1,3)==0

            d=d+3;

        end

    end

end

% average opponent strength & match counts this month

opsum=zeros(nrMonths,nrPlayers);

opc=zeros(nrMonths,nrPlayers);

c=1;

for i=1:n

    opsum(c,whiteId(i))=opsum(c,whiteId(i))+blackSkill(i);

    opsum(c,blackId(i))=opsum(c,blackId(i))+whiteSkill(i);

    opc(c,whiteId(i))=opc(c,whiteId(i))+1;

    opc(c,blackId(i))=opc(c,blackId(i))+1;

    if i<n && matchMonth(i+1) ~= matchMonth(i)

        c=c+1;

    end

end

waos=zeros(n,1);

baos=zeros(n,1);

wmc0=zeros(n,1);

bmc0=zeros(n,1);

c=1;

for i=1:n

    waos(i)=opsum(c,whiteId(i))/opc(c,whiteId(i));

    baos(i)=opsum(c,blackId(i))/opc(c,blackId(i));

    wmc0(i)=opc(c,whiteId(i));

    bmc0(i)=opc(c,blackId(i));

    if i<n && matchMonth(i+1) ~= matchMonth(i)

        c=c+1;

    end

end

% same statistics, now per 3 months

waos2=zeros(n,1);

baos2=zeros(n,1);

wmcblock=zeros(n,1);

bmcblock=zeros(n,1);

c=1;

d=1;

for i=1:n

    nrs=d:d+2;

    wmcblock(i)=wmcblock(i)+sum(opc(nrs,whiteId(i)));

    bmcblock(i)=bmcblock(i)+sum(opc(nrs,blackId(i)));

    waos2(i)=sum(opsum(nrs,whiteId(i)))/wmcblock(i);

    baos2(i)=sum(opsum(nrs,blackId(i)))/bmcblock(i);

    if i<n && matchMonth(i+1) ~= matchMonth(i)

        c=c+1;

        if mod(matchMonth(i+1)-1,3)==0

            d=d+3;

        end

    end

end

% variance in opponent strength (per player per month)

opskill=opsum./opc;

opsdif=zeros(nrMonths,nrPlayers);

c=1;

for i=1:n

    opsdif(c,whiteId(i))=opsdif(c,whiteId(i))+(blackSkill(i)-opskill(c,whiteId(i)))^2;

    opsdif(c,blackId(i))=opsdif(c,blackId(i))+(whiteSkill(i)-opskill(c,blackId(i)))^2;

    if i<n && matchMonth(i+1) ~= matchMonth(i)

        c=c+1;

    end

end

opsdif=opsdif./opc;

opsdw=zeros(length(whiteId),1);

opsdb=zeros(length(whiteId),1);

c=1;

for i=1:length(whiteId)

    opsdw(i)=opsdif(c,whiteId(i));

    opsdb(i)=opsdif(c,blackId(i));

    if i<n && matchMonth(i+1) ~= matchMonth(i)

        c=c+1;

    end

end

% /////////////// logistic regression //////////////////////
% design matrix: the base-model logit plus many (mostly white-minus-black)
% schedule/strength features and their transforms

xlin=[-log((1./predictedWhiteScore)-1) whiteSkill.^2 blackSkill.^2 muWhite-muBlack whiteSkillVar-blackSkillVar ...

    (lpw./wmc0)-(lpb./bmc0) (lpw./wmc0).^2-(lpb./bmc0).^2 -log((wmc0./lpw)-1)+log((bmc0./lpb)-1) ...

    sqrt(abs(whiteSkill-blackSkill)).*sign(whiteSkill-blackSkill) sign(whiteSkill-blackSkill).*(whiteSkill-blackSkill).^2 ...

    -log((1./predictedWhiteScore)-1).*abs(whiteSkill-blackSkill) wmc6-bmc6 wmc12-bmc12 log(max(wmc6,0.5))-log(max(bmc6,0.5)) ...

    log(max(wmc12,0.5))-log(max(bmc12,0.5)) ones(length(predictedWhiteScore),1) ...

    waos-baos -log((1./predictedWhiteScore)-1).*(waos-baos).^2 waos2-baos2 log(wmc0)-log(bmc0) log(wmcblock)-log(bmcblock) ...

    sqrt(wmc0)-sqrt(bmc0) wmc0-bmc0 sqrt(wmcblock)-sqrt(bmcblock) ...

    lpw-lpb log(lpw)-log(lpb) sqrt(lpw)-sqrt(lpb) ...

    weightedOppSkillWhite-weightedOppSkillBlack log(max(weightedMatchCountWhite,0.5))-log(max(weightedMatchCountBlack,0.5)) ...

    weightedMatchCountWhite-weightedMatchCountBlack sqrt(weightedMatchCountWhite)-sqrt(weightedMatchCountBlack) ...

    log(treeCount(:,1))-log(treeCount(:,2))];

nTrain=sum(isTrainData);

% fit by Newton-type optimization of the logistic loss (logitobj supplies
% gradient and Hessian); start from coefficient 1 on the base-model logit

opt=optimset('GradObj','on','Display','off','LargeScale','on','Hessian','on','TolFun',1e-8);

linpar=fminunc(@(p)logitobj(realizedWhiteScore(isTrainData),zeros(nTrain,1),ones(nTrain,1),xlin(isTrainData,:),p),[1; zeros(size(xlin,2)-1,1)],opt);

linp=xlin*linpar;

newWhiteScore=1./(1+exp(-linp));

% calculate predicted number of wins per player per month under new

% predictions

opsum=zeros(nrMonths,nrPlayers);

c=1;

for i=1:n

    opsum(c,whiteId(i))=opsum(c,whiteId(i))+newWhiteScore(i);

    opsum(c,blackId(i))=opsum(c,blackId(i))+1-newWhiteScore(i);

    if i<n && matchMonth(i+1) ~= matchMonth(i)

        c=c+1;

    end

end

% distribute

lpw=zeros(n,1);

lpb=zeros(n,1);

c=1;

for i=1:n

    lpw(i)=opsum(c,whiteId(i));

    lpb(i)=opsum(c,blackId(i));

    if i<n && matchMonth(i+1) ~= matchMonth(i)

        c=c+1;

    end

end

% /////////////////// do clustering ////////////////////////

% kernel variables: the feature space in which "similar" matches are found

xKernel=[whiteSkill-blackSkill waos-baos log(wmc0) log(bmc0) -log((wmc0./lpw)-1) -log((bmc0./lpb)-1) ...

    opsdw+opsdb log(treeCount(:,1))-log(treeCount(:,2)) weightedOppSkillWhite-weightedOppSkillBlack];

% weights: per-feature importance in the kernel distance

varweights=sqrt([2 2 3 3 1 1 2 0.5 1]);

% rescale to zero mean and weighted unit variance

xmean=mean(xKernel);

xstdv=sqrt(mean(xKernel.^2)-xmean.^2);

xscale=xstdv./varweights;

xKernel=(xKernel-repmat(xmean,size(xKernel,1),1))./repmat(xscale,size(xKernel,1),1);

% reduce to the first 5 principal components

[~,pc]=princomp(xKernel);

pc=pc(:,1:5);

% cluster test set

clusterCenters = mykmeans(pc(~isTrainData,:),nrClusters);

% distances between cluster centers (squared Euclidean, pairwise)

nrClusters=size(clusterCenters,1);

clusterCenterDist=zeros(nrClusters,nrClusters);

for i=1:size(clusterCenters,2)

    clusterCenterDist=clusterCenterDist+(repmat(clusterCenters(:,i),1,nrClusters)-repmat(clusterCenters(:,i)',nrClusters,1)).^2;

end

% ///////////// do local logistic regression /////////////////////

% local regression variables (standardised below; column 1 is an intercept)

xextra=[ones(length(predictedWhiteScore),1) -log((1./newWhiteScore)-1) -log((wmc0./lpw)-1) -log((bmc0./lpb)-1) whiteSkill-blackSkill waos baos log(wmc0)-log(bmc0) ...

    log(lpw)-log(lpb) muWhite-muBlack whiteSkillVar-blackSkillVar weightedOppSkillWhite-weightedOppSkillBlack log(treeCount(:,1))-log(treeCount(:,2))];

xemean=mean(xextra);

xemean(1)=0; % keep the intercept column untouched

xestdv=sqrt(mean(xextra.^2)-xemean.^2);

xestdv(1)=1;

xextra=(xextra-repmat(xemean,size(xextra,1),1))./repmat(xestdv,size(xextra,1),1);

k=size(xextra,2);

% train data

pctrain=pc(isTrainData,:);

wsTrain=realizedWhiteScore(isTrainData);

pwsTrain=newWhiteScore(isTrainData);

xextratrain=xextra(isTrainData,:);

linptrain=linp(isTrainData);

% nearest neighbours: 20000 training matches per cluster center, and the
% 6 nearest centers for every match (used for interpolation below)

[nni,dist] = knnsearch(pctrain,clusterCenters,'K',20000);

[nntest,disttest] = knnsearch(clusterCenters,pc,'K',6);

dist = dist./repmat(max(dist,[],2),1,size(dist,2));

w = (1-dist.^3).^3; % tricubic weight kernel

% loop over cluster centers

clusterpar=zeros(k,nrClusters);

for i=1:nrClusters

    par=zeros(k,1);

    % get data

    ind=nni(i,:)';

    realizedWhiteScoreind=wsTrain(ind);

    linpind=linptrain(ind);

    mxextraind=-xextratrain(ind,:);

    ipy=pwsTrain(ind);

    wind=w(i,:)';

    % fixed preconditioner: Cholesky factor of (X'WX + I) at the initial
    % predictions; reused for every Newton-like step below

    scaleMat=chol(mxextraind'*(mxextraind.*repmat(wind.*(ipy-ipy.^2),1,k))+eye(k));

    % optimization: damped quasi-Newton iterations until the step is tiny

    step=ones(k,1);

    sz=1;

    while true

        oldstep=step;

        % gradient of the weighted logistic loss plus L2 shrinkage on par

        grad = mxextraind'*(wind.*(ipy-realizedWhiteScoreind))-shrinkFactor*par;

        step = scaleMat\(scaleMat'\grad);

        par = par+sz*step;

        if step'*step > 1e-8

            % local prediction: global logit plus local correction

            ipy = 1./(1+exp(mxextraind*par-linpind));

            if step'*step>oldstep'*oldstep

                % steps growing: halve the step size to stabilise

                sz=sz/2;

            end

        else

            break

        end

    end

    % save parameters

    clusterpar(:,i)=par;

    end

% prediction

for i=1:n

    if ~isTrainData(i)

        % interpolation: blend the local parameters of the 6 nearest
        % cluster centers with Gaussian-kernel weights

        idist=disttest(i,:)';

        inn=nntest(i,:);

        dscale=max(idist)*2;

        idist=idist/dscale;

        ibvd2=clusterCenterDist(inn,inn)/dscale^2;

        parw=exp(-ibvd2)\exp(-idist.^2);

        par=clusterpar(:,inn)*parw;

        % prediction

        newWhiteScore(i)=1/(1+exp(-(linp(i)+xextra(i,:)*par)));

    end

end

end

Miscellaneous helper functions

If a function isn’t listed here, it is a built-in part of MATLAB or its toolboxes.

function [postMean,postVar,prob] = calcPost(priorMean,priorVar,realizedWhiteScore) %#eml

% calculate truncated normal statistics.
%
% The performance difference is N(priorMean, priorVar); the observed score
% truncates it to one of three regions:
%   loss (score 0): (-Inf, -1)
%   draw (score 0.5): (-1, 1)
%   win  (score 1): (1, Inf)
%
% Outputs:
%   postMean, postVar - mean and variance of the truncated normal
%   prob              - prior probability mass of the observed region

% lower boundary: -Inf for a loss, otherwise 1 (win) or -1 (draw)

if realizedWhiteScore==0

    rat11=0;

    pdfv1=0;

    cdfv1=0;

else

    if realizedWhiteScore==1

        a=1;

    else

        a=-1;

    end

    rat11=(a-priorMean)/sqrt(priorVar);

    pdfv1=exp(-0.5*rat11^2)/sqrt(2*pi);

    cdfv1=erfc(-rat11/sqrt(2))/2;

end

% upper boundary: +Inf for a win, otherwise -1 (loss) or 1 (draw)

if realizedWhiteScore==1

    rat12=0;

    pdfv2=0;

    cdfv2=1;

else

    if realizedWhiteScore==0

        b=-1;

    else

        b=1;

    end

    rat12=(b-priorMean)/sqrt(priorVar);

    pdfv2=exp(-0.5*rat12^2)/sqrt(2*pi);

    cdfv2=erfc(-rat12/sqrt(2))/2;

end

% mass of the region, then standard truncated-normal moment corrections

prob=cdfv2-cdfv1;

rat2=(pdfv1-pdfv2)/prob;

postMean=priorMean+rat2*sqrt(priorVar);

postVar=priorVar*(1+(rat11*pdfv1-rat12*pdfv2)/prob-rat2^2);

end

function [nllh,grad,hess] = logitobj(ws,linp,w,x,par)

% Weighted negative log-likelihood of a logistic regression, with
% analytic gradient and Hessian for Newton-type optimizers.
%
% Inputs:
%   ws   - observed scores in [0,1]
%   linp - fixed offset added to the linear predictor
%   w    - observation weights
%   x    - design matrix
%   par  - coefficient vector
%
% Outputs:
%   nllh - weighted negative log-likelihood
%   grad - gradient w.r.t. par (only when requested)
%   hess - Hessian w.r.t. par (only when requested)

% predicted probabilities from the offset linear predictor

prob = 1./(1+exp(-x*par-linp));

% per-observation log-likelihood, then weighted sum (negated)

loglik = ws.*log(prob)+(1-ws).*log(1-prob);

nllh = -(w'*loglik);

% first derivative: X' * (weighted residuals)

if nargout>1

    residual = w.*(prob-ws);

    grad = x'*residual;

end

% second derivative: X' * diag(w*p*(1-p)) * X

if nargout>2

    curvature = w.*(prob-prob.^2);

    hess = x'*(x.*repmat(curvature,1,size(x,2)));

end

end

function centers = mykmeans(X, k)

% K-means clustering with farthest-point initialization.
% Returns a k-by-size(X,2) matrix of cluster centers.

% sample size

n = size(X,1);

% initialization: first center drawn uniformly at random, each further
% center is the sample farthest from all centers chosen so far

centers = zeros(k, size(X,2));

centers(1,:) = X(ceil(n*rand),:);

closestDistSq = Inf(n,1);

for c = 2:k

    delta = X - repmat(centers(c-1,:), n, 1);

    closestDistSq = min(sum(delta.^2, 2), closestDistSq);

    [~, farthest] = max(closestDistSq);

    centers(c,:) = X(farthest,:);

end

assignment = knnsearch(centers, X);

% 100 Lloyd iterations: recompute each center as the mean of its members,
% then reassign every sample to its nearest center.
% NOTE(review): a cluster that loses all members yields a 0/0 = NaN
% center, which knnsearch then never selects -- presumably intended as a
% way of dropping empty clusters; confirm before reusing elsewhere.

for pass = 1:100

    member = sparse(assignment, 1:n, true, k, n);

    centers = (member*X)./(member*ones(size(X)));

    assignment = knnsearch(centers, X);

end

end

function treeCount=doRandomForest(matchMonth,predictedWhiteScore,whiteId,blackId,whiteSkill,blackSkill, ...

    muWhite,muBlack,varWhiteSkill,varBlackSkill,matchesPerMonth,weightedOppSkillWhite,weightedOppSkillBlack, ...

    weightedMatchCountWhite,weightedMatchCountBlack,isTrainData,realizedWhiteScore)

% Purpose: build schedule/strength features per match and train a bagged
% random-forest ensemble (30 forests of 60 trees) on stochastically
% binarised outcomes; returns per-match class score totals.
% Training rows receive out-of-bag scores, test rows ordinary predictions.
%
% Assumes matches are sorted by matchMonth and that matchesPerMonth has
% history rows before min(matchMonth) -- TODO confirm at call site.
%
% Fixes vs. the published listing: ntr was used but never defined, and the
% importance weights indexed the full-length predictedWhiteScore with a
% training-length logical mask; both now use the training subset.

% dimension

nrMonths=max(matchMonth)-min(matchMonth)+1;

n=length(predictedWhiteScore);

nPlayers=max(max(whiteId),max(blackId));

% calculate predicted number of wins per player per month

opsum=zeros(nrMonths,nPlayers);

c=1;

for i=1:n

    opsum(c,whiteId(i))=opsum(c,whiteId(i))+predictedWhiteScore(i);

    opsum(c,blackId(i))=opsum(c,blackId(i))+1-predictedWhiteScore(i);

    if i<n && matchMonth(i+1) ~= matchMonth(i)

        c=c+1;

    end

end

% distribute: per-match lookup of each player's predicted points this month

lpw=zeros(n,1);

lpb=zeros(n,1);

c=1;

for i=1:n

    lpw(i)=opsum(c,whiteId(i));

    lpb(i)=opsum(c,blackId(i));

    if i<n && matchMonth(i+1) ~= matchMonth(i)

        c=c+1;

    end

end

% sample counts over the past 6/12 months

wmc6=zeros(n,1);

bmc6=zeros(n,1);

wmc12=zeros(n,1);

bmc12=zeros(n,1);

d=min(matchMonth)-1;

for i=1:n

    wmc6(i)=sum(matchesPerMonth(d-5:d,whiteId(i)));

    bmc6(i)=sum(matchesPerMonth(d-5:d,blackId(i)));

    wmc12(i)=sum(matchesPerMonth(d-11:d,whiteId(i)));

    bmc12(i)=sum(matchesPerMonth(d-11:d,blackId(i)));

    if i<n && matchMonth(i+1) ~= matchMonth(i)

        if mod(matchMonth(i+1)-1,3)==0

            d=d+3;

        end

    end

end

% average opponent strength & match counts (this month)

opsum=zeros(nrMonths,nPlayers);

opc=zeros(nrMonths,nPlayers);

c=1;

for i=1:n

    opsum(c,whiteId(i))=opsum(c,whiteId(i))+blackSkill(i);

    opsum(c,blackId(i))=opsum(c,blackId(i))+whiteSkill(i);

    opc(c,whiteId(i))=opc(c,whiteId(i))+1;

    opc(c,blackId(i))=opc(c,blackId(i))+1;

    if i<n && matchMonth(i+1) ~= matchMonth(i)

        c=c+1;

    end

end

waos=zeros(n,1);

baos=zeros(n,1);

wmc0=zeros(n,1);

bmc0=zeros(n,1);

c=1;

for i=1:n

    waos(i)=opsum(c,whiteId(i))/opc(c,whiteId(i));

    baos(i)=opsum(c,blackId(i))/opc(c,blackId(i));

    wmc0(i)=opc(c,whiteId(i));

    bmc0(i)=opc(c,blackId(i));

    if i<n && matchMonth(i+1) ~= matchMonth(i)

        c=c+1;

    end

end

% same statistics per 3-month block

waos2=zeros(n,1);

baos2=zeros(n,1);

wmcblock=zeros(n,1);

bmcblock=zeros(n,1);

c=1;

d=1;

for i=1:n

    nrs=d:d+2;

    wmcblock(i)=wmcblock(i)+sum(opc(nrs,whiteId(i)));

    bmcblock(i)=bmcblock(i)+sum(opc(nrs,blackId(i)));

    waos2(i)=sum(opsum(nrs,whiteId(i)))/wmcblock(i);

    baos2(i)=sum(opsum(nrs,blackId(i)))/bmcblock(i);

    if i<n && matchMonth(i+1) ~= matchMonth(i)

        c=c+1;

        if mod(matchMonth(i+1)-1,3)==0

            d=d+3;

        end

    end

end

% random forest variables

xrf=[predictedWhiteScore whiteSkill-blackSkill whiteSkill+blackSkill muWhite-muBlack varWhiteSkill-varBlackSkill lpw./wmc0 lpb./bmc0 ...

    wmc6-bmc6 wmc12-bmc12 waos-baos waos+baos waos2-baos2 wmc0 bmc0 wmcblock bmcblock ...

    weightedOppSkillWhite-weightedOppSkillBlack weightedOppSkillWhite+weightedOppSkillBlack weightedMatchCountWhite weightedMatchCountBlack];

% matlabpool

matlabpool local 3

opt=statset('UseParallel','always');

treeCount=zeros(n,2);

% training-set size and base-model predictions on the training set

ntr=sum(isTrainData);

pwsTrain=predictedWhiteScore(isTrainData);

for i=1:30

    % draw a stochastic binary outcome from each predicted score

    realizedWhiteScoreSel=(realizedWhiteScore(isTrainData)>rand(ntr,1));

    % importance weights: 1/p for the sampled class, so each match
    % contributes equally in expectation over the sampling

    weights=ones(ntr,1);

    weights(realizedWhiteScoreSel)=1./pwsTrain(realizedWhiteScoreSel);

    weights(~realizedWhiteScoreSel)=1./(1-pwsTrain(~realizedWhiteScoreSel));

    brf=TreeBagger(60,xrf(isTrainData,:),realizedWhiteScoreSel,'Method','classification','OOBPred','on','NPrint',10,'MinLeaf',100,'weights',weights,'Options',opt);

    % out-of-bag scores for training rows, plain predictions for the rest

    [~,scores]=predict(brf,xrf(isTrainData,:),'useifort',brf.OOBIndices);

    treeCount(isTrainData,:) = treeCount(isTrainData,:)+scores;

    [~,scores]=predict(brf,xrf(~isTrainData,:));

    treeCount(~isTrainData,:) = treeCount(~isTrainData,:)+scores;

end

% floor to avoid zeros in downstream log(treeCount) features

treeCount(treeCount<0.5)=0.5;

% close pool

matlabpool close

end

function [weightedOppSkillWhite,weightedOppSkillBlack,weightedMatchCountWhite,weightedMatchCountBlack] ...

    = getWeightedStats(whiteId,blackId,whiteSkill,blackSkill)

% get weighted schedule-statistics using rooted pagerank.
%
% For each match i, looks at the OTHER matches of each player's opponent
% and averages that opponent's opponents' skills, weighted by how close
% each third party is to the anchor player in the match graph (rooted
% pagerank score raised to weightPower).
%
% INPUT ARGUMENTS PER MONTH ONLY!
%
% Outputs (all n-by-1):
%   weightedOppSkillWhite/Black   - weighted average skill of the white/black
%                                   player's opponents' opponents
%   weightedMatchCountWhite/Black - corresponding weight totals (floored at
%                                   0.01 to avoid division by zero)
%
% Fix vs. the published listing: in the bi==p branch the second comparison
% read blackIdnr(j)==bi (the anchor itself) where, by symmetry with the
% wi==p branch, it must be blackIdnr(j)==wi.

% settings

dampCoeff=0.9;

weightPower=0.2;

% unique player id's

n=length(whiteId);

[~,~,plnrs] = unique([whiteId; blackId]);

whiteIdnr=plnrs(1:n);

blackIdnr=plnrs(n+1:end);

nplayers=max(plnrs);

% connection matrix (symmetric match counts between players)

amat=sparse(whiteIdnr,blackIdnr,1,nplayers,nplayers);

amat=amat+amat';

svec=sum(amat);

svec(svec==0)=1;

% damped, degree-normalised transition matrix

dm=sparse(1:nplayers,1:nplayers,dampCoeff./svec)*amat;

% statistics

weightedOppSkillWhite=zeros(n,1);

weightedOppSkillBlack=zeros(n,1);

weightedMatchCountWhite=0.01*ones(n,1);

weightedMatchCountBlack=0.01*ones(n,1);

% loop over players

for p=1:nplayers

    % rooted pagerank: 50 power iterations with restart at player p

    con=zeros(nplayers,1);

    con(p)=1;

    for i=1:50

        con=dm*con;

        con(p)=con(p)+(1-dampCoeff);

    end

    % loop over matches

    for i=1:n

        wi=whiteIdnr(i);

        bi=blackIdnr(i);

        if wi==p

            % p plays white in match i: weight the other matches of p's
            % opponent bi by the pagerank of bi's opponent there

            for j=1:n

                if j~=i

                    if whiteIdnr(j)==bi

                        w=con(blackIdnr(j))^weightPower;

                        weightedOppSkillBlack(i)=weightedOppSkillBlack(i)+w*blackSkill(j);

                        weightedMatchCountBlack(i)=weightedMatchCountBlack(i)+w;

                    elseif blackIdnr(j)==bi

                        w=con(whiteIdnr(j))^weightPower;

                        weightedOppSkillBlack(i)=weightedOppSkillBlack(i)+w*whiteSkill(j);

                        weightedMatchCountBlack(i)=weightedMatchCountBlack(i)+w;

                    end

                end

            end

        elseif bi==p

            % p plays black in match i: same scheme for the other matches
            % of p's opponent wi

            for j=1:n

                if j~=i

                    if whiteIdnr(j)==wi

                        w=con(blackIdnr(j))^weightPower;

                        weightedOppSkillWhite(i)=weightedOppSkillWhite(i)+w*blackSkill(j);

                        weightedMatchCountWhite(i)=weightedMatchCountWhite(i)+w;

                    elseif blackIdnr(j)==wi % fixed: was blackIdnr(j)==bi

                        w=con(whiteIdnr(j))^weightPower;

                        weightedOppSkillWhite(i)=weightedOppSkillWhite(i)+w*whiteSkill(j);

                        weightedMatchCountWhite(i)=weightedMatchCountWhite(i)+w;

                    end

                end

            end

        end

    end

end

% average opponent strengths

weightedOppSkillWhite=weightedOppSkillWhite./weightedMatchCountWhite;

weightedOppSkillBlack=weightedOppSkillBlack./weightedMatchCountBlack;

end

Published with MATLAB® 7.11

 

Leave a Reply

Your email address will not be published. Required fields are marked *

*

You may use these HTML tags and attributes: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code lang=""> <del datetime=""> <em> <i> <q cite=""> <strike> <strong> <pre lang="" extra="">