## Code for Figure 4.12

### main.m


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113n = 100;
p = 2;
q = 5;

% reproducible data!
rand("seed",-1);

% create the data set
% true parameters
B = [zeros(3,2); [-1 1]; [0 0]];

X = rand(n, q-1);
% add a nearly dependent final column
xq = X*ones(q-1,1)/sqrt(q-1) + 0.01*rand(n,1);

% fitting data
X = [X, xq];
Y = X*B + 0.1*rand(n,p);

% cross validation data
X2 = rand(n, q);
Y2 = X2*B + 0.1*rand(n,p);

Xdat = [X;X2]; Ydat = [Y;Y2];

% write out low precision data for posting on website
myfile = fopen('pca_pls_data.dat', 'w');
fprintf(myfile, '%     x1      x2      x3     x4      x5      y1      y2')
fprintf(myfile, '\n')
for i = 1: 2*n
fprintf(myfile, '%8.3f', Xdat(i,:), Ydat(i,:))
fprintf(myfile, '\n')
end
fclose(myfile);

% reload low precision data so we estimate with the same data

X = Data(1:n,1:q); Y = Data(1:n,q+1:q+p);
X2 = Data(n+1:end,1:q); Y2 = Data(n+1:end,q+1:q+p);

% full least squares
Bls = inv(X'*X)*X'*Y;

% PCR or SVD
[U,S,V] = svd(X);
% check all numbers of principal components (singular values)
Bsvd(1:q) = {zeros(q,p)};
Ysvd(1:q) = {zeros(n,p)};
for l = 1:q
Ul=U(:,1:l); Sl=S(1:l,1:l); Vl=V(:,1:l);
Bsvd(l) = Vl*inv(Sl)*Ul'*Y;
% compare prediction on cross-validation data
Ysvd(l) = {X*cell2mat(Bsvd(l))};
errsvd(l) = norm(Y-cell2mat(Ysvd(l)));
end

%PLS algorithm
U = []; T=[]; P=[]; Q=[];
E = X; F = Y;
Bpls(1:q) = {zeros(q,p)};
Ypls(1:q) = {zeros(n,p)};
for i = 1:q
[UU,SS,VV] = svd(E'*F);
u = UU(:,1); v=VV(:,1);
t = E*u; w = F*v;
t = t/sqrt(t'*t);
pp = E'*t; qq = F'*t;
E = E - t*pp'; F = F - t*qq';
U = [U u]; T = [T t]; P = [P pp]; Q=[Q qq];
R = U / (P'*U); % i.e., R = inv(P'U)*U
%Bpls = R*inv(T'*T)*T'*Y
Bpls(i) = {R*Q'};
Ypls(i) = {X*cell2mat(Bpls(i))};
errpls(i) = norm(Y-cell2mat(Ypls(i)));
end

figure()
plot(1:q, errsvd, '-o', 1:q, errpls, '-+')

%cross validation
for i = 1:q
Y2svd(i) = {X2*cell2mat(Bsvd(i))};
err2svd(i) = norm(Y2-cell2mat(Y2svd(i)));
Y2pls(i) = {X2*cell2mat(Bpls(i))};
err2pls(i) = norm(Y2-cell2mat(Y2pls(i)));
end

figure()
plot(1:q, err2svd, '-o', 1:q, err2pls, '-+')

for i = 1:q
figure()
plot(Y2(:,1), cell2mat(Y2pls(i))(:,1), 'o')
end
for i = 1:q
figure()
plot(Y2(:,1), cell2mat(Y2svd(i))(:,1), 'o')
end

% save data for book figure()

tab1 = [(1:q)', errsvd'];
tab2 = [(1:q)', errpls'];
tab3 = [Y, cell2mat(Ysvd)];
tab4 = [Y, cell2mat(Ypls)];
tab5 = [(1:q)', err2svd'];
tab6 = [(1:q)', err2pls'];
tab7 = [Y2, cell2mat(Y2svd)];
tab8 = [Y2, cell2mat(Y2pls)];

save pca_pls.dat tab1 tab2 tab3 tab4 tab5 tab6 tab7 tab8