% load the wine data and shuffle it; after this section each row of X and t
% is one example
[X, t] = wine_dataset;
% draw one random ordering of the examples and apply it to inputs and
% targets alike so every example stays paired with its target, then
% transpose so that examples run along the rows
shuffle = randperm(size(X, 2));
X = X(:, shuffle)';
t = t(:, shuffle)';

% split the (already shuffled) rows: the first 80% form the training set,
% the remaining 20% form the test set
training_portion = 0.8;
training_x = X(1:round(training_portion * size(X, 1)), :);
% the targets are cut at the same row as the inputs
training_t = t(1:size(training_x, 1), :);
% everything after the last training row is held out for testing
test_x = X((size(training_x, 1) + 1):end, :);
test_t = t((size(training_x, 1) + 1):end, :);

% initial parameters: one column of W and one entry of b per class
number_of_classes = size(t, 2);
% biases start at zero
b = zeros(1, number_of_classes);
% weights start as small Gaussian noise (standard normal scaled by 0.1),
% one row per input feature
W = 0.1 * randn(size(X, 2), number_of_classes);

% compute classifier scores: linear scores X*W with the bias row b added to
% every example (rows of X are examples, columns of the result are classes)
classifier = @(W, b, X) X*W + repmat(b, size(X, 1), 1);

% helper for the softmax below: subtract each row's maximum from that row.
% Softmax is unchanged by adding a constant to all scores of an example, so
% this does not alter the result mathematically, but the largest exponent
% becomes exp(0) = 1. exp can therefore no longer overflow to Inf (which
% previously produced Inf./Inf = NaN), and every row sum is at least 1, so
% the denominator never needs to be clamped.
shift_rows = @(z) z - repmat(max(z, [], 2), 1, size(z, 2));

% convert classifier scores into class probabilities (row-wise softmax);
% each row of the result sums to 1
scores = @(z) exp(shift_rows(z)) ./ repmat(sum(exp(shift_rows(z)), 2), 1, size(z, 2));

% evaluate loss given class probabilities: t is used as a logical mask that
% picks the entries of p to penalize (the true-class probabilities when t is
% one-hot); the loss is the sum of their negative logs over all examples
loss = @(p, t) sum(-log(p(logical(t))), 1);

% objective to minimize: mean training loss plus an L2 penalty
lambda = 0.001; % regularization strength
% penalty on one parameter array: half of lambda times its sum of squares
l2_penalty = @(v) 0.5 * lambda * sum(v(:).^2);
% note that the penalty is applied to the biases b as well as the weights W
objective = @(W, b) loss(scores(classifier(W, b, training_x)), training_t) / size(training_x, 1) ...
                    + l2_penalty(W) + l2_penalty(b);

%% implement the training algorithm here (update W and b to minimize objective(W, b))


%% test and plot confusion
% class probabilities predicted for the held-out test examples
test_scores = scores(classifier(W, b, test_x));
% open a new figure, then draw the confusion plot; targets and outputs are
% transposed before being handed to plotconfusion
figure;
plotconfusion(test_t', test_scores');
