Commit bfba9cf3 authored by prashantsaroj

Added solutions

parent bdc04f8c
function centroids = computeCentroids(X, idx, K)
%COMPUTECENTROIDS returns the new centroids by computing the means of the
%data points assigned to each centroid.
% centroids = COMPUTECENTROIDS(X, idx, K) returns the new centroids by
% computing the means of the data points assigned to each centroid. It is
% given a dataset X where each row is a single data point, a vector
% idx of centroid assignments (i.e. each entry in range [1..K]) for each
% example, and K, the number of centroids. You should return a matrix
% centroids, where each row of centroids is the mean of the data points
% assigned to it.
%
% Useful variables
[m n] = size(X);
% You need to return the following variables correctly.
centroids = zeros(K, n);
count = zeros(K, 1);    % number of points assigned to each centroid
sum_arr = zeros(K,n);
% ====================== YOUR CODE HERE ======================
% Instructions: Go over every centroid and compute mean of all points that
% belong to it. Concretely, the row vector centroids(i, :)
% should contain the mean of the data points assigned to
% centroid i.
%
% Note: You can use a for-loop over the centroids to compute this.
%
for i = 1:m
    mu_i = idx(i);                                % centroid assigned to example i
    sum_arr(mu_i,:) = sum_arr(mu_i,:) + X(i,:);   % accumulate feature sums
    count(mu_i) = count(mu_i) + 1;
end
for i = 1:K
    if count(i) ~= 0
        centroids(i,:) = sum_arr(i,:)/count(i);   % mean of the assigned points
    end
end
% =============================================================
end
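% Vectorized alternative sketch (not required by the assignment): a sparse
% K x m membership matrix turns the per-centroid sums into one matrix
% product. The small dataset below is made up for illustration.
X   = [1 2; 3 4; 5 6; 7 8];            % 4 examples, 2 features
idx = [1; 2; 1; 2];                    % centroid assignment per example
K   = 2;
m = size(X, 1);
G = sparse(idx(:), (1:m)', 1, K, m);   % G(k, i) = 1 iff example i is in cluster k
counts = full(sum(G, 2));              % points per centroid (K x 1)
sums   = full(G * X);                  % per-centroid feature sums (K x n)
centroids = bsxfun(@rdivide, sums, max(counts, 1));  % empty clusters stay zero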
function [h, display_array] = displayData(X, example_width)
%DISPLAYDATA Display 2D data in a nice grid
% [h, display_array] = DISPLAYDATA(X, example_width) displays 2D data
% stored in X in a nice grid. It returns the figure handle h and the
% displayed array if requested.
% Set example_width automatically if not passed in
if ~exist('example_width', 'var') || isempty(example_width)
example_width = round(sqrt(size(X, 2)));
end
% Gray Image
colormap(gray);
% Compute rows, cols
[m n] = size(X);
example_height = (n / example_width);
% Compute number of items to display
display_rows = floor(sqrt(m));
display_cols = ceil(m / display_rows);
% Between images padding
pad = 1;
% Setup blank display
display_array = - ones(pad + display_rows * (example_height + pad), ...
pad + display_cols * (example_width + pad));
% Copy each example into a patch on the display array
curr_ex = 1;
for j = 1:display_rows
for i = 1:display_cols
if curr_ex > m,
break;
end
% Copy the patch
% Get the max value of the patch
max_val = max(abs(X(curr_ex, :)));
display_array(pad + (j - 1) * (example_height + pad) + (1:example_height), ...
pad + (i - 1) * (example_width + pad) + (1:example_width)) = ...
reshape(X(curr_ex, :), example_height, example_width) / max_val;
curr_ex = curr_ex + 1;
end
if curr_ex > m,
break;
end
end
% Display Image
h = imagesc(display_array, [-1 1]);
% Do not show axis
axis image off
drawnow;
end
function drawLine(p1, p2, varargin)
%DRAWLINE Draws a line from point p1 to point p2
% DRAWLINE(p1, p2) Draws a line from point p1 to point p2 and holds the
% current figure
plot([p1(1) p2(1)], [p1(2) p2(2)], varargin{:});
end
%% Machine Learning Online Class
% Exercise 7 | Principal Component Analysis and K-Means Clustering
%
% Instructions
% ------------
%
% This file contains code that helps you get started on the
% exercise. You will need to complete the following functions:
%
% pca.m
% projectData.m
% recoverData.m
% computeCentroids.m
% findClosestCentroids.m
% kMeansInitCentroids.m
%
% For this exercise, you will not need to change any code in this file,
% or any other files other than those mentioned above.
%
%% Initialization
clear ; close all; clc
%% ================= Part 1: Find Closest Centroids ====================
% To help you implement K-Means, we have divided the learning algorithm
% into two functions -- findClosestCentroids and computeCentroids. In this
% part, you should complete the code in the findClosestCentroids function.
%
fprintf('Finding closest centroids.\n\n');
% Load an example dataset that we will be using
load('ex7data2.mat');
% Select an initial set of centroids
K = 3; % 3 Centroids
initial_centroids = [3 3; 6 2; 8 5];
% Find the closest centroids for the examples using the
% initial_centroids
idx = findClosestCentroids(X, initial_centroids);
fprintf('Closest centroids for the first 3 examples: \n')
fprintf(' %d', idx(1:3));
fprintf('\n(the closest centroids should be 1, 3, 2 respectively)\n');
fprintf('Program paused. Press enter to continue.\n');
pause;
%% ===================== Part 2: Compute Means =========================
% After implementing the closest centroids function, you should now
% complete the computeCentroids function.
%
fprintf('\nComputing centroids means.\n\n');
% Compute means based on the closest centroids found in the previous part.
centroids = computeCentroids(X, idx, K);
fprintf('Centroids computed after initial finding of closest centroids: \n')
fprintf(' %f %f \n' , centroids');
fprintf('\n(the centroids should be\n');
fprintf(' [ 2.428301 3.157924 ]\n');
fprintf(' [ 5.813503 2.633656 ]\n');
fprintf(' [ 7.119387 3.616684 ]\n\n');
fprintf('Program paused. Press enter to continue.\n');
pause;
%% =================== Part 3: K-Means Clustering ======================
% After you have completed the two functions computeCentroids and
% findClosestCentroids, you have all the necessary pieces to run the
% kMeans algorithm. In this part, you will run the K-Means algorithm on
% the example dataset we have provided.
%
fprintf('\nRunning K-Means clustering on example dataset.\n\n');
% Load an example dataset
load('ex7data2.mat');
% Settings for running K-Means
K = 3;
max_iters = 10;
% For consistency, here we set centroids to specific values
% but in practice you want to generate them automatically, such as by
% setting them to be random examples (as can be seen in
% kMeansInitCentroids).
initial_centroids = [3 3; 6 2; 8 5];
% Run K-Means algorithm. The 'true' at the end tells our function to plot
% the progress of K-Means
[centroids, idx] = runkMeans(X, initial_centroids, max_iters, true);
fprintf('\nK-Means Done.\n\n');
fprintf('Program paused. Press enter to continue.\n');
pause;
%% ============= Part 4: K-Means Clustering on Pixels ===============
% In this exercise, you will use K-Means to compress an image. To do this,
% you will first run K-Means on the colors of the pixels in the image and
% then you will map each pixel onto its closest centroid.
%
% You should now complete the code in kMeansInitCentroids.m
%
fprintf('\nRunning K-Means clustering on pixels from an image.\n\n');
% Load an image of a bird
A = double(imread('bird_small.png'));
% If imread does not work for you, you can try instead
% load ('bird_small.mat');
A = A / 255; % Divide by 255 so that all values are in the range 0 - 1
% Size of the image
img_size = size(A);
% Reshape the image into an Nx3 matrix where N = number of pixels.
% Each row will contain the Red, Green and Blue pixel values
% This gives us our dataset matrix X that we will use K-Means on.
X = reshape(A, img_size(1) * img_size(2), 3);
% Run your K-Means algorithm on this data
% You should try different values of K and max_iters here
K = 16;
max_iters = 10;
% When using K-Means, it is important to initialize the centroids
% randomly.
% You should complete the code in kMeansInitCentroids.m before proceeding
initial_centroids = kMeansInitCentroids(X, K);
% Run K-Means
[centroids, idx] = runkMeans(X, initial_centroids, max_iters);
fprintf('Program paused. Press enter to continue.\n');
pause;
%% ================= Part 5: Image Compression ======================
% In this part of the exercise, you will use the clusters of K-Means to
% compress an image. To do this, we first find the closest centroid for
% each example. After that, we replace each pixel with the value of its
% closest centroid.
fprintf('\nApplying K-Means to compress an image.\n\n');
% Find closest cluster members
idx = findClosestCentroids(X, centroids);
% Essentially, we have now represented the image X in terms of the
% indices in idx.
% We can now recover the image from the indices (idx) by mapping each pixel
% (specified by its index in idx) to the centroid value
X_recovered = centroids(idx,:);
% Reshape the recovered image into proper dimensions
X_recovered = reshape(X_recovered, img_size(1), img_size(2), 3);
% Display the original image
subplot(1, 2, 1);
imagesc(A);
title('Original');
% Display compressed image side by side
subplot(1, 2, 2);
imagesc(X_recovered)
title(sprintf('Compressed, with %d colors.', K));
fprintf('Program paused. Press enter to continue.\n');
pause;
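% Back-of-envelope storage comparison (a sketch; the script does not measure
% this): each pixel now stores a ceil(log2(K))-bit palette index instead of
% 24 bits of raw RGB, plus the K-entry palette itself.
num_pixels = img_size(1) * img_size(2);
bits_original   = num_pixels * 24;                      % 3 channels x 8 bits
bits_compressed = num_pixels * ceil(log2(K)) + K * 24;  % indices + palette
fprintf('Approximate compression factor: %.1fx\n', bits_original / bits_compressed);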
%% Machine Learning Online Class
% Exercise 7 | Principal Component Analysis and K-Means Clustering
%
% Instructions
% ------------
%
% This file contains code that helps you get started on the
% exercise. You will need to complete the following functions:
%
% pca.m
% projectData.m
% recoverData.m
% computeCentroids.m
% findClosestCentroids.m
% kMeansInitCentroids.m
%
% For this exercise, you will not need to change any code in this file,
% or any other files other than those mentioned above.
%
%% Initialization
clear ; close all; clc
%% ================== Part 1: Load Example Dataset ===================
% We start this exercise by using a small dataset that is easy to
% visualize
%
fprintf('Visualizing example dataset for PCA.\n\n');
% The following command loads the dataset. You should now have the
% variable X in your environment
load ('ex7data1.mat');
% Visualize the example dataset
plot(X(:, 1), X(:, 2), 'bo');
axis([0.5 6.5 2 8]); axis square;
fprintf('Program paused. Press enter to continue.\n');
pause;
%% =============== Part 2: Principal Component Analysis ===============
% You should now implement PCA, a dimension reduction technique. You
% should complete the code in pca.m
%
fprintf('\nRunning PCA on example dataset.\n\n');
% Before running PCA, it is important to first normalize X
[X_norm, mu, sigma] = featureNormalize(X);
% Run PCA
[U, S] = pca(X_norm);
% mu (returned by featureNormalize above) is the mean of each feature
% Draw the eigenvectors centered at mean of data. These lines show the
% directions of maximum variations in the dataset.
hold on;
drawLine(mu, mu + 1.5 * S(1,1) * U(:,1)', '-k', 'LineWidth', 2);
drawLine(mu, mu + 1.5 * S(2,2) * U(:,2)', '-k', 'LineWidth', 2);
hold off;
fprintf('Top eigenvector: \n');
fprintf(' U(:,1) = %f %f \n', U(1,1), U(2,1));
fprintf('\n(you should expect to see -0.707107 -0.707107)\n');
fprintf('Program paused. Press enter to continue.\n');
pause;
%% =================== Part 3: Dimension Reduction ===================
% You should now implement the projection step to map the data onto the
% first k eigenvectors. The code will then plot the data in this reduced
% dimensional space. This will show you what the data looks like when
% using only the corresponding eigenvectors to reconstruct it.
%
% You should complete the code in projectData.m
%
fprintf('\nDimension reduction on example dataset.\n\n');
% Plot the normalized dataset (returned from pca)
plot(X_norm(:, 1), X_norm(:, 2), 'bo');
axis([-4 3 -4 3]); axis square
% Project the data onto K = 1 dimension
K = 1;
Z = projectData(X_norm, U, K);
fprintf('Projection of the first example: %f\n', Z(1));
fprintf('\n(this value should be about 1.481274)\n\n');
X_rec = recoverData(Z, U, K);
fprintf('Approximation of the first example: %f %f\n', X_rec(1, 1), X_rec(1, 2));
fprintf('\n(this value should be about -1.047419 -1.047419)\n\n');
% Draw lines connecting the projected points to the original points
hold on;
plot(X_rec(:, 1), X_rec(:, 2), 'ro');
for i = 1:size(X_norm, 1)
drawLine(X_norm(i,:), X_rec(i,:), '--k', 'LineWidth', 1);
end
hold off
fprintf('Program paused. Press enter to continue.\n');
pause;
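% The fraction of variance retained by the K-dimensional projection can be
% read off the singular values returned by pca (a sketch, using the S
% computed in Part 2):
s = diag(S);
fprintf('Variance retained with K = %d: %.4f\n', K, sum(s(1:K)) / sum(s));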
%% =============== Part 4: Loading and Visualizing Face Data =============
% We start the exercise by first loading and visualizing the dataset.
% The following code will load the dataset into your environment
%
fprintf('\nLoading face dataset.\n\n');
% Load Face dataset
load ('ex7faces.mat')
% Display the first 100 faces in the dataset
displayData(X(1:100, :));
fprintf('Program paused. Press enter to continue.\n');
pause;
%% =========== Part 5: PCA on Face Data: Eigenfaces ===================
% Run PCA and visualize the eigenvectors which are in this case eigenfaces
% We display the first 36 eigenfaces.
%
fprintf(['\nRunning PCA on face dataset.\n' ...
'(this might take a minute or two ...)\n\n']);
% Before running PCA, it is important to first normalize X by subtracting
% the mean value from each feature
[X_norm, mu, sigma] = featureNormalize(X);
% Run PCA
[U, S] = pca(X_norm);
% Visualize the top 36 eigenvectors found
displayData(U(:, 1:36)');
fprintf('Program paused. Press enter to continue.\n');
pause;
%% ============= Part 6: Dimension Reduction for Faces =================
% Project images to the eigen space using the top k eigenvectors
% If you are applying a machine learning algorithm
fprintf('\nDimension reduction for face dataset.\n\n');
K = 100;
Z = projectData(X_norm, U, K);
fprintf('The projected data Z has a size of: ')
fprintf('%d ', size(Z));
fprintf('\n\nProgram paused. Press enter to continue.\n');
pause;
%% ==== Part 7: Visualization of Faces after PCA Dimension Reduction ====
% Project images to the eigen space using the top K eigen vectors and
% visualize only using those K dimensions
% Compare to the original input, which is also displayed
fprintf('\nVisualizing the projected (reduced dimension) faces.\n\n');
K = 100;
X_rec = recoverData(Z, U, K);
% Display normalized data
subplot(1, 2, 1);
displayData(X_norm(1:100,:));
title('Original faces');
axis square;
% Display reconstructed data from only k eigenfaces
subplot(1, 2, 2);
displayData(X_rec(1:100,:));
title('Recovered faces');
axis square;
fprintf('Program paused. Press enter to continue.\n');
pause;
%% === Part 8(a): Optional (ungraded) Exercise: PCA for Visualization ===
% One useful application of PCA is to use it to visualize high-dimensional
% data. In the last K-Means exercise you ran K-Means on 3-dimensional
% pixel colors of an image. We first visualize this output in 3D, and then
% apply PCA to obtain a visualization in 2D.
close all; close all; clc
% Reload the image from the previous exercise and run K-Means on it
% For this to work, you need to complete the K-Means assignment first
A = double(imread('bird_small.png'));
% If imread does not work for you, you can try instead
% load ('bird_small.mat');
A = A / 255;
img_size = size(A);
X = reshape(A, img_size(1) * img_size(2), 3);
K = 16;
max_iters = 10;
initial_centroids = kMeansInitCentroids(X, K);
[centroids, idx] = runkMeans(X, initial_centroids, max_iters);
% Sample 1000 random indexes (since working with all the data is
% too expensive. If you have a fast computer, you may increase this.
sel = floor(rand(1000, 1) * size(X, 1)) + 1;
% Setup Color Palette
palette = hsv(K);
colors = palette(idx(sel), :);
% Visualize the data and centroid memberships in 3D
figure;
scatter3(X(sel, 1), X(sel, 2), X(sel, 3), 10, colors);
title('Pixel dataset plotted in 3D. Color shows centroid memberships');
fprintf('Program paused. Press enter to continue.\n');
pause;
%% === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization ===
% Use PCA to project this cloud to 2D for visualization
% Subtract the mean to use PCA
[X_norm, mu, sigma] = featureNormalize(X);
% PCA and project the data to 2D
[U, S] = pca(X_norm);
Z = projectData(X_norm, U, 2);
% Plot in 2D
figure;
plotDataPoints(Z(sel, :), idx(sel), K);
title('Pixel dataset plotted in 2D, using PCA for dimensionality reduction');
fprintf('Program paused. Press enter to continue.\n');
pause;
function [X_norm, mu, sigma] = featureNormalize(X)
%FEATURENORMALIZE Normalizes the features in X
% FEATURENORMALIZE(X) returns a normalized version of X where
% the mean value of each feature is 0 and the standard deviation
% is 1. This is often a good preprocessing step to do when
% working with learning algorithms.
mu = mean(X);
X_norm = bsxfun(@minus, X, mu);
sigma = std(X_norm);
X_norm = bsxfun(@rdivide, X_norm, sigma);
% ============================================================
end
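% Usage sketch: the returned mu and sigma allow mapping normalized data back
% to the original scale, which is useful when interpreting PCA
% reconstructions. Made-up data:
X = [1 2; 3 4; 5 6];
[X_norm, mu, sigma] = featureNormalize(X);
X_back = bsxfun(@plus, bsxfun(@times, X_norm, sigma), mu);   % recovers X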
function idx = findClosestCentroids(X, centroids)
%FINDCLOSESTCENTROIDS computes the centroid memberships for every example
% idx = FINDCLOSESTCENTROIDS (X, centroids) returns the closest centroids
% in idx for a dataset X where each row is a single example. idx = m x 1
% vector of centroid assignments (i.e. each entry in range [1..K])
%
% Set K
K = size(centroids, 1);
% You need to return the following variables correctly.
idx = zeros(size(X,1), 1);
m = size(X,1);
% ====================== YOUR CODE HERE ======================
% Instructions: Go over every example, find its closest centroid, and store
% the index inside idx at the appropriate location.
% Concretely, idx(i) should contain the index of the centroid
% closest to example i. Hence, it should be a value in the
% range 1..K
%
% Note: You can use a for-loop over the examples to compute this.
%
for i = 1:m
    % squared distances from example i to all K centroids (K x 1)
    norm_matrix = sum((X(i,:) - centroids).^2, 2);
    [~, idx(i)] = min(norm_matrix);
end
% =============================================================
end
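% Fully vectorized sketch (assumes an m x K distance matrix fits in memory).
% Squared distances expand as ||x||^2 - 2*x*c' + ||c||^2; the ||x||^2 term
% is constant within each row, so it can be dropped before taking the min.
X = [1 1; 4 4; 9 9];                       % made-up examples
centroids = [0 0; 10 10];                  % made-up centroids
D = bsxfun(@plus, sum(centroids.^2, 2)', -2 * X * centroids');  % m x K
[~, idx] = min(D, [], 2);                  % here idx = [1; 1; 2]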
function centroids = kMeansInitCentroids(X, K)
%KMEANSINITCENTROIDS This function initializes K centroids that are to be
%used in K-Means on the dataset X
% centroids = KMEANSINITCENTROIDS(X, K) returns K initial centroids to be
% used with the K-Means on the dataset X
%
% You should return this values correctly
centroids = zeros(K, size(X, 2));
% ====================== YOUR CODE HERE ======================
% Instructions: You should set centroids to randomly chosen examples from
% the dataset X
%
% Randomly reorder the indices of the examples
randidx = randperm(size(X, 1));
% Take the first K examples as centroids
centroids = X(randidx(1:K), :);
% =============================================================
end
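% Because K-Means only converges to a local optimum, a common practice is to
% repeat the random initialization several times and keep the run with the
% lowest distortion. A sketch, assuming X, K and max_iters are set as in
% ex7.m and using runkMeans from this exercise:
best_cost = Inf;
for trial = 1:10
    initial_centroids = kMeansInitCentroids(X, K);
    [centroids, idx] = runkMeans(X, initial_centroids, max_iters);
    cost = sum(sum((X - centroids(idx, :)).^2)) / size(X, 1);  % mean sq. distance
    if cost < best_cost
        best_cost = cost; best_centroids = centroids; best_idx = idx;
    end
end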
The author of "jsonlab" toolbox is Qianqian Fang. Qianqian
is currently an Assistant Professor at Massachusetts General Hospital,
Harvard Medical School.
Address: Martinos Center for Biomedical Imaging,
Massachusetts General Hospital,
Harvard Medical School
Bldg 149, 13th St, Charlestown, MA 02129, USA
URL: http://nmr.mgh.harvard.edu/~fangq/
Email: <fangq at nmr.mgh.harvard.edu> or <fangqq at gmail.com>
The script loadjson.m was built upon previous works by
- Nedialko Krouchev: http://www.mathworks.com/matlabcentral/fileexchange/25713
date: 2009/11/02
- François Glineur: http://www.mathworks.com/matlabcentral/fileexchange/23393
date: 2009/03/22
- Joel Feenstra: http://www.mathworks.com/matlabcentral/fileexchange/20565
date: 2008/07/03
This toolbox contains patches submitted by the following contributors:
- Blake Johnson <bjohnso at bbn.com>
part of revision 341
- Niclas Borlin <Niclas.Borlin at cs.umu.se>
various fixes in revision 394, including
- loadjson crashes for all-zero sparse matrix.
- loadjson crashes for empty sparse matrix.
- Non-zero size of 0-by-N and N-by-0 empty matrices is lost after savejson/loadjson.
- loadjson crashes for sparse real column vector.
- loadjson crashes for sparse complex column vector.
- Data is corrupted by savejson for sparse real row vector.
- savejson crashes for sparse complex row vector.
- Yul Kang <yul.kang.on at gmail.com>
patches for svn revision 415.
- savejson saves an empty cell array as [] instead of null
- loadjson differentiates an empty struct from an empty array
============================================================================
JSONlab - a toolbox to encode/decode JSON/UBJSON files in MATLAB/Octave
----------------------------------------------------------------------------
JSONlab ChangeLog (key features marked by *):
== JSONlab 1.0 (codename: Optimus - Final), FangQ <fangq (at) nmr.mgh.harvard.edu> ==
2015/01/02 polish help info for all major functions, update examples, finalize 1.0
2014/12/19 fix a bug to strictly respect NoRowBracket in savejson
== JSONlab 1.0.0-RC2 (codename: Optimus - RC2), FangQ <fangq (at) nmr.mgh.harvard.edu> ==
2014/11/22 show progress bar in loadjson ('ShowProgress')
2014/11/17 add Compact option in savejson to output compact JSON format ('Compact')
2014/11/17 add FastArrayParser in loadjson to specify fast parser applicable levels
2014/09/18 start official github mirror: https://github.com/fangq/jsonlab
== JSONlab 1.0.0-RC1 (codename: Optimus - RC1), FangQ <fangq (at) nmr.mgh.harvard.edu> ==
2014/09/17 fix several compatibility issues when running on octave versions 3.2-3.8
2014/09/17 support 2D cell and struct arrays in both savejson and saveubjson
2014/08/04 escape special characters in a JSON string
2014/02/16 fix a bug when saving ubjson files
== JSONlab 0.9.9 (codename: Optimus - beta), FangQ <fangq (at) nmr.mgh.harvard.edu> ==
2014/01/22 use binary read and write in saveubjson and loadubjson
== JSONlab 0.9.8-1 (codename: Optimus - alpha update 1), FangQ <fangq (at) nmr.mgh.harvard.edu> ==
2013/10/07 better round-trip conservation for empty arrays and structs (patch submitted by Yul Kang)
== JSONlab 0.9.8 (codename: Optimus - alpha), FangQ <fangq (at) nmr.mgh.harvard.edu> ==
2013/08/23 *universal Binary JSON (UBJSON) support, including both saveubjson and loadubjson
== JSONlab 0.9.1 (codename: Rodimus, update 1), FangQ <fangq (at) nmr.mgh.harvard.edu> ==
2012/12/18 *handling of various empty and sparse matrices (fixes submitted by Niclas Borlin)
== JSONlab 0.9.0 (codename: Rodimus), FangQ <fangq (at) nmr.mgh.harvard.edu> ==
2012/06/17 *new format for an invalid leading char, unpacking hex code in savejson
2012/06/01 support JSONP in savejson
2012/05/25 fix the empty cell bug (reported by Cyril Davin)
2012/04/05 savejson can save to a file (suggested by Patrick Rapin)
== JSONlab 0.8.1 (codename: Sentiel, Update 1), FangQ <fangq (at) nmr.mgh.harvard.edu> ==
2012/02/28 loadjson quotation mark escape bug, see http://bit.ly/yyk1nS
2012/01/25 patch to handle root-less objects, contributed by Blake Johnson
== JSONlab 0.8.0 (codename: Sentiel), FangQ <fangq (at) nmr.mgh.harvard.edu> ==
2012/01/13 *speed up loadjson by 20 fold when parsing large data arrays in matlab
2012/01/11 remove row bracket if an array has 1 element, suggested by Mykel Kochenderfer
2011/12/22 *accept sequence of 'param',value input in savejson and loadjson
2011/11/18 fix struct array bug reported by Mykel Kochenderfer
== JSONlab 0.5.1 (codename: Nexus Update 1), FangQ <fangq (at) nmr.mgh.harvard.edu> ==
2011/10/21 fix a bug in loadjson, previous code does not use any of the acceleration
2011/10/20 loadjson supports JSON collections - concatenated JSON objects
== JSONlab 0.5.0 (codename: Nexus), FangQ <fangq (at) nmr.mgh.harvard.edu> ==
2011/10/16 package and release jsonlab 0.5.0
2011/10/15 *add json demo and regression test, support cpx numbers, fix double quote bug
2011/10/11 *speed up readjson dramatically, interpret _Array* tags, show data in root level
2011/10/10 create jsonlab project, start jsonlab website, add online documentation
2011/10/07 *speed up savejson by 25x using sprintf instead of mat2str, add options support
2011/10/06 *savejson works for structs, cells and arrays
2011/09/09 derive loadjson from JSON parser from MATLAB Central, draft savejson.m
Copyright 2011-2015 Qianqian Fang <fangq at nmr.mgh.harvard.edu>. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are
permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of
conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list
of conditions and the following disclaimer in the documentation and/or other materials
provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
The views and conclusions contained in the software and documentation are those of the
authors and should not be interpreted as representing official policies, either expressed
or implied, of the copyright holders.
function val=jsonopt(key,default,varargin)
%
% val=jsonopt(key,default,optstruct)
%
% setting options based on a struct. The struct can be produced
% by varargin2struct from a list of 'param','value' pairs
%
% authors:Qianqian Fang (fangq<at> nmr.mgh.harvard.edu)
%
% $Id: loadjson.m 371 2012-06-20 12:43:06Z fangq $
%
% input:
% key: a string with which one look up a value from a struct
% default: if the key does not exist, return default
% optstruct: a struct where each sub-field is a key
%
% output:
% val: if key exists, val=optstruct.key; otherwise val=default
%
% license:
% BSD, see LICENSE_BSD.txt files for details
%
% -- this function is part of jsonlab toolbox (http://iso2mesh.sf.net/cgi-bin/index.cgi?jsonlab)
%
val=default;
if(nargin<=2) return; end
opt=varargin{1};
if(isstruct(opt) && isfield(opt,key))
val=getfield(opt,key);
end
function s=mergestruct(s1,s2)
%
% s=mergestruct(s1,s2)
%
% merge two struct objects into one
%
% authors:Qianqian Fang (fangq<at> nmr.mgh.harvard.edu)
% date: 2012/12/22
%
% input:
% s1,s2: a struct object, s1 and s2 can not be arrays
%
% output:
% s: the merged struct object. fields in s1 and s2 will be combined in s.
%
% license:
% BSD, see LICENSE_BSD.txt files for details
%
% -- this function is part of jsonlab toolbox (http://iso2mesh.sf.net/cgi-bin/index.cgi?jsonlab)
%
if(~isstruct(s1) || ~isstruct(s2))
error('input parameters contain non-struct');
end
if(length(s1)>1 || length(s2)>1)
error('can not merge struct arrays');
end
fn=fieldnames(s2);
s=s1;
for i=1:length(fn)
s=setfield(s,fn{i},getfield(s2,fn{i}));
end
function opt=varargin2struct(varargin)
%
% opt=varargin2struct('param1',value1,'param2',value2,...)
% or
% opt=varargin2struct(...,optstruct,...)
%
% convert a series of input parameters into a structure
%
% authors:Qianqian Fang (fangq<at> nmr.mgh.harvard.edu)
% date: 2012/12/22
%
% input:
% 'param', value: the input parameters should be pairs of a string and a value
% optstruct: if a parameter is a struct, the fields will be merged to the output struct
%
% output:
% opt: a struct where opt.param1=value1, opt.param2=value2 ...
%
% license:
% BSD, see LICENSE_BSD.txt files for details
%
% -- this function is part of jsonlab toolbox (http://iso2mesh.sf.net/cgi-bin/index.cgi?jsonlab)
%
len=length(varargin);
opt=struct;
if(len==0) return; end
i=1;
while(i<=len)
if(isstruct(varargin{i}))
opt=mergestruct(opt,varargin{i});
elseif(ischar(varargin{i}) && i<len)
opt=setfield(opt,varargin{i},varargin{i+1});
i=i+1;
else
error('input must be in the form of ...,''name'',value,... pairs or structs');
end
i=i+1;
end
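% Usage sketch for varargin2struct together with jsonopt (the option names
% below are illustrative):
opt = varargin2struct('Compact', 1, 'FloatFormat', '%.6g');
compact = jsonopt('Compact', 0, opt);   % key present  -> returns 1
indent  = jsonopt('Indent',  0, opt);   % key absent   -> returns default 0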
function str = makeValidFieldName(str)
% From MATLAB doc: field names must begin with a letter, which may be
% followed by any combination of letters, digits, and underscores.
% Invalid characters will be converted to underscores, and the prefix
% "x0x[Hex code]_" will be added if the first character is not a letter.
isoct=exist('OCTAVE_VERSION','builtin');
pos=regexp(str,'^[^A-Za-z]','once');
if(~isempty(pos))
if(~isoct)
str=regexprep(str,'^([^A-Za-z])','x0x${sprintf(''%X'',unicode2native($1))}_','once');
else
str=sprintf('x0x%X_%s',char(str(1)),str(2:end));
end
end
if(isempty(regexp(str,'[^0-9A-Za-z_]', 'once' ))) return; end
if(~isoct)
str=regexprep(str,'([^0-9A-Za-z_])','_0x${sprintf(''%X'',unicode2native($1))}_');
else
pos=regexp(str,'[^0-9A-Za-z_]');
if(isempty(pos)) return; end
str0=str;
pos0=[0 pos(:)' length(str)];
str='';
for i=1:length(pos)
str=[str str0(pos0(i)+1:pos(i)-1) sprintf('_0x%X_',str0(pos(i)))];
end
if(pos(end)~=length(str))
str=[str str0(pos0(end-1)+1:pos0(end))];
end
end
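% Illustrative behavior (a sketch; note this is a local function, and the
% hex codes assume ASCII input):
%   makeValidFieldName('1abc')  -> 'x0x31_abc'  (leading digit escaped)
%   makeValidFieldName('a-b')   -> 'a_0x2D_b'   ('-' is invalid in a field name)
%   makeValidFieldName('speed') -> 'speed'      (already valid, unchanged)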
function submitWithConfiguration(conf)
addpath('./lib/jsonlab');
parts = parts(conf);
fprintf('== Submitting solutions | %s...\n', conf.itemName);
tokenFile = 'token.mat';
if exist(tokenFile, 'file')
load(tokenFile);
[email token] = promptToken(email, token, tokenFile);
else
[email token] = promptToken('', '', tokenFile);
end
if isempty(token)
fprintf('!! Submission Cancelled\n');
return
end
try
response = submitParts(conf, email, token, parts);
catch
e = lasterror();
fprintf('\n!! Submission failed: %s\n', e.message);
fprintf('\n\nFunction: %s\nFileName: %s\nLineNumber: %d\n', ...
e.stack(1,1).name, e.stack(1,1).file, e.stack(1,1).line);
fprintf('\nPlease correct your code and resubmit.\n');
return
end
if isfield(response, 'errorMessage')
fprintf('!! Submission failed: %s\n', response.errorMessage);
elseif isfield(response, 'errorCode')
fprintf('!! Submission failed: %s\n', response.message);
else
showFeedback(parts, response);
save(tokenFile, 'email', 'token');
end
end
function [email token] = promptToken(email, existingToken, tokenFile)
if (~isempty(email) && ~isempty(existingToken))
prompt = sprintf( ...
'Use token from last successful submission (%s)? (Y/n): ', ...
email);
reenter = input(prompt, 's');
if (isempty(reenter) || reenter(1) == 'Y' || reenter(1) == 'y')
token = existingToken;
return;
else
delete(tokenFile);
end
end
email = input('Login (email address): ', 's');
token = input('Token: ', 's');
end
function isValid = isValidPartOptionIndex(partOptions, i)
isValid = (~isempty(i)) && (1 <= i) && (i <= numel(partOptions));
end
function response = submitParts(conf, email, token, parts)
body = makePostBody(conf, email, token, parts);
submissionUrl = submissionUrl();
responseBody = getResponse(submissionUrl, body);
jsonResponse = validateResponse(responseBody);
response = loadjson(jsonResponse);
end
function body = makePostBody(conf, email, token, parts)
bodyStruct.assignmentSlug = conf.assignmentSlug;
bodyStruct.submitterEmail = email;
bodyStruct.secret = token;
bodyStruct.parts = makePartsStruct(conf, parts);
opt.Compact = 1;
body = savejson('', bodyStruct, opt);
end
function partsStruct = makePartsStruct(conf, parts)
for part = parts
partId = part{:}.id;
fieldName = makeValidFieldName(partId);
outputStruct.output = conf.output(partId);
partsStruct.(fieldName) = outputStruct;
end
end
function [parts] = parts(conf)
parts = {};
for partArray = conf.partArrays
part.id = partArray{:}{1};
part.sourceFiles = partArray{:}{2};
part.name = partArray{:}{3};
parts{end + 1} = part;
end
end
function showFeedback(parts, response)
fprintf('== \n');
fprintf('== %43s | %9s | %-s\n', 'Part Name', 'Score', 'Feedback');
fprintf('== %43s | %9s | %-s\n', '---------', '-----', '--------');
for part = parts
score = '';
partFeedback = '';
partFeedback = response.partFeedbacks.(makeValidFieldName(part{:}.id));
partEvaluation = response.partEvaluations.(makeValidFieldName(part{:}.id));
score = sprintf('%d / %3d', partEvaluation.score, partEvaluation.maxScore);
fprintf('== %43s | %9s | %-s\n', part{:}.name, score, partFeedback);
end
evaluation = response.evaluation;
totalScore = sprintf('%d / %d', evaluation.score, evaluation.maxScore);
fprintf('== --------------------------------\n');
fprintf('== %43s | %9s | %-s\n', '', totalScore, '');
fprintf('== \n');
end
% use urlread or curl to send submit results to the grader and get a response
function response = getResponse(url, body)
% try using urlread() and a secure connection
params = {'jsonBody', body};
[response, success] = urlread(url, 'post', params);
if (success == 0)
% urlread didn't work, try curl & the peer certificate patch
if ispc
% testing note: use 'jsonBody =' for a test case
json_command = sprintf('echo jsonBody=%s | curl -k -X POST -d @- %s', body, url);
else
% it's linux/OS X, so use the other form
json_command = sprintf('echo ''jsonBody=%s'' | curl -k -X POST -d @- %s', body, url);
end
% get the response body for the peer certificate patch method
[code, response] = system(json_command);
% test the success code
if (code ~= 0)
fprintf('[error] submission with curl() was not successful\n');
end
end
end
% validate the grader's response
function response = validateResponse(resp)
% test if the response is json or an HTML page
isJson = length(resp) > 0 && resp(1) == '{';
isHtml = findstr(lower(resp), '<html');
if (isJson)
response = resp;
elseif (isHtml)
% the response is html, so it's probably an error message
printHTMLContents(resp);
error('Grader response is an HTML message');
else
error('Grader sent no response');
end
end
% parse an HTML response and print its contents
function printHTMLContents(response)
strippedResponse = regexprep(response, '<[^>]+>', ' ');
strippedResponse = regexprep(strippedResponse, '[\t ]+', ' ');
fprintf(strippedResponse);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Service configuration
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function submissionUrl = submissionUrl()
submissionUrl = 'https://www-origin.coursera.org/api/onDemandProgrammingImmediateFormSubmissions.v1';
end
function [U, S] = pca(X)
%PCA Run principal component analysis on the dataset X
% [U, S, X] = pca(X) computes eigenvectors of the covariance matrix of X
% Returns the eigenvectors U, the eigenvalues (on diagonal) in S
%
% Useful values
[m, n] = size(X);
% You need to return the following variables correctly.
U = zeros(n);
S = zeros(n);
% ====================== YOUR CODE HERE ======================
% Instructions: You should first compute the covariance matrix. Then, you
% should use the "svd" function to compute the eigenvectors
% and eigenvalues of the covariance matrix.
%
% Note: When computing the covariance matrix, remember to divide by m (the
% number of examples).
%
Covar = (X')*X/m;          % covariance matrix of the (already normalized) data
[U, S, ~] = svd(Covar);    % columns of U: principal directions; diag(S): variances
% =========================================================================
end
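% Sanity-check sketch: for zero-mean data the singular values of the
% covariance matrix equal its eigenvalues, so pca's diag(S) should match
% eig's spectrum sorted in descending order. Made-up data:
X = randn(100, 3);
X = bsxfun(@minus, X, mean(X));             % pca assumes normalized input
[U, S] = pca(X);
[~, D] = eig(X' * X / size(X, 1));
disp([sort(diag(D), 'descend') diag(S)]);   % the two columns should agree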
function plotDataPoints(X, idx, K)
%PLOTDATAPOINTS plots data points in X, coloring them so that those with the same
%index assignments in idx have the same color
% PLOTDATAPOINTS(X, idx, K) plots data points in X, coloring them so that those
% with the same index assignments in idx have the same color
% Create palette
palette = hsv(K + 1);
colors = palette(idx, :);
% Plot the data
scatter(X(:,1), X(:,2), 15, colors);
end
function plotProgresskMeans(X, centroids, previous, idx, K, i)
%PLOTPROGRESSKMEANS is a helper function that displays the progress of
%k-Means as it is running. It is intended for use only with 2D data.
% PLOTPROGRESSKMEANS(X, centroids, previous, idx, K, i) plots the data
% points with colors assigned to each centroid. With the previous
% centroids, it also plots a line between the previous locations and
% current locations of the centroids.
%
% Plot the examples
plotDataPoints(X, idx, K);
% Plot the centroids as black x's
plot(centroids(:,1), centroids(:,2), 'x', ...
'MarkerEdgeColor','k', ...
'MarkerSize', 10, 'LineWidth', 3);
% Plot the history of the centroids with lines
for j=1:size(centroids,1)
drawLine(centroids(j, :), previous(j, :));
end
% Title
title(sprintf('Iteration number %d', i))
end
function Z = projectData(X, U, K)
%PROJECTDATA Computes the reduced data representation when projecting only
%on to the top k eigenvectors
% Z = projectData(X, U, K) computes the projection of
% the normalized inputs X into the reduced dimensional space spanned by
% the first K columns of U. It returns the projected examples in Z.
%
% You need to return the following variables correctly.
% ====================== YOUR CODE HERE ======================
% Instructions: Compute the projection of the data using only the top K
% eigenvectors in U (first K columns).
% For the i-th example X(i,:), the projection on to the k-th
% eigenvector is given as follows:
% x = X(i, :)';
% projection_k = x' * U(:, k);
%
U_reduce = U(:,1:K);       % keep only the top K eigenvectors
Z = X*U_reduce;            % project each example onto the reduced basis
% =============================================================
end
function X_rec = recoverData(Z, U, K)
%RECOVERDATA Recovers an approximation of the original data when using the
%projected data
% X_rec = RECOVERDATA(Z, U, K) recovers an approximation the
% original data that has been reduced to K dimensions. It returns the
% approximate reconstruction in X_rec.
%
% You need to return the following variables correctly.
X_rec = zeros(size(Z, 1), size(U, 1));
% ====================== YOUR CODE HERE ======================
% Instructions: Compute the approximation of the data by projecting back
% onto the original space using the top K eigenvectors in U.
%
% For the i-th example Z(i,:), the (approximate)
% recovered data for dimension j is given as follows:
% v = Z(i, :)';
% recovered_j = v' * U(j, 1:K)';
%
% Notice that U(j, 1:K) is a row vector.
%
U_reduce = U(:,1:K);
X_rec = Z*U_reduce';       % map back to the original n-dimensional space
% =============================================================
end
function [centroids, idx] = runkMeans(X, initial_centroids, ...
max_iters, plot_progress)
%RUNKMEANS runs the K-Means algorithm on data matrix X, where each row of X
%is a single example
% [centroids, idx] = RUNKMEANS(X, initial_centroids, max_iters, ...
% plot_progress) runs the K-Means algorithm on data matrix X, where each
% row of X is a single example. It uses initial_centroids as the
% initial centroids. max_iters specifies the total number of iterations
% of K-Means to execute. plot_progress is a true/false flag that
% indicates if the function should also plot its progress as the
% learning happens. This is set to false by default. runkMeans returns
% centroids, a Kxn matrix of the computed centroids and idx, a m x 1
% vector of centroid assignments (i.e. each entry in range [1..K])
%
% Set default value for plot progress
if ~exist('plot_progress', 'var') || isempty(plot_progress)
plot_progress = false;
end
% Plot the data if we are plotting progress
if plot_progress
figure;
hold on;
end
% Initialize values
[m n] = size(X);
K = size(initial_centroids, 1);
centroids = initial_centroids;
previous_centroids = centroids;
idx = zeros(m, 1);
% Run K-Means
for i=1:max_iters
% Output progress
fprintf('K-Means iteration %d/%d...\n', i, max_iters);
if exist('OCTAVE_VERSION')
fflush(stdout);
end
% For each example in X, assign it to the closest centroid
idx = findClosestCentroids(X, centroids);
% Optionally, plot progress here
if plot_progress
plotProgresskMeans(X, centroids, previous_centroids, idx, K, i);
previous_centroids = centroids;
fprintf('Press enter to continue.\n');
pause;
end
% Given the memberships, compute new centroids
centroids = computeCentroids(X, idx, K);
end
% Hold off if we are plotting progress
if plot_progress
hold off;
end
end
function submit()
addpath('./lib');
conf.assignmentSlug = 'k-means-clustering-and-pca';
conf.itemName = 'K-Means Clustering and PCA';
conf.partArrays = { ...
{ ...
'1', ...
{ 'findClosestCentroids.m' }, ...
'Find Closest Centroids (k-Means)', ...
}, ...
{ ...
'2', ...
{ 'computeCentroids.m' }, ...
'Compute Centroid Means (k-Means)', ...
}, ...
{ ...
'3', ...
{ 'pca.m' }, ...
'PCA', ...
}, ...
{ ...
'4', ...
{ 'projectData.m' }, ...
'Project Data (PCA)', ...
}, ...
{ ...
'5', ...
{ 'recoverData.m' }, ...
'Recover Data (PCA)', ...
}, ...
};
conf.output = @output;
submitWithConfiguration(conf);
end
function out = output(partId, auxstring)
% Random Test Cases
X = reshape(sin(1:165), 15, 11);
Z = reshape(cos(1:121), 11, 11);
C = Z(1:5, :);
idx = (1 + mod(1:15, 3))';
if partId == '1'
idx = findClosestCentroids(X, C);
out = sprintf('%0.5f ', idx(:));
elseif partId == '2'
centroids = computeCentroids(X, idx, 3);
out = sprintf('%0.5f ', centroids(:));
elseif partId == '3'
[U, S] = pca(X);
out = sprintf('%0.5f ', abs([U(:); S(:)]));
elseif partId == '4'
X_proj = projectData(X, Z, 5);
out = sprintf('%0.5f ', X_proj(:));
elseif partId == '5'
X_rec = recoverData(X(:,1:5), Z, 5);
out = sprintf('%0.5f ', X_rec(:));
end
end
dndOYxgCeMRoq7kG
function checkCostFunction(lambda)
%CHECKCOSTFUNCTION Creates a collaborative filtering problem
%to check your cost function and gradients
% CHECKCOSTFUNCTION(lambda) Creates a collaborative filtering problem
% to check your cost function and gradients, it will output the
% analytical gradients produced by your code and the numerical gradients
% (computed using computeNumericalGradient). These two gradient
% computations should result in very similar values.
% Set lambda
if ~exist('lambda', 'var') || isempty(lambda)
lambda = 0;
end
%% Create small problem
X_t = rand(4, 3);
Theta_t = rand(5, 3);
% Zap out most entries
Y = X_t * Theta_t';
Y(rand(size(Y)) > 0.5) = 0;
R = zeros(size(Y));
R(Y ~= 0) = 1;
%% Run Gradient Checking
X = randn(size(X_t));
Theta = randn(size(Theta_t));
num_users = size(Y, 2);
num_movies = size(Y, 1);
num_features = size(Theta_t, 2);
numgrad = computeNumericalGradient( ...
@(t) cofiCostFunc(t, Y, R, num_users, num_movies, ...
num_features, lambda), [X(:); Theta(:)]);
[cost, grad] = cofiCostFunc([X(:); Theta(:)], Y, R, num_users, ...
num_movies, num_features, lambda);
disp([numgrad grad]);
fprintf(['The above two columns you get should be very similar.\n' ...
'(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n']);
diff = norm(numgrad-grad)/norm(numgrad+grad);
fprintf(['If your cost function implementation is correct, then \n' ...
'the relative difference will be small (less than 1e-9). \n' ...
'\nRelative Difference: %g\n'], diff);
end
function [J, grad] = cofiCostFunc(params, Y, R, num_users, num_movies, ...
num_features, lambda)
%COFICOSTFUNC Collaborative filtering cost function
% [J, grad] = COFICOSTFUNC(params, Y, R, num_users, num_movies, ...
% num_features, lambda) returns the cost and gradient for the
% collaborative filtering problem.
%
% Unfold the U and W matrices from params
X = reshape(params(1:num_movies*num_features), num_movies, num_features);
Theta = reshape(params(num_movies*num_features+1:end), ...
num_users, num_features);
% You need to return the following values correctly
J = 0;
X_grad = zeros(size(X));
Theta_grad = zeros(size(Theta));
% ====================== YOUR CODE HERE ======================
% Instructions: Compute the cost function and gradient for collaborative
% filtering. Concretely, you should first implement the cost
% function (without regularization) and make sure it
% matches our cost. After that, you should implement the
% gradient and use the checkCostFunction routine to check
% that the gradient is correct. Finally, you should implement
% regularization.
%
% Notes: X - num_movies x num_features matrix of movie features
% Theta - num_users x num_features matrix of user features
% Y - num_movies x num_users matrix of user ratings of movies
% R - num_movies x num_users matrix, where R(i, j) = 1 if the
% i-th movie was rated by the j-th user
%
% You should set the following variables correctly:
%
% X_grad - num_movies x num_features matrix, containing the
% partial derivatives w.r.t. to each element of X
% Theta_grad - num_users x num_features matrix, containing the
% partial derivatives w.r.t. to each element of Theta
%
diff = ((X*Theta') - Y).*R;   % prediction errors, only where a rating exists
J = sum(sum(diff.^2))/2 + sum(sum(X.^2))*lambda/2 + sum(sum(Theta.^2))*(lambda)/2;
X_grad = diff*Theta + lambda*X;
Theta_grad = (diff')*X + lambda*Theta;
% =============================================================
grad = [X_grad(:); Theta_grad(:)];
end
function numgrad = computeNumericalGradient(J, theta)
%COMPUTENUMERICALGRADIENT Computes the gradient using "finite differences"
%and gives us a numerical estimate of the gradient.
% numgrad = COMPUTENUMERICALGRADIENT(J, theta) computes the numerical
% gradient of the function J around theta. Calling y = J(theta) should
% return the function value at theta.
% Notes: The following code implements numerical gradient checking, and
% returns the numerical gradient. It sets numgrad(i) to (a numerical
% approximation of) the partial derivative of J with respect to the
% i-th input argument, evaluated at theta. (i.e., numgrad(i) should
% be (approximately) the partial derivative of J with respect
% to theta(i).)
%
numgrad = zeros(size(theta));
perturb = zeros(size(theta));
e = 1e-4;
for p = 1:numel(theta)
% Set perturbation vector
perturb(p) = e;
loss1 = J(theta - perturb);
loss2 = J(theta + perturb);
% Compute Numerical Gradient
numgrad(p) = (loss2 - loss1) / (2*e);
perturb(p) = 0;
end
end
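% Usage sketch on a function with a known gradient: J(t) = t1^2 + 3*t2 has
% gradient [2*t1; 3], so the numerical estimate at theta = [2; 5] should be
% close to [4; 3].
J = @(t) t(1)^2 + 3 * t(2);
numgrad = computeNumericalGradient(J, [2; 5]);
disp(numgrad);   % approximately [4; 3]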
function [mu sigma2] = estimateGaussian(X)
%ESTIMATEGAUSSIAN This function estimates the parameters of a
%Gaussian distribution using the data in X
% [mu sigma2] = estimateGaussian(X),
% The input X is the dataset with each n-dimensional data point in one row
% The output is an n-dimensional vector mu, the mean of the data set
% and the variances sigma^2, an n x 1 vector
%
% Useful variables
[m, n] = size(X);
% You should return these values correctly
sigma2 = zeros(n, 1);
% ====================== YOUR CODE HERE ======================
% Instructions: Compute the mean of the data and the variances
% In particular, mu(i) should contain the mean of
% the data for the i-th feature and sigma2(i)
% should contain variance of the i-th feature.
%
mu = mean(X, 1)';        % per-feature means (n x 1)
sigma2 = var(X, 1)';     % per-feature variances, normalized by m (n x 1)
% =============================================================
end
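% Note: var(X, 1) normalizes by m (the maximum-likelihood estimate used in
% this exercise), not m-1. A sketch of the equivalent explicit computation,
% on made-up data:
X = [1 2; 3 4; 5 9];
[mu, sigma2] = estimateGaussian(X);
sigma2_check = mean(bsxfun(@minus, X, mean(X, 1)).^2, 1)';  % equals sigma2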
%% Machine Learning Online Class
% Exercise 8 | Anomaly Detection and Collaborative Filtering
%
% Instructions
% ------------
%
% This file contains code that helps you get started on the
% exercise. You will need to complete the following functions:
%
% estimateGaussian.m
% selectThreshold.m
% cofiCostFunc.m
%
% For this exercise, you will not need to change any code in this file,
% or any other files other than those mentioned above.
%
%% Initialization
clear ; close all; clc
%% ================== Part 1: Load Example Dataset ===================
% We start this exercise by using a small dataset that is easy to
% visualize.
%
% Our example case consists of 2 network server statistics across
% several machines: the latency and throughput of each machine.
% This exercise will help us find possibly faulty (or very fast) machines.
%
fprintf('Visualizing example dataset for outlier detection.\n\n');
% The following command loads the dataset. You should now have the
% variables X, Xval, yval in your environment
load('ex8data1.mat');
% Visualize the example dataset
plot(X(:, 1), X(:, 2), 'bx');
axis([0 30 0 30]);
xlabel('Latency (ms)');
ylabel('Throughput (mb/s)');
fprintf('Program paused. Press enter to continue.\n');
pause
%% ================== Part 2: Estimate the dataset statistics ===================
% For this exercise, we assume a Gaussian distribution for the dataset.
%
% We first estimate the parameters of our assumed Gaussian distribution,
% then compute the probabilities for each of the points and then visualize
% both the overall distribution and where each of the points falls in
% terms of that distribution.
%
fprintf('Visualizing Gaussian fit.\n\n');
% Estimate mu and sigma2
[mu sigma2] = estimateGaussian(X);
% Returns the density of the multivariate normal at each data point (row)
% of X
p = multivariateGaussian(X, mu, sigma2);
% Visualize the fit
visualizeFit(X, mu, sigma2);
xlabel('Latency (ms)');
ylabel('Throughput (mb/s)');
fprintf('Program paused. Press enter to continue.\n');
pause;
%% ================== Part 3: Find Outliers ===================
% Now you will find a good epsilon threshold using the probabilities of a
% cross-validation set under the estimated Gaussian distribution.
%
pval = multivariateGaussian(Xval, mu, sigma2);
[epsilon F1] = selectThreshold(yval, pval);
fprintf('Best epsilon found using cross-validation: %e\n', epsilon);
fprintf('Best F1 on Cross Validation Set: %f\n', F1);
fprintf(' (you should see a value epsilon of about 8.99e-05)\n');
fprintf(' (you should see a Best F1 value of 0.875000)\n\n');
% Find the outliers in the training set and plot them
outliers = find(p < epsilon);
% Draw a red circle around those outliers
hold on
plot(X(outliers, 1), X(outliers, 2), 'ro', 'LineWidth', 2, 'MarkerSize', 10);
hold off
fprintf('Program paused. Press enter to continue.\n');
pause;
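% For reference, the F1 score inside selectThreshold follows the standard
% precision/recall formula over the predictions (pval < epsilon). A sketch
% (names suffixed _check to avoid shadowing the graded results):
pred = (pval < epsilon);
tp = sum((pred == 1) & (yval == 1));    % true positives
fp = sum((pred == 1) & (yval == 0));    % false positives
fn = sum((pred == 0) & (yval == 1));    % false negatives
prec = tp / (tp + fp);
rec  = tp / (tp + fn);
F1_check = 2 * prec * rec / (prec + rec);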
%% ================== Part 4: Multidimensional Outliers ===================
% We will now use the code from the previous part and apply it to a
% harder problem in which more features describe each datapoint and only
% some features indicate whether a point is an outlier.
%
% Loads the second dataset. You should now have the
% variables X, Xval, yval in your environment
load('ex8data2.mat');
% Apply the same steps to the larger dataset
[mu sigma2] = estimateGaussian(X);
% Training set
p = multivariateGaussian(X, mu, sigma2);
% Cross-validation set
pval = multivariateGaussian(Xval, mu, sigma2);
% Find the best threshold
[epsilon F1] = selectThreshold(yval, pval);
fprintf('Best epsilon found using cross-validation: %e\n', epsilon);
fprintf('Best F1 on Cross Validation Set: %f\n', F1);
fprintf(' (you should see a value epsilon of about 1.38e-18)\n');
fprintf(' (you should see a Best F1 value of 0.615385)\n');
fprintf('# Outliers found: %d\n\n', sum(p < epsilon));
%% Machine Learning Online Class
% Exercise 8 | Anomaly Detection and Collaborative Filtering
%
% Instructions
% ------------
%
% This file contains code that helps you get started on the
% exercise. You will need to complete the following functions:
%
% estimateGaussian.m
% selectThreshold.m
% cofiCostFunc.m
%
% For this exercise, you will not need to change any code in this file,
% or any other files other than those mentioned above.
%
%% =============== Part 1: Loading movie ratings dataset ================
% You will start by loading the movie ratings dataset to understand the
% structure of the data.
%
fprintf('Loading movie ratings dataset.\n\n');
% Load data
load ('ex8_movies.mat');
% Y is a 1682x943 matrix, containing ratings (1-5) of 1682 movies by
% 943 users
%
% R is a 1682x943 matrix, where R(i,j) = 1 if and only if user j gave a
% rating to movie i
% From the matrix, we can compute statistics like average rating.
fprintf('Average rating for movie 1 (Toy Story): %f / 5\n\n', ...
mean(Y(1, R(1, :))));
% We can "visualize" the ratings matrix by plotting it with imagesc
imagesc(Y);
ylabel('Movies');
xlabel('Users');
fprintf('\nProgram paused. Press enter to continue.\n');
pause;
%% ============ Part 2: Collaborative Filtering Cost Function ===========
% You will now implement the cost function for collaborative filtering.
% To help you debug your cost function, we have included a set of
% pre-trained weights. Specifically, you should complete the code in
% cofiCostFunc.m to return J.
% Load pre-trained weights (X, Theta, num_users, num_movies, num_features)
load ('ex8_movieParams.mat');
% Reduce the data set size so that this runs faster
num_users = 4; num_movies = 5; num_features = 3;
X = X(1:num_movies, 1:num_features);
Theta = Theta(1:num_users, 1:num_features);
Y = Y(1:num_movies, 1:num_users);
R = R(1:num_movies, 1:num_users);
% Evaluate cost function
J = cofiCostFunc([X(:) ; Theta(:)], Y, R, num_users, num_movies, ...
num_features, 0);
fprintf(['Cost at loaded parameters: %f '...
'\n(this value should be about 22.22)\n'], J);
fprintf('\nProgram paused. Press enter to continue.\n');
pause;
%% ============== Part 3: Collaborative Filtering Gradient ==============
% Once your cost function matches up with ours, you should now implement
% the collaborative filtering gradient function. Specifically, you should
% complete the code in cofiCostFunc.m to return the grad argument.
%
fprintf('\nChecking Gradients (without regularization) ... \n');
% Check gradients by running checkNNGradients
checkCostFunction;
fprintf('\nProgram paused. Press enter to continue.\n');
pause;
%% ========= Part 4: Collaborative Filtering Cost Regularization ========
% Now, you should implement regularization for the cost function for
% collaborative filtering. You can implement it by adding the cost of
% regularization to the original cost computation.
%
% Evaluate cost function
J = cofiCostFunc([X(:) ; Theta(:)], Y, R, num_users, num_movies, ...
num_features, 1.5);
fprintf(['Cost at loaded parameters (lambda = 1.5): %f '...
'\n(this value should be about 31.34)\n'], J);
fprintf('\nProgram paused. Press enter to continue.\n');
pause;
%% ======= Part 5: Collaborative Filtering Gradient Regularization ======
% Once your cost matches up with ours, you should proceed to implement
% regularization for the gradient.
%
%
fprintf('\nChecking Gradients (with regularization) ... \n');
% Check gradients by running checkNNGradients
checkCostFunction(1.5);
fprintf('\nProgram paused. Press enter to continue.\n');
pause;
%% ============== Part 6: Entering ratings for a new user ===============
% Before training the collaborative filtering model, we will first
% add ratings that correspond to a new user that we just observed. This
% part of the code will also allow you to put in your own ratings for the
% movies in our dataset!
%
movieList = loadMovieList();
% Initialize my ratings
my_ratings = zeros(1682, 1);
% Check the file movie_idx.txt for id of each movie in our dataset
% For example, Toy Story (1995) has ID 1, so to rate it "4", you can set
my_ratings(1) = 4;
% Or suppose you did not enjoy Silence of the Lambs (1991), you can set
my_ratings(98) = 2;
% We have selected a few movies we liked / did not like and the ratings we
% gave are as follows:
my_ratings(7) = 3;
my_ratings(12)= 5;
my_ratings(54) = 4;
my_ratings(64)= 5;
my_ratings(66)= 3;
my_ratings(69) = 5;
my_ratings(183) = 4;
my_ratings(226) = 5;
my_ratings(355)= 5;
fprintf('\n\nNew user ratings:\n');
for i = 1:length(my_ratings)
if my_ratings(i) > 0
fprintf('Rated %d for %s\n', my_ratings(i), ...
movieList{i});
end
end
fprintf('\nProgram paused. Press enter to continue.\n');
pause;
%% ================== Part 7: Learning Movie Ratings ====================
% Now, you will train the collaborative filtering model on a movie rating
% dataset of 1682 movies and 943 users
%
fprintf('\nTraining collaborative filtering...\n');
% Load data
load('ex8_movies.mat');
% Y is a 1682x943 matrix, containing ratings (1-5) of 1682 movies by
% 943 users
%
% R is a 1682x943 matrix, where R(i,j) = 1 if and only if user j gave a
% rating to movie i
% Add our own ratings to the data matrix
Y = [my_ratings Y];
R = [(my_ratings ~= 0) R];
% Normalize Ratings
[Ynorm, Ymean] = normalizeRatings(Y, R);
% Useful Values
num_users = size(Y, 2);
num_movies = size(Y, 1);
num_features = 10;
% Set Initial Parameters (Theta, X)
X = randn(num_movies, num_features);
Theta = randn(num_users, num_features);
initial_parameters = [X(:); Theta(:)];
% Set options for fmincg
options = optimset('GradObj', 'on', 'MaxIter', 100);
% Set Regularization
lambda = 10;
theta = fmincg (@(t)(cofiCostFunc(t, Ynorm, R, num_users, num_movies, ...
num_features, lambda)), ...
initial_parameters, options);
% Unfold the returned theta back into U and W
X = reshape(theta(1:num_movies*num_features), num_movies, num_features);
Theta = reshape(theta(num_movies*num_features+1:end), ...
num_users, num_features);
fprintf('Recommender system learning completed.\n');
fprintf('\nProgram paused. Press enter to continue.\n');
pause;
%% ================== Part 8: Recommendation for you ====================
% After training the model, you can now make recommendations by computing
% the predictions matrix.
%
p = X * Theta';
my_predictions = p(:,1) + Ymean;
movieList = loadMovieList();
[r, ix] = sort(my_predictions, 'descend');
fprintf('\nTop recommendations for you:\n');
for i=1:10
j = ix(i);
fprintf('Predicting rating %.1f for movie %s\n', my_predictions(j), ...
movieList{j});
end
fprintf('\n\nOriginal ratings provided:\n');
for i = 1:length(my_ratings)
if my_ratings(i) > 0
fprintf('Rated %d for %s\n', my_ratings(i), ...
movieList{i});
end
end
function [X, fX, i] = fmincg(f, X, options, P1, P2, P3, P4, P5)
% Minimize a continuous differentiable multivariate function. Starting point
% is given by "X" (D by 1), and the function named in the string "f", must
% return a function value and a vector of partial derivatives. The Polack-
% Ribiere flavour of conjugate gradients is used to compute search directions,
% and a line search using quadratic and cubic polynomial approximations and the
% Wolfe-Powell stopping criteria is used together with the slope ratio method
% for guessing initial step sizes. Additionally a bunch of checks are made to
% make sure that exploration is taking place and that extrapolation will not
% be unboundedly large. The "length" gives the length of the run: if it is
% positive, it gives the maximum number of line searches, if negative its
% absolute gives the maximum allowed number of function evaluations. You can
% (optionally) give "length" a second component, which will indicate the
% reduction in function value to be expected in the first line-search (defaults
% to 1.0). The function returns when either its length is up, or if no further
% progress can be made (ie, we are at a minimum, or so close that due to
% numerical problems, we cannot get any closer). If the function terminates
% within a few iterations, it could be an indication that the function value
% and derivatives are not consistent (ie, there may be a bug in the
% implementation of your "f" function). The function returns the found
% solution "X", a vector of function values "fX" indicating the progress made
% and "i" the number of iterations (line searches or function evaluations,
% depending on the sign of "length") used.
%
% Usage: [X, fX, i] = fmincg(f, X, options, P1, P2, P3, P4, P5)
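%
% For example (an illustrative sketch, not part of the original header;
% quadCost is a hypothetical handle returning both a value and its
% gradient):
%
%   quadCost = @(t) deal(0.5 * sum((t - [1; 2]) .^ 2), t - [1; 2]);
%   opts = optimset('GradObj', 'on', 'MaxIter', 50);
%   [t_opt, costHist] = fmincg(quadCost, zeros(2, 1), opts);  % t_opt -> [1; 2]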
%
% See also: checkgrad
%
% Copyright (C) 2001 and 2002 by Carl Edward Rasmussen. Date 2002-02-13
%
%
% (C) Copyright 1999, 2000 & 2001, Carl Edward Rasmussen
%
% Permission is granted for anyone to copy, use, or modify these
% programs and accompanying documents for purposes of research or
% education, provided this copyright notice is retained, and note is
% made of any changes that have been made.
%
% These programs and documents are distributed without any warranty,
% express or implied. As the programs were written for research
% purposes only, they have not been tested to the degree that would be
% advisable in any important application. All use of these programs is
% entirely at the user's own risk.
%
% [ml-class] Changes Made:
% 1) Function name and argument specifications
% 2) Output display
%
% Read options
if exist('options', 'var') && ~isempty(options) && isfield(options, 'MaxIter')
length = options.MaxIter;
else
length = 100;
end
RHO = 0.01; % a bunch of constants for line searches
SIG = 0.5; % RHO and SIG are the constants in the Wolfe-Powell conditions
INT = 0.1; % don't reevaluate within 0.1 of the limit of the current bracket
EXT = 3.0; % extrapolate maximum 3 times the current bracket
MAX = 20; % max 20 function evaluations per line search
RATIO = 100; % maximum allowed slope ratio
argstr = ['feval(f, X']; % compose string used to call function
for i = 1:(nargin - 3)
argstr = [argstr, ',P', int2str(i)];
end
argstr = [argstr, ')'];
if max(size(length)) == 2, red=length(2); length=length(1); else red=1; end
S=['Iteration '];
i = 0; % zero the run length counter
ls_failed = 0; % no previous line search has failed
fX = [];
[f1 df1] = eval(argstr); % get function value and gradient
i = i + (length<0); % count epochs?!
s = -df1; % search direction is steepest
d1 = -s'*s; % this is the slope
z1 = red/(1-d1); % initial step is red/(|s|+1)
while i < abs(length) % while not finished
i = i + (length>0); % count iterations?!
X0 = X; f0 = f1; df0 = df1; % make a copy of current values
X = X + z1*s; % begin line search
[f2 df2] = eval(argstr);
i = i + (length<0); % count epochs?!
d2 = df2'*s;
f3 = f1; d3 = d1; z3 = -z1; % initialize point 3 equal to point 1
if length>0, M = MAX; else M = min(MAX, -length-i); end
success = 0; limit = -1; % initialize quantities
while 1
while ((f2 > f1+z1*RHO*d1) || (d2 > -SIG*d1)) && (M > 0)
limit = z1; % tighten the bracket
if f2 > f1
z2 = z3 - (0.5*d3*z3*z3)/(d3*z3+f2-f3); % quadratic fit
else
A = 6*(f2-f3)/z3+3*(d2+d3); % cubic fit
B = 3*(f3-f2)-z3*(d3+2*d2);
z2 = (sqrt(B*B-A*d2*z3*z3)-B)/A; % numerical error possible - ok!
end
if isnan(z2) || isinf(z2)
z2 = z3/2; % if we had a numerical problem then bisect
end
z2 = max(min(z2, INT*z3),(1-INT)*z3); % don't accept too close to limits
z1 = z1 + z2; % update the step
X = X + z2*s;
[f2 df2] = eval(argstr);
M = M - 1; i = i + (length<0); % count epochs?!
d2 = df2'*s;
z3 = z3-z2; % z3 is now relative to the location of z2
end
if f2 > f1+z1*RHO*d1 || d2 > -SIG*d1
break; % this is a failure
elseif d2 > SIG*d1
success = 1; break; % success
elseif M == 0
break; % failure
end
A = 6*(f2-f3)/z3+3*(d2+d3); % make cubic extrapolation
B = 3*(f3-f2)-z3*(d3+2*d2);
z2 = -d2*z3*z3/(B+sqrt(B*B-A*d2*z3*z3)); % num. error possible - ok!
if ~isreal(z2) || isnan(z2) || isinf(z2) || z2 < 0 % num prob or wrong sign?
if limit < -0.5 % if we have no upper limit
z2 = z1 * (EXT-1); % then extrapolate the maximum amount
else
z2 = (limit-z1)/2; % otherwise bisect
end
elseif (limit > -0.5) && (z2+z1 > limit) % extrapolation beyond max?
z2 = (limit-z1)/2; % bisect
elseif (limit < -0.5) && (z2+z1 > z1*EXT) % extrapolation beyond limit
z2 = z1*(EXT-1.0); % set to extrapolation limit
elseif z2 < -z3*INT
z2 = -z3*INT;
elseif (limit > -0.5) && (z2 < (limit-z1)*(1.0-INT)) % too close to limit?
z2 = (limit-z1)*(1.0-INT);
end
f3 = f2; d3 = d2; z3 = -z2; % set point 3 equal to point 2
z1 = z1 + z2; X = X + z2*s; % update current estimates
[f2 df2] = eval(argstr);
M = M - 1; i = i + (length<0); % count epochs?!
d2 = df2'*s;
end % end of line search
if success % if line search succeeded
f1 = f2; fX = [fX' f1]';
fprintf('%s %4i | Cost: %4.6e\r', S, i, f1);
s = (df2'*df2-df1'*df2)/(df1'*df1)*s - df2; % Polack-Ribiere direction
tmp = df1; df1 = df2; df2 = tmp; % swap derivatives
d2 = df1'*s;
if d2 > 0 % new slope must be negative
s = -df1; % otherwise use steepest direction
d2 = -s'*s;
end
z1 = z1 * min(RATIO, d1/(d2-realmin)); % slope ratio but max RATIO
d1 = d2;
ls_failed = 0; % this line search did not fail
else
X = X0; f1 = f0; df1 = df0; % restore point from before failed line search
if ls_failed || i > abs(length) % line search failed twice in a row
break; % or we ran out of time, so we give up
end
tmp = df1; df1 = df2; df2 = tmp; % swap derivatives
s = -df1; % try steepest
d1 = -s'*s;
z1 = 1/(1-d1);
ls_failed = 1; % this line search failed
end
if exist('OCTAVE_VERSION')
fflush(stdout);
end
end
fprintf('\n');
The author of the "jsonlab" toolbox is Qianqian Fang. Qianqian
is currently an Assistant Professor at Massachusetts General Hospital,
Harvard Medical School.
Address: Martinos Center for Biomedical Imaging,
Massachusetts General Hospital,
Harvard Medical School
Bldg 149, 13th St, Charlestown, MA 02129, USA
URL: http://nmr.mgh.harvard.edu/~fangq/
Email: <fangq at nmr.mgh.harvard.edu> or <fangqq at gmail.com>
The script loadjson.m was built upon previous works by
- Nedialko Krouchev: http://www.mathworks.com/matlabcentral/fileexchange/25713
date: 2009/11/02
- François Glineur: http://www.mathworks.com/matlabcentral/fileexchange/23393
date: 2009/03/22
- Joel Feenstra: http://www.mathworks.com/matlabcentral/fileexchange/20565
date: 2008/07/03
This toolbox contains patches submitted by the following contributors:
- Blake Johnson <bjohnso at bbn.com>
part of revision 341
- Niclas Borlin <Niclas.Borlin at cs.umu.se>
various fixes in revision 394, including
- loadjson crashes for all-zero sparse matrix.
- loadjson crashes for empty sparse matrix.
- Non-zero size of 0-by-N and N-by-0 empty matrices is lost after savejson/loadjson.
- loadjson crashes for sparse real column vector.
- loadjson crashes for sparse complex column vector.
- Data is corrupted by savejson for sparse real row vector.
- savejson crashes for sparse complex row vector.
- Yul Kang <yul.kang.on at gmail.com>
patches for svn revision 415.
- savejson saves an empty cell array as [] instead of null
- loadjson differentiates an empty struct from an empty array
============================================================================
JSONlab - a toolbox to encode/decode JSON/UBJSON files in MATLAB/Octave
----------------------------------------------------------------------------
JSONlab ChangeLog (key features marked by *):
== JSONlab 1.0 (codename: Optimus - Final), FangQ <fangq (at) nmr.mgh.harvard.edu> ==
2015/01/02 polish help info for all major functions, update examples, finalize 1.0
2014/12/19 fix a bug to strictly respect NoRowBracket in savejson
== JSONlab 1.0.0-RC2 (codename: Optimus - RC2), FangQ <fangq (at) nmr.mgh.harvard.edu> ==
2014/11/22 show progress bar in loadjson ('ShowProgress')
2014/11/17 add Compact option in savejson to output compact JSON format ('Compact')
2014/11/17 add FastArrayParser in loadjson to specify fast parser applicable levels
2014/09/18 start official github mirror: https://github.com/fangq/jsonlab
== JSONlab 1.0.0-RC1 (codename: Optimus - RC1), FangQ <fangq (at) nmr.mgh.harvard.edu> ==
2014/09/17 fix several compatibility issues when running on octave versions 3.2-3.8
2014/09/17 support 2D cell and struct arrays in both savejson and saveubjson
2014/08/04 escape special characters in a JSON string
2014/02/16 fix a bug when saving ubjson files
== JSONlab 0.9.9 (codename: Optimus - beta), FangQ <fangq (at) nmr.mgh.harvard.edu> ==
2014/01/22 use binary read and write in saveubjson and loadubjson
== JSONlab 0.9.8-1 (codename: Optimus - alpha update 1), FangQ <fangq (at) nmr.mgh.harvard.edu> ==
2013/10/07 better round-trip conservation for empty arrays and structs (patch submitted by Yul Kang)
== JSONlab 0.9.8 (codename: Optimus - alpha), FangQ <fangq (at) nmr.mgh.harvard.edu> ==
2013/08/23 *universal Binary JSON (UBJSON) support, including both saveubjson and loadubjson
== JSONlab 0.9.1 (codename: Rodimus, update 1), FangQ <fangq (at) nmr.mgh.harvard.edu> ==
2012/12/18 *handling of various empty and sparse matrices (fixes submitted by Niclas Borlin)
== JSONlab 0.9.0 (codename: Rodimus), FangQ <fangq (at) nmr.mgh.harvard.edu> ==
2012/06/17 *new format for an invalid leading char, unpacking hex code in savejson
2012/06/01 support JSONP in savejson
2012/05/25 fix the empty cell bug (reported by Cyril Davin)
2012/04/05 savejson can save to a file (suggested by Patrick Rapin)
== JSONlab 0.8.1 (codename: Sentiel, Update 1), FangQ <fangq (at) nmr.mgh.harvard.edu> ==
2012/02/28 loadjson quotation mark escape bug, see http://bit.ly/yyk1nS
2012/01/25 patch to handle root-less objects, contributed by Blake Johnson
== JSONlab 0.8.0 (codename: Sentiel), FangQ <fangq (at) nmr.mgh.harvard.edu> ==
2012/01/13 *speed up loadjson by 20 fold when parsing large data arrays in matlab
2012/01/11 remove row bracket if an array has 1 element, suggested by Mykel Kochenderfer
2011/12/22 *accept sequence of 'param',value input in savejson and loadjson
2011/11/18 fix struct array bug reported by Mykel Kochenderfer
== JSONlab 0.5.1 (codename: Nexus Update 1), FangQ <fangq (at) nmr.mgh.harvard.edu> ==
2011/10/21 fix a bug in loadjson, previous code does not use any of the acceleration
2011/10/20 loadjson supports JSON collections - concatenated JSON objects
== JSONlab 0.5.0 (codename: Nexus), FangQ <fangq (at) nmr.mgh.harvard.edu> ==
2011/10/16 package and release jsonlab 0.5.0
2011/10/15 *add json demo and regression test, support cpx numbers, fix double quote bug
2011/10/11 *speed up readjson dramatically, interpret _Array* tags, show data in root level
2011/10/10 create jsonlab project, start jsonlab website, add online documentation
2011/10/07 *speed up savejson by 25x using sprintf instead of mat2str, add options support
2011/10/06 *savejson works for structs, cells and arrays
2011/09/09 derive loadjson from JSON parser from MATLAB Central, draft savejson.m
Copyright 2011-2015 Qianqian Fang <fangq at nmr.mgh.harvard.edu>. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are
permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of
conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list
of conditions and the following disclaimer in the documentation and/or other materials
provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
The views and conclusions contained in the software and documentation are those of the
authors and should not be interpreted as representing official policies, either expressed
or implied, of the copyright holders.