{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Eigenfaces: PCA on face images\n",
    "\n",
    "This demo is related to eigenfaces (for more info, see this paper: https://sites.cs.ucsb.edu/~mturk/Papers/jcn.pdf).\n",
    "\n",
    "While scikit-learn has PCA built in, I avoided using that since it would hide what is actually going on."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load packages\n",
    "import numpy as np\n",
    "import numpy.linalg as la\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.datasets import fetch_lfw_people"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# get the data\n",
    "lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# h is the height of each image, w is the width\n",
    "n_samples, h, w = lfw_people.images.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# X is the actual data we'll work with; the shape is n_samples by n_dimensions\n",
    "X = lfw_people.data\n",
    "X.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# center the data by subtracting the per-pixel mean over all samples\n",
    "# (np.matrix is used so that `*` in the covariance computation means matrix product)\n",
    "X_centered = np.matrix(X - np.mean(X, 0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# compute the sample covariance matrix; X_centered is an np.matrix, so `*` is a matrix product\n",
    "# (the 1/(n_samples - 1) factor rescales the eigenvalues but leaves the eigenvectors unchanged)\n",
    "cov = (X_centered.T * X_centered) / (n_samples - 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# compute the eigendecomposition (eigvecs.T[j] is the j'th eigenvector)\n",
    "# cov is symmetric, so use eigh; the general-purpose eig does not promise real output or any ordering\n",
    "eigvals, eigvecs = la.eigh(cov)\n",
    "# eigh returns the eigenvalues in ascending order; reorder so that index 0 is the top principal direction\n",
    "order = np.argsort(eigvals)[::-1]\n",
    "eigvals, eigvecs = eigvals[order], eigvecs[:, order]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# display the j'th eigenface (this is a reshaped eigenvector)\n",
    "j = 0\n",
    "plt.title('eigenface j = %d' % j)\n",
    "plt.imshow(eigvecs.T[j].reshape((h, w)), cmap=plt.cm.gray);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
"metadata": {}, "outputs": [], "source": [ "# this function returns the best reconstruction of an input example x using a k-dimensional subspace\n", "# (i.e. using the top k principal directions, equivalently, the top k eigenvectors of the covariance matrix)\n", "def reconstruct(x, k, X, V):\n", " xhat = np.mean(X,0)\n", " for j in range(k):\n", " xhat = xhat + np.dot(V.T[j], x)[0,0] * V.T[j]\n", " return xhat\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# it's George W Bush!\n", "plt.imshow(X[200].reshape((h,w)), cmap=plt.cm.gray)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# display the reconstruction using the first k principal components, for k = 0 (just the mean), 1, 2, ..., 100\n", "for k in range(0,201):\n", " plt.title('k = %d' % k)\n", " plt.imshow(reconstruct(X[200], k-1, X, eigvecs).reshape((h,w)), cmap=plt.cm.gray)\n", " plt.pause(0.1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# we can also do PCA using the singular value decomposition (SVD) of the centered data\n", "u,s,vh = la.svd(X_centered, full_matrices = False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "u.shape, s.shape, vh.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# verify that the first singular vector of X_centered is the same as the first eigenvector of the covariance matrix C (up to a sign)\n", "np.allclose(vh[0], eigvecs.T[0]) or np.allclose(-vh[0], eigvecs.T[0])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# show the first image\n", "plt.imshow(X[0].reshape((h,w)), cmap=plt.cm.gray)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# show the reconstruction using the first k principal components\n", "k = 200\n", 
"plt.imshow(((u[:,0:k] * np.diag(s[0:k])*vh[0:k,:])[0] + np.mean(X,0)).reshape((h,w)), cmap=plt.cm.gray)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.2" } }, "nbformat": 4, "nbformat_minor": 4 }