Implementing Singular Value Decomposition using Orthogonal Iteration

In [17]:
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.animation as anm
import math
from IPython import display
from matplotlib.animation import ImageMagickFileWriter

%matplotlib notebook
#https://towardsdatascience.com/matplotlib-animations-in-jupyter-notebook-4422e4f0e389

plt.rcParams['animation.ffmpeg_path'] = 'C:\\FFmpeg\\bin\\ffmpeg.exe'
plt.rcParams['animation.convert_path'] = 'C:\\Program Files\\ImageMagick-7.1.0-Q16-HDRI\\magick.exe'

Orthogonal Iteration Animation with a Symmetric 2x2

Create a matrix whose eigenvalues are 2 and 1 and whose eigenvectors are [1,1] and [-1,1] (I divide by square roots of 2 so the eigenvectors have length 1)

In [2]:
eigenvectors = np.array([
    [1/math.sqrt(2),-1/math.sqrt(2)],
    [1/math.sqrt(2),   1/math.sqrt(2)]
])
eigenvalues = np.array([
    [2,0],
    [0,1]
])
transform = eigenvectors @ eigenvalues @ eigenvectors.T
transform
Out[2]:
array([[1.5, 0.5],
       [0.5, 1.5]])
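
A quick sanity check: multiplying by transform should stretch [1,1] by a factor of 2 and leave [-1,1] unchanged.

In [ ]:
#verify the construction: [1,1] is an eigenvector with eigenvalue 2, and [-1,1] with eigenvalue 1
print(transform @ np.array([1, 1]))    #expect [2. 2.]
print(transform @ np.array([-1, 1]))   #expect [-1. 1.]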

The algorithm below (Gram-Schmidt) accepts a matrix and, column by column, subtracts from each column its projection onto the previous columns until the columns are all orthogonal to each other and of length 1.

In [3]:
def gram_schmidt(A):
    Q = np.zeros(A.shape)
    R = np.zeros((A.shape[1],A.shape[1]))
    for column in range(A.shape[1]):
        #if Q is an orthonormal basis, Qt * v = Q^-1 * v (v's coordinates relative to Q basis vectors)
        linear_combo        = np.dot(a=Q.T, b=A[:,column])
        R[:     , column]   = linear_combo
        projection          = np.dot(Q, linear_combo)
        orthogonal_vector   = A[:, column] - projection
        R[column, column]   = np.dot(orthogonal_vector, orthogonal_vector)**0.5 #norm
        if R[column, column] == 0:
            raise ArithmeticError(f'The following Column is a linear combination of previous columns: {str(A[:, column])} and matrix cannot be decomposed because it is singular.')
        Q[:     , column]   = orthogonal_vector/R[column, column] #normalize
        
    return Q,R
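
A minimal check of the routine on the 2x2 transform matrix: Q should come out with orthonormal columns, and Q @ R should rebuild the input.

In [ ]:
#sanity check: Q has orthonormal columns and the factorization reproduces the input matrix
Q, R = gram_schmidt(transform)
print(np.allclose(Q.T @ Q, np.identity(2)))  #True: columns are orthogonal and of length 1
print(np.allclose(Q @ R, transform))         #True: A = Q @ R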

The algorithm below repeatedly multiplies a set of starting vectors by the matrix A (in the animation this brings the vectors closer together).
After each multiplication, we orthonormalize the set of vectors, pushing them ninety degrees apart again.

In [4]:
def orthogonal_iteration(A, steps=30):
    eigenvectors = np.identity(len(A)) # starting vectors
    
    for step in range(steps):
        A_times_eigenvectors = np.dot(A,eigenvectors)  #multiply every column of Q with the matrix A, save result in AQ
        eigenvectors, coordinates = gram_schmidt(A_times_eigenvectors) #keep the first column of AQ as an anchor, push the other columns to 90 degrees from it
    
    A_times_eigenvectors = np.dot(A,eigenvectors)             #multiply each eigenvector by A to find eigenvalues
    eigenvalues = np.dot(eigenvectors.T, A_times_eigenvectors)
    eigenvalues = np.diag(eigenvalues)
    return eigenvectors, eigenvalues
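
A quick check that the iteration recovers the eigenpairs we built transform from: the eigenvalues should come out near 2 and 1, and each returned column should satisfy A v = lambda v.

In [ ]:
#sanity check: recovered eigenpairs of the 2x2 transform should match the ones we constructed
vecs, vals = orthogonal_iteration(transform)
print(vals)                                        #expect approximately [2. 1.]
print(np.allclose(transform @ vecs, vecs * vals))  #each column v_i satisfies A @ v_i = lambda_i * v_i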
In [5]:
def orthogonal_iteration_animation1(mat):
    random_vectors   = np.identity(len(mat))
    animation_slides = [random_vectors]
    animation_slides.append(np.c_[random_vectors[:,0]/np.linalg.norm(random_vectors[:,0]),
                                  random_vectors[:,1]/np.linalg.norm(random_vectors[:,1])])
    for i in range(99):
        A = animation_slides[-1].copy()
        A = mat @ A
        animation_slides.append(A)
        Q,R = gram_schmidt(A)
        animation_slides.append(Q)
    return animation_slides

Note how the animation has two main movements:

  1. The two vectors come closer together (representing multiplying the two vectors by the matrix)
  2. The two vectors are pushed apart to 90 degrees (representing the effect of the orthogonalization algorithm)
Eventually the algorithm converges: even after multiplying them by the matrix A, the two vectors still point in the same directions, 90 degrees apart from each other. At that point they are the eigenvectors.

In [39]:
animation_slides = orthogonal_iteration_animation1(transform)
vectors = np.identity(2)
fig, ax = plt.subplots(figsize=(5,5.5))
ax.quiver([0,0],[0,0],[1,1],[1,-1],angles='xy', scale_units='xy', scale=1,color='red',label='True Eigenvectors')
vector_plot = ax.quiver([0]*2,[0]*2,  vectors[0,:], vectors[1,:],angles='xy', scale_units='xy', scale=1) #U holds x-components, V holds y-components, so each column of vectors becomes one arrow
ax.set(xlim=(-2,2),ylim=(-2,2))
ax.legend()
ax.grid()

def vector_animation(i):
    new_vectors = animation_slides[i]
    vector_plot.set_UVC(new_vectors[0,:], new_vectors[1,:]) #plot the columns of the current slide as the two arrows
    ax.set(title=f"Frame {i}")
    return vector_plot,
animator = anm.FuncAnimation(fig=fig, func=vector_animation, 
                             frames=20, 
                             interval = 1000)
video = animator.to_html5_video()
html = display.HTML(video)
display.display(html)
plt.close()
In [40]:
animator.save("orthogonal_iteration.gif",writer="imagemagick")

Implementing SVD with Orthogonal Iteration

In [7]:
def singular_value_decomposition(matrix, rank):
    '''
    Linear Algebra for Everyone by Gilbert Strang, page 260 calls the left singular vectors the column space
    and the right singular vectors the row space.  I follow the same convention
    '''
    covariance      = np.dot(matrix.T, matrix)
    eigenvectors, eigenvalues = orthogonal_iteration(covariance)
    singular_values = eigenvalues**0.5                           #eigenvalues of A.T @ A are the squared singular values, so take the square root
    sorted_indices  = np.argsort(singular_values)[::-1]
    biggest_singular_values = sorted_indices[:rank]              #indices of the largest singular values
    
    row_space       = eigenvectors[:,biggest_singular_values].T #Eigenvectors Corresponding to Largest Eigenvalues
    column_space    = np.dot(matrix, eigenvectors)              #Multiply every eigenvector by the matrix: A @ v_i = sigma_i * u_i
    column_space    = np.divide(column_space, singular_values)  #divide each column by its singular value to get unit-length u_i
    column_space    = column_space[:,biggest_singular_values]
    return column_space, np.diag(singular_values[biggest_singular_values]), row_space
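
Before pointing this at an image, a rough comparison against numpy.linalg.svd on a small random matrix: the singular values should closely agree, and because every eigenvector is kept, the full-rank reconstruction recovers the matrix up to floating point error.

In [ ]:
#rough check against numpy.linalg.svd on a small random matrix
rng = np.random.default_rng(0)
M = rng.standard_normal((6, 4))
U, S, Vt = singular_value_decomposition(M, rank=4)
print(np.diag(S))                          #our singular values (descending)
print(np.linalg.svd(M, compute_uv=False))  #NumPy's singular values
print(np.allclose(U @ S @ Vt, M))          #full-rank reconstruction recovers M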

Below is (objectively speaking) the cutest cat alive. He is a Golden British Shorthair cat named Hosico who lives in Moscow with his human servants. You can follow this cute little fluffball on his YouTube channel.

In [8]:
img = Image.open('hosicoBowl.PNG')
greyscale=  img.convert('L')
hosico = np.array(greyscale)
fig, ax = plt.subplots()
print(hosico[:5,:])
ax.imshow(hosico)
[[137 138 140 ... 127 126 125]
 [137 137 139 ... 127 125 124]
 [137 137 137 ... 127 125 124]
 [137 136 136 ... 127 125 123]
 [139 140 137 ... 126 124 124]]
Out[8]:
<matplotlib.image.AxesImage at 0x27bbc607820>

If we use just the first 100 singular vectors, we can reproduce a blurry version of the original cat picture. But notice how we can still see his big round eyes!!!

In [29]:
column_space, singular_values, row_space = singular_value_decomposition(matrix=hosico, rank=100)
low_rank_approximation = column_space @ singular_values @ row_space
fig, ax = plt.subplots(ncols=2)
ax[0].imshow(hosico)
ax[1].imshow(low_rank_approximation)
Out[29]:
<matplotlib.image.AxesImage at 0x27bc04eed60>
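
To put a rough number on how lossy the approximation is, we can look at the relative error of the reconstruction in the Frobenius norm.

In [ ]:
#relative Frobenius-norm error of the rank-100 reconstruction
error = np.linalg.norm(hosico - low_rank_approximation) / np.linalg.norm(hosico)
print(f"relative reconstruction error: {error:.3f}")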
In [23]:
print(f"The cat picture can be reconstructed with just two matrices of sizes {column_space.shape, row_space.shape}\n and a 1d vector of {len(singular_values)} singular values")
The cat picture can be reconstructed with just two matrices of sizes ((261, 100), (100, 358))
 and a 1d vector of 100 singular values
In [24]:
print(f"In total this amounts to only {column_space.shape[0]*column_space.shape[1]+row_space.shape[0]*row_space.shape[1]+len(singular_values)} numbers")
In total this amounts to only 62000 numbers
In [41]:
print(f"The original cat picture had {hosico.shape[0]*hosico.shape[1]} numbers")
The original cat picture had 93438 numbers
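
The saving generalizes: a rank-k approximation of an m by n image needs k*(m + n) numbers for the two factor matrices plus k singular values, compared with m*n for the original picture. A quick sketch of that arithmetic for this image:

In [ ]:
#storage cost of a rank-k approximation versus the original m x n image
m, n = hosico.shape
for k in (40, 100):
    compressed = k*(m + n) + k   #two factor matrices plus k singular values
    print(f"rank {k}: {compressed} numbers vs {m*n} original ({compressed/(m*n):.0%})")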
In [42]:
img = Image.open('hosicoSad.PNG')
greyscale=  img.convert('L')
hosico1 = np.array(greyscale)
column_space, singular_values, row_space = singular_value_decomposition(matrix=hosico1, rank=40)
low_rank_approximation1 = column_space @ singular_values @ row_space
fig, ax = plt.subplots(ncols=2)
ax[0].imshow(hosico1)
ax[1].imshow(low_rank_approximation1)
Out[42]:
<matplotlib.image.AxesImage at 0x27bc0b9ae50>
In [43]:
fig, ax = plt.subplots(nrows=2,ncols=2)
ax[0][0].imshow(hosico)
ax[0][0].set_title('Original Picture')
ax[0][1].imshow(low_rank_approximation)
ax[1][0].imshow(hosico1)
ax[1][1].imshow(low_rank_approximation1)
ax[0][1].set_title('Low Rank Approximation')
Out[43]:
Text(0.5, 1.0, 'Low Rank Approximation')