# Principal component analysis (PCA): Explained and implemented

`import numpy as npnp.random.seed(1)vec1 = np.array([0, 0, 0])mat1 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])sample_for_class1 = np.random.multivariate_normal(vec1, mat1, 20).Tassert sample_for_class1.shape == (3, 20), "The dimension of the sample_for_class1 matrix is not 3x20"vec2 = np.array([1, 1, 1])mat2 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])sample_for_class2 = np.random.multivariate_normal(vec2, mat2, 20).Tassert sample_for_class2.shape == (3, 20), "The dimension of the sample_for_class2 matrix is not 3x20"all_data = np.concatenate((sample_for_class1, sample_for_class2), axis=1)assert all_data.shape == (3, 40), "The dimension of the all_data matrix is not 3x20"`
`mean_dim1 = np.mean(all_data[0, :])mean_dim2 = np.mean(all_data[1, :])mean_dim3 = np.mean(all_data[2, :])mean_vector = np.array([[mean_dim1], [mean_dim2], [mean_dim3]])print('The Mean Vector:\n', mean_vector)scatter_matrix = np.zeros((3,3))for i in range(all_data.shape):    scatter_matrix += (all_data[:, i].reshape(3, 1) - mean_vector).dot((all_data[:, i].reshape(3, 1) - mean_vector).T)print('The Scatter Matrix is :\n', scatter_matrix)output : ('The Mean Vector:\n', array([[0.41667492],       [0.69848315],       [0.49242335]]))('The Scatter Matrix:\n', array([[38.4878051 , 10.50787213,11.13746016],       [10.50787213, 36.23651274, 11.96598642],       [11.13746016, 11.96598642, 49.73596619]]))`
`eig_val, eig_vec = np.linalg.eig(scatter_matrix)`
`for ev in eig_vec:    np.testing.assert_array_almost_equal(1.0, np.linalg.norm(ev))`
`# We Make a list of tuple containing (eigenvalue, eigenvector)eig_pairs = [(np.abs(eig_val_sc[i]), eig_vec_sc[:,i]) for i in range(len(eig_val_sc))]# We then Sort list of tuples by the eigenvalueeig_pairs.sort(key=lambda x: x, reverse=True)# verify that the list is correctly sorted by decreasing eigenvaluesfor i in eig_pairs:    print(i)Output : 65.1693677907819532.6947129632179926.596203282097097`
`matrix_w = np.hstack((eig_pairs.reshape(3,1), eig_pairs.reshape(3,1)))print('Matrix W:\n', matrix_w)('Matrix W:\n', array([[-0.49210223, -0.64670286],       [-0.47927902, -0.35756937],       [-0.72672348,  0.67373552]]))`
`transformed = matrix_w.T.dot(all_samples)assert transformed.shape == (2,40), "The matrix is not 2x40 dimensional."`