from sklearn.datasets import fetch_olivetti_faces
import matplotlib.pyplot as plt
import pandas as pd
# Load the Olivetti faces bunch (scikit-learn fetches it on first use).
faces = fetch_olivetti_faces()

# Feature matrix (one flattened image per row) and the subject label vector.
features, targets = faces.data, faces.target

# Tabular view of the same data: pixel columns plus a 'person' label column.
df = pd.DataFrame(faces.data).assign(person=faces.target)

df.head()

print(targets)

[ 0  0  0  0  0  0  0  0  0  0  1  1  1  1  1  1  1  1  1  1  2  2  2  2
  2  2  2  2  2  2  3  3  3  3  3  3  3  3  3  3  4  4  4  4  4  4  4  4
  4  4  5  5  5  5  5  5  5  5  5  5  6  6  6  6  6  6  6  6  6  6  7  7
  7  7  7  7  7  7  7  7  8  8  8  8  8  8  8  8  8  8  9  9  9  9  9  9
  9  9  9  9 10 10 10 10 10 10 10 10 10 10 11 11 11 11 11 11 11 11 11 11
 12 12 12 12 12 12 12 12 12 12 13 13 13 13 13 13 13 13 13 13 14 14 14 14
 14 14 14 14 14 14 15 15 15 15 15 15 15 15 15 15 16 16 16 16 16 16 16 16
 16 16 17 17 17 17 17 17 17 17 17 17 18 18 18 18 18 18 18 18 18 18 19 19
 19 19 19 19 19 19 19 19 20 20 20 20 20 20 20 20 20 20 21 21 21 21 21 21
 21 21 21 21 22 22 22 22 22 22 22 22 22 22 23 23 23 23 23 23 23 23 23 23
 24 24 24 24 24 24 24 24 24 24 25 25 25 25 25 25 25 25 25 25 26 26 26 26
 26 26 26 26 26 26 27 27 27 27 27 27 27 27 27 27 28 28 28 28 28 28 28 28
 28 28 29 29 29 29 29 29 29 29 29 29 30 30 30 30 30 30 30 30 30 30 31 31
 31 31 31 31 31 31 31 31 32 32 32 32 32 32 32 32 32 32 33 33 33 33 33 33
 33 33 33 33 34 34 34 34 34 34 34 34 34 34 35 35 35 35 35 35 35 35 35 35
 36 36 36 36 36 36 36 36 36 36 37 37 37 37 37 37 37 37 37 37 38 38 38 38
 38 38 38 38 38 38 39 39 39 39 39 39 39 39 39 39]

# The labels printed above are sorted and every subject has exactly 10
# consecutive images, so subject k occupies rows [10 * k, 10 * k + 10).
person_id = 12
start = person_id * 10
end = start + 10

plt.figure(figsize=(10, 8))
for position, image in enumerate(faces.images[start:end], start=1):
    plt.subplot(2, 5, position)
    # 'gray' is the canonical colormap name; the 'grey' spelling is only an
    # alias in newer Matplotlib releases and raises ValueError on older ones.
    plt.imshow(image, cmap='gray')
    plt.title(f"Img {position}")
plt.suptitle(f"Person ID: {person_id}", fontsize=14)
plt.tight_layout()

from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA

# Stratified 75/25 split: class proportions are preserved in both partitions
# and the fixed seed makes the split repeatable.
feature_train, feature_test, target_train, target_test = train_test_split(
    features,
    targets,
    test_size=0.25,
    stratify=targets,
    random_state=42,
)

# Exploratory PCA with all components kept, fitted on the complete data set
# and used here to inspect how the per-component variance decays.
pca = PCA()
pca.fit(features)

# Open a fresh figure. Passing an explicit figure number (plt.figure(1, ...))
# re-selects an already-open figure with that number, so when this file runs
# as a script the curve would be drawn into the face grid created earlier and
# the requested figsize would not be applied.
plt.figure(figsize=(12, 8))
plt.plot(pca.explained_variance_, linewidth=2)
plt.xlabel('Components')
plt.ylabel('Explained Variance')

Text(0, 0.5, 'Explained Variance')

# Reduce every image to 100 whitened principal-component scores. The
# projection is learned from the training split only and then applied
# unchanged to the test split.
# random_state mirrors the seed used for the train/test split so that the
# components are repeatable if scikit-learn selects a randomized SVD solver.
estimator = PCA(n_components=100, whiten=True, random_state=42)
feature_train_pca = estimator.fit_transform(feature_train)
feature_test_pca = estimator.transform(feature_test)

# Compare the original dimensionality with the reduced training matrix.
print(features.shape)
print(feature_train_pca.shape)

(400, 4096)
(300, 100)

# Show the fitted principal axes: one row per retained component, one column
# per input pixel.
print(estimator.components_)

[[ 0.0013871   0.00444178  0.00633373 ...  0.00412855  0.00090176
   0.00126438]
 [ 0.02940465  0.03534574  0.04069127 ... -0.03045208 -0.02943759
  -0.02632847]
 [-0.00146403 -0.0056751  -0.00737526 ... -0.01453169 -0.01164445
  -0.00951603]
 ...
 [-0.00842736  0.00028561  0.0123344  ...  0.00378641  0.00904468
  -0.01564159]
 [-0.04679057 -0.0217967   0.00324379 ... -0.01298527  0.01751349
   0.00155648]
 [ 0.00955339 -0.00692617 -0.02175085 ... -0.0120116  -0.01398824
  -0.00689595]]

# Each principal axis has one weight per pixel, so it can be reshaped back to
# a 64x64 image and displayed (an "eigenface").
number_of_eigenfaces = len(estimator.components_)
eigen_faces = estimator.components_.reshape((number_of_eigenfaces, 64, 64))

# Eigenfaces are components of the PCA basis, not photographs of a subject,
# so the window below is expressed (and labelled) in component indices.
start = 28
end = start + 10

plt.figure(figsize=(10, 8))
for position, component_index in enumerate(range(start, end), start=1):
    plt.subplot(2, 5, position)
    # 'gray' is the canonical colormap name; the 'grey' spelling is only an
    # alias in newer Matplotlib releases and raises ValueError on older ones.
    plt.imshow(eigen_faces[component_index], cmap='gray')
    plt.title(f"Eigenface {component_index}")
plt.suptitle(f"Eigenfaces {start}-{end - 1}", fontsize=14)
plt.tight_layout()

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix,ConfusionMatrixDisplay
import seaborn as sns

def evaluate_model(model, X_train, X_test, y_train, y_test, show_matrix=True):
    """Fit a classifier on the training split and score it on the test split.

    Parameters:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``
            (e.g. SVC(), KNeighborsClassifier()).
        X_train, X_test: Feature matrices used for fitting and for scoring.
        y_train, y_test: Target arrays matching ``X_train`` and ``X_test``.
        show_matrix: Accepted for interface compatibility; it currently has
            no effect because the confusion-matrix plotting is disabled.

    Returns:
        A ``(y_pred, acc)`` tuple: the labels predicted for ``X_test`` and
        the accuracy of those predictions against ``y_test``.

    Side effects:
        Calls ``model.fit`` on the given estimator and prints the accuracy
        rounded to four decimals.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)

    print("\n Accuracy: {:.4f}".format(accuracy))

    # NOTE: the classification report and confusion-matrix heatmap that used
    # to be sketched here are intentionally not produced.
    return predictions, accuracy

from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier

# Shared settings for the comparison below.
# RANDOM_STATE mirrors the seed used for the train/test split so that the
# randomized learners (tree, forest, boosting) give repeatable scores.
# KNN_NEIGHBORS feeds both the estimator and its display label, so the printed
# name can never disagree with the neighbour count actually used.
RANDOM_STATE = 42
KNN_NEIGHBORS = 10

# (display name, unfitted estimator) pairs, evaluated in this order.
models = [
    ("Logistic Regression", LogisticRegression(max_iter=1000)),
    ("Naive Bayes", GaussianNB()),
    ("SVM (RBF)", SVC(kernel='rbf', gamma='scale')),
    (f"KNN (k={KNN_NEIGHBORS})", KNeighborsClassifier(n_neighbors=KNN_NEIGHBORS)),
    ("Decision Tree", DecisionTreeClassifier(random_state=RANDOM_STATE)),
    ("Random Forest", RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE)),
    ("AdaBoost", AdaBoostClassifier(n_estimators=100, random_state=RANDOM_STATE)),
]

# Fit each estimator on the PCA-reduced training data and report its accuracy
# on the PCA-reduced test data.
for name, model in models:
    print(f"\n===  {name} ===")
    evaluate_model(model, feature_train_pca, feature_test_pca, target_train, target_test)

===  Logistic Regression ===

 Accuracy: 0.9800

===  Naive Bayes ===

 Accuracy: 0.9300

===  SVM (RBF) ===

 Accuracy: 0.9900

===  KNN (k=3) ===

 Accuracy: 0.6500

===  Decision Tree ===

 Accuracy: 0.4800

===  Random Forest ===

 Accuracy: 0.9400

===  AdaBoost ===

 Accuracy: 0.0900

	0	1	2	3	4	5	6	7	8	9	...	4087	4088	4089	4090	4091	4092	4093	4094	4095
0	0.309917	0.367769	0.417355	0.442149	0.528926	0.607438	0.657025	0.677686	0.690083	0.685950	...	0.669421	0.652893	0.661157	0.475207	0.132231	0.148760	0.152893	0.161157	0.157025
1	0.454545	0.471074	0.512397	0.557851	0.595041	0.640496	0.681818	0.702479	0.710744	0.702479	...	0.157025	0.136364	0.148760	0.152893	0.152893	0.152893	0.152893	0.152893	0.152893
2	0.318182	0.400826	0.491736	0.528926	0.586777	0.657025	0.681818	0.685950	0.702479	0.698347	...	0.132231	0.181818	0.136364	0.128099	0.148760	0.144628	0.140496	0.148760	0.152893
3	0.198347	0.194215	0.194215	0.194215	0.190083	0.190083	0.243802	0.404959	0.483471	0.516529	...	0.636364	0.657025	0.685950	0.727273	0.743802	0.764463	0.752066	0.752066	0.739669
4	0.500000	0.545455	0.582645	0.623967	0.648760	0.690083	0.694215	0.714876	0.723140	0.731405	...	0.161157	0.177686	0.173554	0.177686	0.177686	0.177686	0.177686	0.173554	0.173554

PCA Analysis