当前位置：首页 > news >正文

DAY 43 复习日

news 2025/6/27 10:35:59

DAY 43 复习日

作业：在 Kaggle 上找到一个图像数据集，用 CNN 网络进行训练，并用 Grad-CAM 做可视化

import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_formats = {'png', 'retina'}
plt.rcParams["figure.figsize"] = (20,20)from tensorflow.keras.utils import load_img, img_to_array
from keras.applications.vgg16 import VGG16, preprocess_input, decode_predictions
from keras import backend as K
from scipy.ndimage import zoom
import numpy as np
import os
import tensorflow as tf
import keras
tf.compat.v1.disable_eager_execution()

model = VGG16(include_top=True, weights='imagenet',input_shape=(224,224,3))

def generate_cam(input_model, image, layer_name='block5_conv3', H=224, W=224):
    """Return a heat map built from one layer's feature maps and its own kernel.

    The feature maps produced by ``layer_name`` for ``image`` are combined
    channel-wise, using as per-channel weights the mean of the first kernel
    row of that same layer. The result is resized to ``H`` x ``W`` and divided
    by its maximum.

    NOTE(review): classic CAM weights come from the dense layer that follows
    global average pooling; this function uses the conv kernel instead. That
    behaviour is preserved from the original - confirm it is intended.

    Args:
        input_model: Keras model exposing ``get_layer`` and ``inputs``.
        image: Preprocessed input batch; only the first sample is used.
        layer_name: Name of the layer to visualise. Assumed to have a 4-D
            kernel whose last axis matches the layer's output channels
            (e.g. a Conv2D layer) - verify when changing the layer.
        H: Target height of the returned map.
        W: Target width of the returned map.

    Returns:
        2-D ``numpy.ndarray`` heat map of roughly ``H`` x ``W`` pixels.
    """
    layer = input_model.get_layer(layer_name)

    # Sub-model that stops at the requested layer so we can read its output.
    feature_model = keras.Model(input_model.inputs, layer.output)

    # get_weights()[0] is the layer's kernel; keep its first row and average
    # the two leading axes, leaving one weight per trailing-axis channel.
    kernel = layer.get_weights()[0]
    kernel = kernel[0, :, :, :]
    channel_weights = np.mean(kernel, axis=(0, 1))

    # Feature maps of the first (only) sample in the batch.
    feature_maps = feature_model.predict(image)[0, :]
    cam = np.dot(feature_maps, channel_weights)

    # Scale each axis independently so that W is honoured as well as H.
    cam = zoom(cam, (H / cam.shape[0], W / cam.shape[1]))

    # Normalise by the peak value; skip when it is zero to avoid NaN/inf.
    peak = np.max(cam)
    if peak != 0:
        cam = cam / peak
    return cam


def grad_cam(input_model, image, layer_name='block5_conv3', H=224, W=224):
    """Compute a Grad-CAM heat map for the model's top predicted class.

    The gradient of the predicted class score with respect to the output of
    ``layer_name`` is averaged over the spatial axes to get one weight per
    channel. The weighted sum of the feature maps is passed through a ReLU,
    resized to ``H`` x ``W`` and scaled by its maximum.

    Requires graph (non-eager) mode, because it relies on ``K.gradients`` and
    ``K.function``.

    Args:
        input_model: Keras classification model.
        image: Preprocessed input batch; only the first sample is used.
        layer_name: Name of the layer whose activations are visualised.
        H: Target height of the returned map.
        W: Target width of the returned map.

    Returns:
        2-D ``numpy.ndarray`` heat map with values in ``[0, 1]``; all zeros
        when no location has a positive response.
    """
    # Index of the highest-scoring class for this input.
    cls = np.argmax(input_model.predict(image))

    # Symbolic score of that class and the activations to differentiate against.
    y_c = input_model.output[0, cls]
    conv_output = input_model.get_layer(layer_name).output
    grads = K.gradients(y_c, conv_output)[0]

    # One backend call returns both the activations and their gradients.
    get_output = K.function([input_model.input], [conv_output, grads])
    output, grads_val = get_output([image])
    output, grads_val = output[0, :], grads_val[0, :, :, :]

    # Spatially averaged gradients act as the per-channel importance weights.
    weights = np.mean(grads_val, axis=(0, 1))
    cam = np.dot(output, weights)
    cam = np.maximum(cam, 0)  # ReLU: keep only positive evidence

    # Scale each axis independently so that W is honoured as well as H.
    cam = zoom(cam, (H / cam.shape[0], W / cam.shape[1]))

    # After the ReLU the map can be entirely zero; dividing would give NaNs.
    peak = cam.max()
    if peak > 0:
        cam = cam / peak
    return cam

def grad_cam_plus(input_model, image, layer_name='block5_conv3', H=224, W=224):
    """Compute a Grad-CAM++ heat map for the model's top predicted class.

    First, second and third order terms are formed as ``exp(score)`` times
    powers of the gradient of the class score with respect to the output of
    ``layer_name``. From them, per-location alpha coefficients are derived
    and normalised per channel. The positive first-order gradients weighted
    by these alphas give one weight per channel; the weighted sum of feature
    maps is rectified, resized to ``H`` x ``W`` and scaled by its maximum.

    Requires graph (non-eager) mode, because it relies on ``K.gradients`` and
    ``K.function``.

    Args:
        input_model: Keras classification model.
        image: Preprocessed input batch; only the first sample is used.
        layer_name: Name of the layer whose activations are visualised.
        H: Target height of the returned map.
        W: Target width of the returned map.

    Returns:
        2-D ``numpy.ndarray`` heat map with values in ``[0, 1]``; all zeros
        when no location has a positive response.
    """
    cls = np.argmax(input_model.predict(image))
    y_c = input_model.output[0, cls]
    conv_tensor = input_model.get_layer(layer_name).output
    grads = K.gradients(y_c, conv_tensor)[0]

    # Symbolic first/second/third order terms used by the alpha formula.
    exp_score = K.exp(y_c)
    first = exp_score * grads
    second = exp_score * grads * grads
    third = exp_score * grads * grads * grads

    get_output = K.function(
        [input_model.input], [first, second, third, conv_tensor]
    )
    # Evaluate on the `image` argument (the original read a global `img`).
    first_val, second_val, third_val, conv_val = get_output([image])

    # Drop the batch axis: everything below works on the first sample.
    first_grad = first_val[0]
    second_grad = second_val[0]
    third_grad = third_val[0]
    feature_maps = conv_val[0]
    n_channels = first_grad.shape[2]

    # Per-channel sum of activations over all spatial positions.
    global_sum = np.sum(feature_maps.reshape((-1, n_channels)), axis=0)

    # Alpha coefficient for every spatial location and channel.
    alpha_num = second_grad
    alpha_denom = (
        second_grad * 2.0
        + third_grad * global_sum.reshape((1, 1, n_channels))
    )
    # Replace zero denominators by one so the division stays finite.
    alpha_denom = np.where(
        alpha_denom != 0.0, alpha_denom, np.ones(alpha_denom.shape)
    )
    alphas = alpha_num / alpha_denom

    # Only positive gradients contribute to the channel weights.
    weights = np.maximum(first_grad, 0.0)

    # Normalise the alphas per channel; a channel whose alphas sum to zero is
    # left unscaled instead of producing NaN/inf.
    alpha_norm = np.sum(np.sum(alphas, axis=0), axis=0)
    alpha_norm = np.where(alpha_norm != 0.0, alpha_norm, 1.0)
    alphas = alphas / alpha_norm.reshape((1, 1, n_channels))

    # Alpha-weighted gradients collapsed to a single weight per channel.
    deep_linearization_weights = np.sum(
        (weights * alphas).reshape((-1, n_channels)), axis=0
    )
    grad_cam_map = np.sum(deep_linearization_weights * feature_maps, axis=2)

    cam = np.maximum(grad_cam_map, 0)

    # Scale each axis independently so that W is honoured as well as H.
    cam = zoom(cam, (H / cam.shape[0], W / cam.shape[1]))

    # After the ReLU the map can be entirely zero; dividing would give NaNs.
    peak = np.max(cam)
    if peak > 0:
        cam = cam / peak
    return cam

# Sample images (already 224x224) from the Kaggle dataset used for the demo.
images = [
    "/kaggle/input/d/tanishqsardana/articleimages/goldfish_224.jpg",
    "/kaggle/input/d/tanishqsardana/articleimages/kite_224.jpg",
    "/kaggle/input/d/tanishqsardana/articleimages/scorpion_224.jpg",
    "/kaggle/input/d/tanishqsardana/articleimages/sealion_224.jpg",
    "/kaggle/input/d/tanishqsardana/articleimages/spoonbill_224.jpg",
]

# For every sample image: classify it, compute the three heat maps and show
# them next to the input picture.
for path in images:
    # Decode the file once and derive both views from it: a uint8 copy for
    # display and a float64 batch for the network.
    pil_img = load_img(path, target_size=(224, 224))
    orig_img = np.array(pil_img, dtype=np.uint8)
    img = np.array(pil_img, dtype=np.float64)
    img = np.expand_dims(img, axis=0)  # add the batch axis
    img = preprocess_input(img)

    # Top-5 human-readable predictions; only the best one is reported.
    predictions = model.predict(img)
    top_n = 5
    top = decode_predictions(predictions, top=top_n)[0]

    cam = generate_cam(model, img)
    gradcam = grad_cam(model, img)
    gradcamplus = grad_cam_plus(model, img)

    print(path)
    print("class activation map for:", top[0])

    # Sanity output: the heat map should be a 2-D numpy array.
    print(type(gradcam))
    print(gradcam.dtype)
    print(gradcam.shape)

    fig, axes = plt.subplots(nrows=1, ncols=4)

    # Panel 1: the untouched input.
    axes[0].imshow(orig_img)
    axes[0].set_title("Input Image")
    axes[0].axis('off')

    # Panels 2-4: each heat map drawn semi-transparently over the input.
    overlays = (
        ("CAM", cam),
        ("Grad-CAM", gradcam),
        ("Grad-CAM++", gradcamplus),
    )
    for ax, (title, heatmap) in zip(axes[1:], overlays):
        ax.imshow(orig_img)
        ax.imshow(heatmap, alpha=0.8, cmap="jet")
        ax.set_title(title)
        ax.axis('off')

    plt.show()

/kaggle/input/d/tanishqsardana/articleimages/goldfish_224.jpg
class activation map for: ('n01443537', 'goldfish', 1.0)
<class 'numpy.ndarray'>
float32
(224, 224)

在这里插入图片描述

/kaggle/input/d/tanishqsardana/articleimages/kite_224.jpg
class activation map for: ('n03888257', 'parachute', 0.93463606)
<class 'numpy.ndarray'>
float32
(224, 224)

在这里插入图片描述

/kaggle/input/d/tanishqsardana/articleimages/scorpion_224.jpg
class activation map for: ('n01770393', 'scorpion', 0.99979585)
<class 'numpy.ndarray'>
float32
(224, 224)

在这里插入图片描述

/kaggle/input/d/tanishqsardana/articleimages/sealion_224.jpg
class activation map for: ('n02077923', 'sea_lion', 0.9999479)
<class 'numpy.ndarray'>
float32
(224, 224)

在这里插入图片描述

/kaggle/input/d/tanishqsardana/articleimages/spoonbill_224.jpg
class activation map for: ('n02006656', 'spoonbill', 0.997894)
<class 'numpy.ndarray'>
float32
(224, 224)

在这里插入图片描述