Day 99 — Foolbox LBFGS Attack against All Keras Applications

今日主題:使用Foolbox LBFGS Attack攻擊Keras內建分類器

Tanli Hsu
21 min read · Aug 28, 2018

筆記

昨天介紹了使用Foolbox這個工具,製造對抗例來攻擊Keras內建的預訓練分類器。今天要測試的,就是這些預訓練分類器在Foolbox LBFGS Attack的攻擊下的抵抗能力。

對於抵抗力的定義也很簡單。在Foolbox的 LBFGSAttack() 中,需要傳入一個 criterion 參數。而在昨天的例子裡,我使用的是 TargetClassProbability() 這個 criterion 類別。它需要傳入兩個參數,一個是目標class,一個是目標class的機率必須達到的門檻值。

例如我昨天使用的 TargetClassProbability( 388, p=0.5 ) ,這個指令的意思就是「將第 388 個 class的分類機率提高到 0.5 以上」,並以此作為 criterion 傳入攻擊模型中。

今天稍早已經對所有Keras Application中提供的預訓練分類器做了測試,除了MobileNet以外,其他的分類器全部都會遭到破解。所以今天定義的抵抗力,就是在同樣的 criterion 設定下,產生對抗例所需的時間。

DenseNet也是Keras提供的預訓練分類器,但是似乎 Linearity相當重,好像沒有測的必要?在時間不夠的情況下,就先省略這個系列了。

實作使用的運算平台是 Google Colab,使用Python3 Notebook 以GPU運行。

以下是程式碼以及實驗結果。

InceptionResNet V2

# Build Model
# Times a targeted Foolbox L-BFGS attack against the pretrained
# InceptionResNetV2 classifier and prints the predictions before and after.
import time

import keras
import numpy as np
from keras.applications.inception_resnet_v2 import InceptionResNetV2
from keras.applications.inception_resnet_v2 import preprocess_input, decode_predictions
from keras.preprocessing import image

import foolbox
from foolbox.models import KerasModel
from foolbox.attacks import LBFGSAttack
from foolbox.criteria import TargetClassProbability

# Class the attack pushes the prediction towards, and the probability that
# class has to reach for the attack to count as successful.
# NOTE(review): the Xception/VGG/ResNet sections of this write-up use p=.8,
# so their durations are not measured under the same criterion.
TARGET_CLASS = 388
TARGET_PROBABILITY = .5

image_path = '/content/traffic-light.png'
raw_image = image.load_img(image_path, target_size=(299,299))
# Raw pixel array; this (not the preprocessed batch) is what gets attacked.
input_image = image.img_to_array(raw_image)

inception_clfr = InceptionResNetV2(include_top=True, weights='imagenet', classes=1000)

# Baseline prediction on the clean image. The batch is copied because
# preprocess_input may modify its argument in place and input_image is
# reused by the attack below.
clean_batch = preprocess_input(np.expand_dims(input_image, axis=0).copy())
clfr_output = inception_clfr.predict(clean_batch)
print(decode_predictions(clfr_output, top=3))

# 388 is the attack *target*, not the class of the clean image. Foolbox's
# `label` argument is the reference label of the unperturbed input, so use
# the classifier's top-1 prediction for the clean image instead.
original_label = int(np.argmax(clfr_output))

# Attack Model
# NOTE(review): the baseline above goes through preprocess_input, but the
# wrapper below receives the raw 0-255 image and no `preprocessing`
# argument, so the attacked model sees differently scaled inputs — confirm
# whether KerasModel(..., preprocessing=...) should be set.
sub_model = KerasModel(inception_clfr, bounds=(0,255))

## Measure the time used to generate adversarial example
start_time = time.time()
attack = LBFGSAttack(
    model=sub_model,
    criterion=TargetClassProbability(TARGET_CLASS, p=TARGET_PROBABILITY),
)
advexp = attack(input_image, label=original_label)
end_time = time.time()
print("Duration:", end_time - start_time)

# NOTE(review): Foolbox 1.x is documented to return None when no adversarial
# is found — confirm for the installed version.
if advexp is None:
    print("No adversarial example found.")
else:
    clfr_output = inception_clfr.predict(advexp[np.newaxis, :,:,:])
    print(decode_predictions(clfr_output))

得到的結果: Duration: 436.04573130607605

Inception V3

# Build Model
# Times a targeted Foolbox L-BFGS attack against the pretrained InceptionV3
# classifier and prints the predictions before and after.
import time  # needed for the timing below; this snippet did not import it

import keras
import numpy as np
from keras.applications.inception_v3 import InceptionV3
from keras.applications.inception_v3 import preprocess_input, decode_predictions
from keras.preprocessing import image

import foolbox
from foolbox.models import KerasModel
from foolbox.attacks import LBFGSAttack
from foolbox.criteria import TargetClassProbability

# Class the attack pushes the prediction towards, and the probability that
# class has to reach for the attack to count as successful.
# NOTE(review): the Xception/VGG/ResNet sections of this write-up use p=.8,
# so their durations are not measured under the same criterion.
TARGET_CLASS = 388
TARGET_PROBABILITY = .5

image_path = '/content/traffic-light.png'
raw_image = image.load_img(image_path, target_size=(299,299))
# Raw pixel array; this (not the preprocessed batch) is what gets attacked.
input_image = image.img_to_array(raw_image)

inceptionV3_clfr = InceptionV3(include_top=True, weights='imagenet', classes=1000)

# Baseline prediction on the clean image. The batch is copied because
# preprocess_input may modify its argument in place and input_image is
# reused by the attack below.
clean_batch = preprocess_input(np.expand_dims(input_image, axis=0).copy())
clfr_output = inceptionV3_clfr.predict(clean_batch)
print(decode_predictions(clfr_output, top=3))

# 388 is the attack *target*, not the class of the clean image. Foolbox's
# `label` argument is the reference label of the unperturbed input, so use
# the classifier's top-1 prediction for the clean image instead.
original_label = int(np.argmax(clfr_output))

# Attack Model
# NOTE(review): the baseline above goes through preprocess_input, but the
# wrapper below receives the raw 0-255 image and no `preprocessing`
# argument, so the attacked model sees differently scaled inputs — confirm
# whether KerasModel(..., preprocessing=...) should be set.
sub_model = KerasModel(inceptionV3_clfr, bounds=(0,255))

# Measure the time used to generate adversarial example
start_time = time.time()
attack = LBFGSAttack(
    model=sub_model,
    criterion=TargetClassProbability(TARGET_CLASS, p=TARGET_PROBABILITY),
)
advexp = attack(input_image, label=original_label)
end_time = time.time()
print("Duration:", end_time - start_time)

# NOTE(review): Foolbox 1.x is documented to return None when no adversarial
# is found — confirm for the installed version.
if advexp is None:
    print("No adversarial example found.")
else:
    clfr_output = inceptionV3_clfr.predict(advexp[np.newaxis, :,:,:])
    print(decode_predictions(clfr_output))

得到的結果: Duration: 160.60878705978494

Xception V1

# Build Model
# Times a targeted Foolbox L-BFGS attack against the pretrained Xception
# classifier and prints the predictions before and after.
import time  # needed for the timing below; this snippet did not import it

import keras
import numpy as np
from keras.applications.xception import Xception
from keras.applications.xception import preprocess_input, decode_predictions
from keras.preprocessing import image

import foolbox
from foolbox.models import KerasModel
from foolbox.attacks import LBFGSAttack
from foolbox.criteria import TargetClassProbability

# Class the attack pushes the prediction towards, and the probability that
# class has to reach for the attack to count as successful.
# NOTE(review): the InceptionResNetV2/InceptionV3/NASNet sections of this
# write-up use p=.5, so their durations are not measured under the same
# criterion as this one.
TARGET_CLASS = 388
TARGET_PROBABILITY = .8

image_path = '/content/traffic-light.png'
raw_image = image.load_img(image_path, target_size=(299,299))
# Raw pixel array; this (not the preprocessed batch) is what gets attacked.
input_image = image.img_to_array(raw_image)

xception_clfr = Xception(include_top=True, weights='imagenet', classes=1000)

# Baseline prediction on the clean image. The batch is copied because
# preprocess_input may modify its argument in place and input_image is
# reused by the attack below.
clean_batch = preprocess_input(np.expand_dims(input_image, axis=0).copy())
clfr_output = xception_clfr.predict(clean_batch)
print(decode_predictions(clfr_output, top=3))

# 388 is the attack *target*, not the class of the clean image. Foolbox's
# `label` argument is the reference label of the unperturbed input, so use
# the classifier's top-1 prediction for the clean image instead.
original_label = int(np.argmax(clfr_output))

# Attack Model
# NOTE(review): the baseline above goes through preprocess_input, but the
# wrapper below receives the raw 0-255 image and no `preprocessing`
# argument, so the attacked model sees differently scaled inputs — confirm
# whether KerasModel(..., preprocessing=...) should be set.
sub_model = KerasModel(xception_clfr, bounds=(0,255))

# Measure the time used to generate adversarial example
start_time = time.time()
attack = LBFGSAttack(
    model=sub_model,
    criterion=TargetClassProbability(TARGET_CLASS, p=TARGET_PROBABILITY),
)
advexp = attack(input_image, label=original_label)
end_time = time.time()
print("Duration:", end_time - start_time)

# NOTE(review): Foolbox 1.x is documented to return None when no adversarial
# is found — confirm for the installed version.
if advexp is None:
    print("No adversarial example found.")
else:
    clfr_output = xception_clfr.predict(advexp[np.newaxis, :,:,:])
    print(decode_predictions(clfr_output))

得到的結果: Duration: 142.7184829711914

VGG-16

# Build Model
# Times a targeted Foolbox L-BFGS attack against the pretrained VGG16
# classifier and prints the predictions before and after.
import time  # needed for the timing below; this snippet did not import it

import keras
import numpy as np
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input, decode_predictions
from keras.preprocessing import image

import foolbox
from foolbox.models import KerasModel
from foolbox.attacks import LBFGSAttack
from foolbox.criteria import TargetClassProbability

# Class the attack pushes the prediction towards, and the probability that
# class has to reach for the attack to count as successful.
# NOTE(review): the InceptionResNetV2/InceptionV3/NASNet sections of this
# write-up use p=.5, so their durations are not measured under the same
# criterion as this one.
TARGET_CLASS = 388
TARGET_PROBABILITY = .8

image_path = '/content/traffic-light.png'
raw_image = image.load_img(image_path, target_size=(224,224))
# Raw pixel array; this (not the preprocessed batch) is what gets attacked.
input_image = image.img_to_array(raw_image)

vgg16_clfr = VGG16(include_top=True, weights='imagenet', classes=1000)

# Baseline prediction on the clean image. The batch is copied because
# preprocess_input may modify its argument in place and input_image is
# reused by the attack below.
clean_batch = preprocess_input(np.expand_dims(input_image, axis=0).copy())
clfr_output = vgg16_clfr.predict(clean_batch)
print(decode_predictions(clfr_output, top=3))

# 388 is the attack *target*, not the class of the clean image. Foolbox's
# `label` argument is the reference label of the unperturbed input, so use
# the classifier's top-1 prediction for the clean image instead.
original_label = int(np.argmax(clfr_output))

# Attack Model
# NOTE(review): the baseline above goes through preprocess_input, but the
# wrapper below receives the raw 0-255 image and no `preprocessing`
# argument, so the attacked model sees differently scaled inputs — confirm
# whether KerasModel(..., preprocessing=...) should be set.
sub_model = KerasModel(vgg16_clfr, bounds=(0,255))

# Measure the time used to generate adversarial example
start_time = time.time()
attack = LBFGSAttack(
    model=sub_model,
    criterion=TargetClassProbability(TARGET_CLASS, p=TARGET_PROBABILITY),
)
advexp = attack(input_image, label=original_label)
end_time = time.time()
print("Duration:", end_time - start_time)

# NOTE(review): Foolbox 1.x is documented to return None when no adversarial
# is found — confirm for the installed version.
if advexp is None:
    print("No adversarial example found.")
else:
    clfr_output = vgg16_clfr.predict(advexp[np.newaxis, :,:,:])
    print(decode_predictions(clfr_output))

得到的結果: Duration: 80.00009250640869

VGG-19

# Build Model
# Times a targeted Foolbox L-BFGS attack against the pretrained VGG19
# classifier and prints the predictions before and after.
import time  # needed for the timing below; this snippet did not import it

import keras
import numpy as np
from keras.applications.vgg19 import VGG19
from keras.applications.vgg19 import preprocess_input, decode_predictions
from keras.preprocessing import image

import foolbox
from foolbox.models import KerasModel
from foolbox.attacks import LBFGSAttack
from foolbox.criteria import TargetClassProbability

# Class the attack pushes the prediction towards, and the probability that
# class has to reach for the attack to count as successful.
# NOTE(review): the InceptionResNetV2/InceptionV3/NASNet sections of this
# write-up use p=.5, so their durations are not measured under the same
# criterion as this one.
TARGET_CLASS = 388
TARGET_PROBABILITY = .8

image_path = '/content/traffic-light.png'
raw_image = image.load_img(image_path, target_size=(224,224))
# Raw pixel array; this (not the preprocessed batch) is what gets attacked.
input_image = image.img_to_array(raw_image)

vgg19_clfr = VGG19(include_top=True, weights='imagenet', classes=1000)

# Baseline prediction on the clean image. The batch is copied because
# preprocess_input may modify its argument in place and input_image is
# reused by the attack below.
clean_batch = preprocess_input(np.expand_dims(input_image, axis=0).copy())
clfr_output = vgg19_clfr.predict(clean_batch)
print(decode_predictions(clfr_output, top=3))

# 388 is the attack *target*, not the class of the clean image. Foolbox's
# `label` argument is the reference label of the unperturbed input, so use
# the classifier's top-1 prediction for the clean image instead.
original_label = int(np.argmax(clfr_output))

# Attack Model
# NOTE(review): the baseline above goes through preprocess_input, but the
# wrapper below receives the raw 0-255 image and no `preprocessing`
# argument, so the attacked model sees differently scaled inputs — confirm
# whether KerasModel(..., preprocessing=...) should be set.
sub_model = KerasModel(vgg19_clfr, bounds=(0,255))

# Measure the time used to generate adversarial example
start_time = time.time()
attack = LBFGSAttack(
    model=sub_model,
    criterion=TargetClassProbability(TARGET_CLASS, p=TARGET_PROBABILITY),
)
advexp = attack(input_image, label=original_label)
end_time = time.time()
print("Duration:", end_time - start_time)

# NOTE(review): Foolbox 1.x is documented to return None when no adversarial
# is found — confirm for the installed version.
if advexp is None:
    print("No adversarial example found.")
else:
    clfr_output = vgg19_clfr.predict(advexp[np.newaxis, :,:,:])
    print(decode_predictions(clfr_output))

得到的結果: Duration: 84.68800973892212

ResNet 50

# Build Model
# Times a targeted Foolbox L-BFGS attack against the pretrained ResNet50
# classifier and prints the predictions before and after.
import time  # needed for the timing below; this snippet did not import it

import keras
import numpy as np
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input, decode_predictions
from keras.preprocessing import image

import foolbox
from foolbox.models import KerasModel
from foolbox.attacks import LBFGSAttack
from foolbox.criteria import TargetClassProbability

# Class the attack pushes the prediction towards, and the probability that
# class has to reach for the attack to count as successful.
# NOTE(review): the InceptionResNetV2/InceptionV3/NASNet sections of this
# write-up use p=.5, so their durations are not measured under the same
# criterion as this one.
TARGET_CLASS = 388
TARGET_PROBABILITY = .8

image_path = '/content/traffic-light.png'
raw_image = image.load_img(image_path, target_size=(224,224))
# Raw pixel array; this (not the preprocessed batch) is what gets attacked.
input_image = image.img_to_array(raw_image)

resnet50_clfr = ResNet50(include_top=True, weights='imagenet', classes=1000)

# Baseline prediction on the clean image. The batch is copied because
# preprocess_input may modify its argument in place and input_image is
# reused by the attack below.
clean_batch = preprocess_input(np.expand_dims(input_image, axis=0).copy())
clfr_output = resnet50_clfr.predict(clean_batch)
print(decode_predictions(clfr_output, top=3))

# 388 is the attack *target*, not the class of the clean image. Foolbox's
# `label` argument is the reference label of the unperturbed input, so use
# the classifier's top-1 prediction for the clean image instead.
original_label = int(np.argmax(clfr_output))

# Attack Model
# NOTE(review): the baseline above goes through preprocess_input, but the
# wrapper below receives the raw 0-255 image and no `preprocessing`
# argument, so the attacked model sees differently scaled inputs — confirm
# whether KerasModel(..., preprocessing=...) should be set.
sub_model = KerasModel(resnet50_clfr, bounds=(0,255))

# Measure the time used to generate adversarial example. This snippet
# originally had no timing code even though a duration is reported for it;
# the measurement mirrors the other model sections.
start_time = time.time()
attack = LBFGSAttack(
    model=sub_model,
    criterion=TargetClassProbability(TARGET_CLASS, p=TARGET_PROBABILITY),
)
advexp = attack(input_image, label=original_label)
end_time = time.time()
print("Duration:", end_time - start_time)

# NOTE(review): Foolbox 1.x is documented to return None when no adversarial
# is found — confirm for the installed version.
if advexp is None:
    print("No adversarial example found.")
else:
    clfr_output = resnet50_clfr.predict(advexp[np.newaxis, :,:,:])
    print(decode_predictions(clfr_output))

得到的結果: Duration: 109.09112095832825

NASNet (NASNetMobile)

# Build Model
# Times a targeted Foolbox L-BFGS attack against the pretrained NASNetMobile
# classifier and prints the predictions before and after.
import time  # needed for the timing below; this snippet did not import it

import keras
import numpy as np
from keras.applications.nasnet import NASNetMobile
from keras.applications.nasnet import preprocess_input, decode_predictions
from keras.preprocessing import image

import foolbox
from foolbox.models import KerasModel
from foolbox.attacks import LBFGSAttack
from foolbox.criteria import TargetClassProbability

# Class the attack pushes the prediction towards, and the probability that
# class has to reach for the attack to count as successful.
# NOTE(review): the Xception/VGG/ResNet sections of this write-up use p=.8,
# so their durations are not measured under the same criterion.
TARGET_CLASS = 388
TARGET_PROBABILITY = .5

image_path = '/content/traffic-light.png'
raw_image = image.load_img(image_path, target_size=(224,224))
# Raw pixel array; this (not the preprocessed batch) is what gets attacked.
input_image = image.img_to_array(raw_image)

nasnetMobile_clfr = NASNetMobile(include_top=True, weights='imagenet', classes=1000)

# Baseline prediction on the clean image. The batch is copied because
# preprocess_input may modify its argument in place and input_image is
# reused by the attack below.
clean_batch = preprocess_input(np.expand_dims(input_image, axis=0).copy())
clfr_output = nasnetMobile_clfr.predict(clean_batch)
print(decode_predictions(clfr_output, top=3))

# 388 is the attack *target*, not the class of the clean image. Foolbox's
# `label` argument is the reference label of the unperturbed input, so use
# the classifier's top-1 prediction for the clean image instead.
original_label = int(np.argmax(clfr_output))

# Attack Model
# NOTE(review): the baseline above goes through preprocess_input, but the
# wrapper below receives the raw 0-255 image and no `preprocessing`
# argument, so the attacked model sees differently scaled inputs — confirm
# whether KerasModel(..., preprocessing=...) should be set.
sub_model = KerasModel(nasnetMobile_clfr, bounds=(0,255))

# Measure the time used to generate adversarial example
start_time = time.time()
attack = LBFGSAttack(
    model=sub_model,
    criterion=TargetClassProbability(TARGET_CLASS, p=TARGET_PROBABILITY),
)
advexp = attack(input_image, label=original_label)
end_time = time.time()
print("Duration:", end_time - start_time)

# NOTE(review): Foolbox 1.x is documented to return None when no adversarial
# is found — confirm for the installed version.
if advexp is None:
    print("No adversarial example found.")
else:
    clfr_output = nasnetMobile_clfr.predict(advexp[np.newaxis, :,:,:])
    print(decode_predictions(clfr_output, top=3))

得到的結果: Duration: 424.07666277885437

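綜合評比一下各模型的抵抗力(數字為產生對抗例所需的秒數,數字越大抵抗力越強)。要注意的是,InceptionResNet V2、Inception V3、NASNet 的程式碼使用的門檻值是 p=0.5,而 Xception、VGG-16、VGG-19、ResNet50 使用的是 p=0.8,因此這兩組之間的數字不能直接比較: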

InceptionResNet V2: 436.04573130607605

Inception V3: 160.60878705978494

Xception V1: 142.7184829711914

VGG-16: 80.00009250640869

VGG-19: 84.68800973892212

ResNet50: 109.09112095832825

NASNet: 424.07666277885437

--

--