본문 바로가기

프로그래머스 데브 코스/TIL

[6기] 프로그래머스 인공지능 데브코스 109일차 TIL

1218

[18주차 - Day1] Monthly Project

CNN_기반_이미지_분류_모델의_강건성_평가

# 내가 작성한 최종 결과랑 코드 박제
def _show_adversarial_examples(adv_images, labels, preds, start, stop):
    """Plot adversarial images ``start:stop`` of a batch and print their labels.

    Does nothing when the batch has no image at index ``start`` so that a
    short batch does not crash the visualisation.
    """
    if adv_images.shape[0] <= start:
        return

    shown_labels = labels[start:stop]
    shown_preds = preds[start:stop]

    images = torchvision.utils.make_grid(adv_images[start:stop])
    imshow_batch(images.cpu(), title='original labels:' + str([int(x) for x in shown_labels]) +
                 '\npredicted labels:' + str([int(x) for x in shown_preds]))

    # NOTE(review): class_names is indexed here by the converted label value,
    # whereas the caller indexes it by the dataloader's label to obtain that
    # value -- confirm this is the intended name lookup.
    print('Original labels >>>>>>>>>>>>>>>>>>>>>>>>>')
    for number, label in enumerate(shown_labels, start=start + 1):
        print(f'Image #{number}: {class_names[label]} ({label})')
    print('Predicted labels >>>>>>>>>>>>>>>>>>>>>>>>>')
    for number, label in enumerate(shown_preds, start=start + 1):
        print(f'Image #{number}: {class_names[label]} ({label})')


def black_box_attack_validate(model, black_box_model, attack):
    """Measure how well adversarial examples transfer to a black-box model.

    For every batch of the module-level ``val_dataloader`` the function builds
    adversarial images with ``attack.mi_fgsm_attack(model, inputs, labels)``,
    classifies them with ``black_box_model`` and accumulates loss, accuracy
    and the perturbation-size metrics returned by ``get_distance``. Results
    are printed; nothing is returned.

    Args:
        model: Forwarded unchanged as the first argument of
            ``attack.mi_fgsm_attack`` (the surrogate model(s) the attack
            differentiates through).
        black_box_model: Model evaluated on the adversarial images; it is only
            used for a forward pass.
        attack: Object exposing ``mi_fgsm_attack(model, inputs, labels)`` that
            returns ``(adversarial_images, perturbation)``.

    Notes:
        * Relies on the module-level names ``val_dataloader``, ``device``,
          ``class_names``, ``get_distance`` and ``imshow_batch``.
        * All averages are taken over the number of images actually
          processed, and the loss is weighted by batch size so that it is a
          true per-image mean.
    """
    criterion = nn.CrossEntropyLoss()
    start_time = time.time()

    running_loss = 0.
    running_corrects = 0
    running_length = 0

    running_l0 = 0
    running_l2 = 0
    running_mse = 0
    running_linf = 0

    for i, (inputs, basic_labels) in enumerate(val_dataloader):
        inputs = inputs.to(device)
        # Convert each dataloader label into the integer stored in class_names.
        labels = torch.tensor(
            [int(class_names[idx]) for idx in basic_labels],
            dtype=torch.long,
            device=device,
        )
        batch_size = labels.shape[0]

        # The attack needs gradients, so it must run outside torch.no_grad().
        adv_untargeted, _ = attack.mi_fgsm_attack(model, inputs, labels)  # adversarial attack

        # The black-box model is only queried; no autograd graph is required.
        with torch.no_grad():
            outputs = black_box_model(adv_untargeted)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

        # criterion returns the batch mean; weight it so the running sum is per image.
        running_loss += loss.item() * batch_size
        running_corrects += torch.sum(preds == labels).item()
        running_length += batch_size

        l0, l2, mse, linf = get_distance(adv_untargeted, inputs)
        running_l0 += l0.sum().item()
        running_l2 += l2.sum().item()
        running_mse += mse.sum().item()
        running_linf += linf.sum().item()

        if i == 0:
            print('The dimension of an image tensor:', inputs.shape[1:])
            print('[Prediction Result Examples]')
            _show_adversarial_examples(adv_untargeted, labels, preds, 0, 4)
            _show_adversarial_examples(adv_untargeted, labels, preds, 4, 8)

        # Progress report every 10 batches (previously unreachable after step 0).
        if i % 10 == 0:
            seen = max(running_length, 1)
            cur_running_loss = running_loss / seen
            running_acc = running_corrects / seen * 100.
            print('[Step #{}] Loss: {:.4f} Accuracy: {:.4f}% Time elapsed: {:.4f}s (total {} images)'.format(i, cur_running_loss, running_acc, time.time() - start_time, running_length))

    # Guard against an empty dataloader to avoid a ZeroDivisionError.
    total = max(running_length, 1)
    epoch_loss = running_loss / total
    epoch_acc = running_corrects / total * 100.
    print('[Validation] Loss: {:.4f} Accuracy: {:.4f}% Time elapsed: {:.4f}s (total {} images)'.format(epoch_loss, epoch_acc, time.time() - start_time, running_length))

    print('[Size of Perturbation]')
    print('Average L0 distance (the number of changed parameters):', running_l0 / total)
    print('Average L2 distance:', running_l2 / total)
    print('Average MSE:', running_mse / total)
    print('Average Linf distance (the maximum changed values):', running_linf / total)
# 커스텀 공격 클래스 예시

class Custom1Attack:
    """Momentum-based iterative sign-gradient attack over several models.

    Args:
        eps: Maximum absolute value of each perturbation element.
        alpha: Step size applied to the sign of the accumulated gradient.
        iters: Number of passes over the list of models.
        decay: Factor applied to the previous accumulated gradient before the
            new normalised gradient is added (defaults to 1.0).
    """

    def __init__(self, eps, alpha, iters, decay=1.0):
        self.eps = eps
        self.alpha = alpha
        self.iters = iters
        self.decay = decay

    @staticmethod
    def _resolve_device(models, images):
        """Return the device of the first model parameter, else of ``images``."""
        for model in models:
            parameters = getattr(model, "parameters", None)
            if callable(parameters):
                first = next(iter(parameters()), None)
                if first is not None:
                    return first.device
        return images.device

    def mi_fgsm_attack(self, models, images, labels):
        """Craft adversarial images that raise the loss of every model in ``models``.

        In each of ``self.iters`` passes the perturbation is updated once per
        model, in order, using the sign of a momentum-accumulated gradient.

        Args:
            models: Iterable of callables mapping an image batch to logits.
            images: Image batch with values in [0, 1]; must be 4-D because the
                gradient is normalised over dimensions 1, 2 and 3.
            labels: Ground-truth class indices for ``images``.

        Returns:
            Tuple ``(adversarial_images, perturbation)``. The adversarial
            images are clamped to [0, 1] and the perturbation to
            [-eps, eps]; both are detached from the autograd graph.
        """
        models = list(models)  # allow any iterable; it is traversed once per pass

        # Run on the device the surrogate models live on rather than a global.
        target_device = self._resolve_device(models, images)
        images = images.to(target_device)
        labels = labels.to(target_device)

        # Zero-centred uniform noise as the starting perturbation.
        perturbation = torch.empty_like(images).uniform_(-self.eps, self.eps)
        attack_loss = nn.CrossEntropyLoss()
        momentum = torch.zeros_like(images)

        for _ in range(self.iters):
            for model in models:
                perturbation.requires_grad_(True)

                # Current adversarial image, kept inside the valid pixel range.
                current = torch.clamp(images + perturbation, min=0, max=1)
                cost = attack_loss(model(current), labels)

                # Only the input gradient is needed; this leaves the models'
                # parameter gradients untouched.
                (grad,) = torch.autograd.grad(cost, perturbation)

                # Normalise by the per-sample mean absolute gradient. A sample
                # whose gradient is exactly zero keeps a zero gradient instead
                # of turning into NaN through 0 / 0.
                scale = grad.abs().mean(dim=(1, 2, 3), keepdim=True)
                scale = torch.where(scale > 0, scale, torch.ones_like(scale))
                momentum = grad / scale + momentum * self.decay

                # Step in the direction that increases the loss, then project
                # the perturbation back into the [-eps, eps] box.
                step = self.alpha * momentum.sign()
                perturbation = torch.clamp(
                    perturbation.detach() + step, min=-self.eps, max=self.eps
                )

        # Return (final adversarial images, perturbation).
        current = torch.clamp(images + perturbation, min=0, max=1)
        return current, perturbation
# Black-box attack example: craft adversarial images against an ensemble of
# surrogate models and evaluate them on a separate black-box model.
custom_attack = Custom1Attack(  # attack hyper-parameters
    eps=16/255,
    alpha=4/255,
    iters=7,
)
black_box_attack_validate(
    model=[mobilenet_v2, wide_resnet101_2, vgg16, densenet201],
    black_box_model=black_box_model,
    attack=custom_attack,
)