import torch.nn as nn
import torch.nn.functional as F
import math
import torch
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module
from deeprobust.graph import utils
from deeprobust.graph.defense import GCN
from tqdm import tqdm
import scipy.sparse as sp
import numpy as np
from numba import njit
[docs]class GCNSVD(GCN):
"""GCNSVD is a 2 Layer Graph Convolutional Network with Truncated SVD as
preprocessing. See more details in All You Need Is Low (Rank): Defending
Against Adversarial Attacks on Graphs,
https://dl.acm.org/doi/abs/10.1145/3336191.3371789.
Parameters
----------
nfeat : int
size of input feature dimension
nhid : int
number of hidden units
nclass : int
size of output dimension
dropout : float
dropout rate for GCN
lr : float
learning rate for GCN
weight_decay : float
weight decay coefficient (l2 normalization) for GCN. When `with_relu` is True, `weight_decay` will be set to 0.
with_relu : bool
whether to use relu activation function. If False, GCN will be linearized.
with_bias: bool
whether to include bias term in GCN weights.
device: str
'cpu' or 'cuda'.
Examples
--------
We can first load dataset and then train GCNSVD.
>>> from deeprobust.graph.data import PrePtbDataset, Dataset
>>> from deeprobust.graph.defense import GCNSVD
>>> # load clean graph data
>>> data = Dataset(root='/tmp/', name='cora', seed=15)
>>> adj, features, labels = data.adj, data.features, data.labels
>>> idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
>>> # load perturbed graph data
>>> perturbed_data = PrePtbDataset(root='/tmp/', name='cora')
>>> perturbed_adj = perturbed_data.adj
>>> # train defense model
>>> model = GCNSVD(nfeat=features.shape[1],
nhid=16,
nclass=labels.max().item() + 1,
dropout=0.5, device='cpu').to('cpu')
>>> model.fit(features, perturbed_adj, labels, idx_train, idx_val, k=20)
"""
def __init__(self, nfeat, nhid, nclass, dropout=0.5, lr=0.01, weight_decay=5e-4, with_relu=True, with_bias=True, device='cpu'):
super(GCNSVD, self).__init__(nfeat, nhid, nclass, dropout, lr, weight_decay, with_relu, with_bias, device=device)
self.device = device
self.k = None
[docs] def fit(self, features, adj, labels, idx_train, idx_val=None, k=50, train_iters=200, initialize=True, verbose=True, **kwargs):
"""First perform rank-k approximation of adjacency matrix via
truncated SVD, and then train the gcn model on the processed graph,
when idx_val is not None, pick the best model according to
the validation loss.
Parameters
----------
features :
node features
adj :
the adjacency matrix. The format could be torch.tensor or scipy matrix
labels :
node labels
idx_train :
node training indices
idx_val :
node validation indices. If not given (None), GCN training process will not adpot early stopping
k : int
number of singular values and vectors to compute.
train_iters : int
number of training epochs
initialize : bool
whether to initialize parameters before training
verbose : bool
whether to show verbose logs
"""
modified_adj = self.truncatedSVD(adj, k=k)
self.k = k
# modified_adj_tensor = utils.sparse_mx_to_torch_sparse_tensor(self.modified_adj)
features, modified_adj, labels = utils.to_tensor(features, modified_adj, labels, device=self.device)
self.modified_adj = modified_adj
self.features = features
self.labels = labels
super().fit(features, modified_adj, labels, idx_train, idx_val, train_iters=train_iters, initialize=initialize, verbose=verbose)
[docs] def truncatedSVD(self, data, k=50):
"""Truncated SVD on input data.
Parameters
----------
data :
input matrix to be decomposed
k : int
number of singular values and vectors to compute.
Returns
-------
numpy.array
reconstructed matrix.
"""
print('=== GCN-SVD: rank={} ==='.format(k))
if sp.issparse(data):
data = data.asfptype()
U, S, V = sp.linalg.svds(data, k=k)
print("rank_after = {}".format(len(S.nonzero()[0])))
diag_S = np.diag(S)
else:
U, S, V = np.linalg.svd(data)
U = U[:, :k]
S = S[:k]
V = V[:k, :]
print("rank_before = {}".format(len(S.nonzero()[0])))
diag_S = np.diag(S)
print("rank_after = {}".format(len(diag_S.nonzero()[0])))
return U @ diag_S @ V
[docs] def predict(self, features=None, adj=None):
"""By default, the inputs should be unnormalized adjacency
Parameters
----------
features :
node features. If `features` and `adj` are not given, this function will use previous stored `features` and `adj` from training to make predictions.
adj :
adjcency matrix. If `features` and `adj` are not given, this function will use previous stored `features` and `adj` from training to make predictions.
Returns
-------
torch.FloatTensor
output (log probabilities) of GCNSVD
"""
self.eval()
if features is None and adj is None:
return self.forward(self.features, self.adj_norm)
else:
adj = self.truncatedSVD(adj, k=self.k)
if type(adj) is not torch.Tensor:
features, adj = utils.to_tensor(features, adj, device=self.device)
self.features = features
if utils.is_sparse_tensor(adj):
self.adj_norm = utils.normalize_adj_tensor(adj, sparse=True)
else:
self.adj_norm = utils.normalize_adj_tensor(adj)
return self.forward(self.features, self.adj_norm)
[docs]class GCNJaccard(GCN):
"""GCNJaccard first preprocesses input graph via droppining dissimilar
edges and train a GCN based on the processed graph. See more details in
Adversarial Examples on Graph Data: Deep Insights into Attack and Defense,
https://arxiv.org/pdf/1903.01610.pdf.
Parameters
----------
nfeat : int
size of input feature dimension
nhid : int
number of hidden units
nclass : int
size of output dimension
dropout : float
dropout rate for GCN
lr : float
learning rate for GCN
weight_decay : float
weight decay coefficient (l2 normalization) for GCN. When `with_relu` is True, `weight_decay` will be set to 0.
with_relu : bool
whether to use relu activation function. If False, GCN will be linearized.
with_bias: bool
whether to include bias term in GCN weights.
device: str
'cpu' or 'cuda'.
Examples
--------
We can first load dataset and then train GCNJaccard.
>>> from deeprobust.graph.data import PrePtbDataset, Dataset
>>> from deeprobust.graph.defense import GCNJaccard
>>> # load clean graph data
>>> data = Dataset(root='/tmp/', name='cora', seed=15)
>>> adj, features, labels = data.adj, data.features, data.labels
>>> idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
>>> # load perturbed graph data
>>> perturbed_data = PrePtbDataset(root='/tmp/', name='cora')
>>> perturbed_adj = perturbed_data.adj
>>> # train defense model
>>> model = GCNJaccard(nfeat=features.shape[1],
nhid=16,
nclass=labels.max().item() + 1,
dropout=0.5, device='cpu').to('cpu')
>>> model.fit(features, perturbed_adj, labels, idx_train, idx_val, threshold=0.03)
"""
def __init__(self, nfeat, nhid, nclass, binary_feature=True, dropout=0.5, lr=0.01, weight_decay=5e-4, with_relu=True, with_bias=True, device='cpu'):
super(GCNJaccard, self).__init__(nfeat, nhid, nclass, dropout, lr, weight_decay, with_relu, with_bias, device=device)
self.device = device
self.binary_feature = binary_feature
[docs] def fit(self, features, adj, labels, idx_train, idx_val=None, threshold=0.01, train_iters=200, initialize=True, verbose=True, **kwargs):
"""First drop dissimilar edges with similarity smaller than given
threshold and then train the gcn model on the processed graph.
When idx_val is not None, pick the best model according to the
validation loss.
Parameters
----------
features :
node features. The format can be numpy.array or scipy matrix
adj :
the adjacency matrix.
labels :
node labels
idx_train :
node training indices
idx_val :
node validation indices. If not given (None), GCN training process will not adpot early stopping
threshold : float
similarity threshold for dropping edges. If two connected nodes with similarity smaller than threshold, the edge between them will be removed.
train_iters : int
number of training epochs
initialize : bool
whether to initialize parameters before training
verbose : bool
whether to show verbose logs
"""
self.threshold = threshold
modified_adj = self.drop_dissimilar_edges(features, adj)
# modified_adj_tensor = utils.sparse_mx_to_torch_sparse_tensor(self.modified_adj)
features, modified_adj, labels = utils.to_tensor(features, modified_adj, labels, device=self.device)
self.modified_adj = modified_adj
self.features = features
self.labels = labels
super().fit(features, modified_adj, labels, idx_train, idx_val, train_iters=train_iters, initialize=initialize, verbose=verbose)
[docs] def drop_dissimilar_edges(self, features, adj, metric='similarity'):
"""Drop dissimilar edges.(Faster version using numba)
"""
if not sp.issparse(adj):
adj = sp.csr_matrix(adj)
adj_triu = sp.triu(adj, format='csr')
if sp.issparse(features):
features = features.todense().A # make it easier for njit processing
if metric == 'distance':
removed_cnt = dropedge_dis(adj_triu.data, adj_triu.indptr, adj_triu.indices, features, threshold=self.threshold)
else:
if self.binary_feature:
removed_cnt = dropedge_jaccard(adj_triu.data, adj_triu.indptr, adj_triu.indices, features, threshold=self.threshold)
else:
removed_cnt = dropedge_cosine(adj_triu.data, adj_triu.indptr, adj_triu.indices, features, threshold=self.threshold)
print('removed %s edges in the original graph' % removed_cnt)
modified_adj = adj_triu + adj_triu.transpose()
return modified_adj
[docs] def predict(self, features=None, adj=None):
"""By default, the inputs should be unnormalized adjacency
Parameters
----------
features :
node features. If `features` and `adj` are not given, this function will use previous stored `features` and `adj` from training to make predictions.
adj :
adjcency matrix. If `features` and `adj` are not given, this function will use previous stored `features` and `adj` from training to make predictions.
Returns
-------
torch.FloatTensor
output (log probabilities) of GCNJaccard
"""
self.eval()
if features is None and adj is None:
return self.forward(self.features, self.adj_norm)
else:
adj = self.drop_dissimilar_edges(features, adj)
if type(adj) is not torch.Tensor:
features, adj = utils.to_tensor(features, adj, device=self.device)
self.features = features
if utils.is_sparse_tensor(adj):
self.adj_norm = utils.normalize_adj_tensor(adj, sparse=True)
else:
self.adj_norm = utils.normalize_adj_tensor(adj)
return self.forward(self.features, self.adj_norm)
def _drop_dissimilar_edges(self, features, adj):
"""Drop dissimilar edges. (Slower version)
"""
if not sp.issparse(adj):
adj = sp.csr_matrix(adj)
modified_adj = adj.copy().tolil()
# preprocessing based on features
print('=== GCN-Jaccrad ===')
edges = np.array(modified_adj.nonzero()).T
removed_cnt = 0
for edge in tqdm(edges):
n1 = edge[0]
n2 = edge[1]
if n1 > n2:
continue
if self.binary_feature:
J = self._jaccard_similarity(features[n1], features[n2])
if J < self.threshold:
modified_adj[n1, n2] = 0
modified_adj[n2, n1] = 0
removed_cnt += 1
else:
# For not binary feature, use cosine similarity
C = self._cosine_similarity(features[n1], features[n2])
if C < self.threshold:
modified_adj[n1, n2] = 0
modified_adj[n2, n1] = 0
removed_cnt += 1
print('removed %s edges in the original graph' % removed_cnt)
return modified_adj
def _jaccard_similarity(self, a, b):
intersection = a.multiply(b).count_nonzero()
J = intersection * 1.0 / (a.count_nonzero() + b.count_nonzero() - intersection)
return J
def _cosine_similarity(self, a, b):
inner_product = (a * b).sum()
C = inner_product / (np.sqrt(np.square(a).sum()) * np.sqrt(np.square(b).sum()) + 1e-10)
return C
def __dropedge_jaccard(A, iA, jA, features, threshold):
# deprecated: for sparse feature matrix...
removed_cnt = 0
for row in range(len(iA)-1):
for i in range(iA[row], iA[row+1]):
# print(row, jA[i], A[i])
n1 = row
n2 = jA[i]
a, b = features[n1], features[n2]
intersection = a.multiply(b).count_nonzero()
J = intersection * 1.0 / (a.count_nonzero() + b.count_nonzero() - intersection)
if J < threshold:
A[i] = 0
# A[n2, n1] = 0
removed_cnt += 1
return removed_cnt
@njit
def dropedge_jaccard(A, iA, jA, features, threshold):
removed_cnt = 0
for row in range(len(iA)-1):
for i in range(iA[row], iA[row+1]):
# print(row, jA[i], A[i])
n1 = row
n2 = jA[i]
a, b = features[n1], features[n2]
intersection = np.count_nonzero(a*b)
J = intersection * 1.0 / (np.count_nonzero(a) + np.count_nonzero(b) - intersection)
if J < threshold:
A[i] = 0
# A[n2, n1] = 0
removed_cnt += 1
return removed_cnt
@njit
def dropedge_cosine(A, iA, jA, features, threshold):
removed_cnt = 0
for row in range(len(iA)-1):
for i in range(iA[row], iA[row+1]):
# print(row, jA[i], A[i])
n1 = row
n2 = jA[i]
a, b = features[n1], features[n2]
inner_product = (a * b).sum()
C = inner_product / (np.sqrt(np.square(a).sum()) * np.sqrt(np.square(b).sum()) + 1e-8)
if C < threshold:
A[i] = 0
# A[n2, n1] = 0
removed_cnt += 1
return removed_cnt
@njit
def dropedge_dis(A, iA, jA, features, threshold):
removed_cnt = 0
for row in range(len(iA)-1):
for i in range(iA[row], iA[row+1]):
# print(row, jA[i], A[i])
n1 = row
n2 = jA[i]
C = np.linalg.norm(features[n1] - features[n2])
if C > threshold:
A[i] = 0
# A[n2, n1] = 0
removed_cnt += 1
return removed_cnt
@njit
def dropedge_both(A, iA, jA, features, threshold1=2.5, threshold2=0.01):
removed_cnt = 0
for row in range(len(iA)-1):
for i in range(iA[row], iA[row+1]):
# print(row, jA[i], A[i])
n1 = row
n2 = jA[i]
C1 = np.linalg.norm(features[n1] - features[n2])
a, b = features[n1], features[n2]
inner_product = (a * b).sum()
C2 = inner_product / (np.sqrt(np.square(a).sum() + np.square(b).sum())+ 1e-6)
if C1 > threshold1 or threshold2 < 0:
A[i] = 0
# A[n2, n1] = 0
removed_cnt += 1
return removed_cnt
if __name__ == "__main__":
from deeprobust.graph.data import PrePtbDataset, Dataset
# load clean graph data
dataset_str = 'pubmed'
data = Dataset(root='/tmp/', name=dataset_str, seed=15)
adj, features, labels = data.adj, data.features, data.labels
idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
# load perturbed graph data
perturbed_data = PrePtbDataset(root='/tmp/', name=dataset_str)
perturbed_adj = perturbed_data.adj
# train defense model
print("Test GCNJaccard")
model = GCNJaccard(nfeat=features.shape[1],
nhid=16,
nclass=labels.max().item() + 1,
binary_feature=False,
dropout=0.5, device='cuda').to('cuda')
model.fit(features, perturbed_adj, labels, idx_train, idx_val, threshold=0.1)
model.test(idx_test)
prediction_1 = model.predict()
prediction_2 = model.predict(features, perturbed_adj)
assert (prediction_1 != prediction_2).sum() == 0
print("Test GCNSVD")
model = GCNSVD(nfeat=features.shape[1],
nhid=16,
nclass=labels.max().item() + 1,
dropout=0.5, device='cuda').to('cuda')
model.fit(features, perturbed_adj, labels, idx_train, idx_val, k=20)
model.test(idx_test)
prediction_1 = model.predict()
prediction_2 = model.predict(features, perturbed_adj)
assert (prediction_1 - prediction_2).mean() < 1e-5