Source code for deeprobust.graph.global_attack.node_embedding_attack

"""
Code in this file is modified from https://github.com/abojchevski/node_embedding_attack

'Adversarial Attacks on Node Embeddings via Graph Poisoning'
Aleksandar Bojchevski and Stephan Günnemann, ICML 2019
http://proceedings.mlr.press/v97/bojchevski19a.html
Copyright (C) owned by the authors, 2019
"""

import numba
import numpy as np
import scipy.sparse as sp
import scipy.linalg as spl
import torch
import networkx as nx
from deeprobust.graph.global_attack import BaseAttack


class NodeEmbeddingAttack(BaseAttack):
    """Node embedding attack. Adversarial Attacks on Node Embeddings via Graph
    Poisoning. Aleksandar Bojchevski and Stephan Günnemann, ICML 2019.
    http://proceedings.mlr.press/v97/bojchevski19a.html

    Examples
    --------
    >>> from deeprobust.graph.data import Dataset
    >>> from deeprobust.graph.global_attack import NodeEmbeddingAttack
    >>> data = Dataset(root='/tmp/', name='cora_ml', seed=15)
    >>> adj, features, labels = data.adj, data.features, data.labels
    >>> model = NodeEmbeddingAttack()
    >>> model.attack(adj, attack_type="remove")
    >>> modified_adj = model.modified_adj
    >>> model.attack(adj, attack_type="remove", min_span_tree=True)
    >>> modified_adj = model.modified_adj
    >>> model.attack(adj, attack_type="add", n_candidates=10000)
    >>> modified_adj = model.modified_adj
    >>> model.attack(adj, attack_type="add_by_remove", n_candidates=10000)
    >>> modified_adj = model.modified_adj
    """

    def __init__(self):
        pass

    def attack(self, adj, n_perturbations=1000, dim=32, window_size=5,
               attack_type="remove", min_span_tree=False, n_candidates=None,
               seed=None, **kwargs):
        """Selects the top (n_perturbations) number of flips using our perturbation attack.

        :param adj: sp.spmatrix
            The graph represented as a sparse scipy matrix
        :param n_perturbations: int
            Number of flips to select
        :param dim: int
            Dimensionality of the embeddings.
        :param window_size: int
            Co-occurrence window size.
        :param attack_type: str
            can be chosen from ["remove", "add", "add_by_remove"]
        :param min_span_tree: bool
            Whether to disallow edges that lie on the minimum spanning tree;
            only valid when `attack_type` is "remove"
        :param n_candidates: int
            Number of candidates for addition; only valid when `attack_type`
            is "add" or "add_by_remove"
        :param seed: int
            Random seed
        """
        assert attack_type in ["remove", "add", "add_by_remove"], \
            "attack_type can only be `remove`, `add` or `add_by_remove`"

        if attack_type == "remove":
            if min_span_tree:
                candidates = self.generate_candidates_removal_minimum_spanning_tree(adj)
            else:
                candidates = self.generate_candidates_removal(adj, seed)
        elif attack_type == "add" or attack_type == "add_by_remove":
            assert n_candidates, "please specify the value of `n_candidates`, " \
                + "i.e. how many candidates you want to generate for addition"
            candidates = self.generate_candidates_addition(adj, n_candidates, seed)

        n_nodes = adj.shape[0]

        if attack_type == "add_by_remove":
            candidates_add = candidates
            adj_add = self.flip_candidates(adj, candidates_add)
            vals_org_add, vecs_org_add = spl.eigh(adj_add.toarray(), np.diag(adj_add.sum(1).A1))
            flip_indicator = 1 - 2 * adj_add[candidates[:, 0], candidates[:, 1]].A1

            loss_est = estimate_loss_with_delta_eigenvals(candidates_add, flip_indicator,
                                                          vals_org_add, vecs_org_add,
                                                          n_nodes, dim, window_size)
            loss_argsort = loss_est.argsort()
            top_flips = candidates_add[loss_argsort[:n_perturbations]]
        else:
            # vector indicating whether we are adding an edge (+1) or removing an edge (-1)
            delta_w = 1 - 2 * adj[candidates[:, 0], candidates[:, 1]].A1

            # generalized eigenvalues/eigenvectors
            deg_matrix = np.diag(adj.sum(1).A1)
            vals_org, vecs_org = spl.eigh(adj.toarray(), deg_matrix)

            loss_for_candidates = estimate_loss_with_delta_eigenvals(candidates, delta_w,
                                                                     vals_org, vecs_org,
                                                                     n_nodes, dim, window_size)
            top_flips = candidates[loss_for_candidates.argsort()[-n_perturbations:]]

        assert len(top_flips) == n_perturbations

        modified_adj = self.flip_candidates(adj, top_flips)
        self.check_adj(modified_adj)
        self.modified_adj = modified_adj

    def generate_candidates_removal(self, adj, seed=None):
        """Generates candidate edge flips for removal (edge -> non-edge),
        disallowing one random edge per node to prevent singleton nodes.

        :param adj: sp.csr_matrix, shape [n_nodes, n_nodes]
            Adjacency matrix of the graph
        :param seed: int
            Random seed
        :return: np.ndarray, shape [?, 2]
            Candidate set of edge flips
        """
        n_nodes = adj.shape[0]
        if seed is not None:
            np.random.seed(seed)
        deg = np.where(adj.sum(1).A1 == 1)[0]

        hidden = np.column_stack((np.arange(n_nodes),
                                  np.fromiter(map(np.random.choice, adj.tolil().rows),
                                              dtype=np.int32)))

        adj_hidden = edges_to_sparse(hidden, adj.shape[0])
        adj_hidden = adj_hidden.maximum(adj_hidden.T)
        adj_keep = adj - adj_hidden
        candidates = np.column_stack(sp.triu(adj_keep).nonzero())

        candidates = candidates[np.logical_not(np.in1d(candidates[:, 0], deg)
                                               | np.in1d(candidates[:, 1], deg))]

        return candidates

    def generate_candidates_removal_minimum_spanning_tree(self, adj):
        """Generates candidate edge flips for removal (edge -> non-edge),
        disallowing edges that lie on the minimum spanning tree.

        :param adj: sp.csr_matrix, shape [n_nodes, n_nodes]
            Adjacency matrix of the graph
        :return: np.ndarray, shape [?, 2]
            Candidate set of edge flips
        """
        mst = sp.csgraph.minimum_spanning_tree(adj)
        mst = mst.maximum(mst.T)
        adj_sample = adj - mst
        candidates = np.column_stack(sp.triu(adj_sample, 1).nonzero())
        return candidates

    def generate_candidates_addition(self, adj, n_candidates, seed=None):
        """Generates candidate edge flips for addition (non-edge -> edge).

        :param adj: sp.csr_matrix, shape [n_nodes, n_nodes]
            Adjacency matrix of the graph
        :param n_candidates: int
            Number of candidates to generate.
        :param seed: int
            Random seed
        :return: np.ndarray, shape [?, 2]
            Candidate set of edge flips
        """
        if seed is not None:
            np.random.seed(seed)

        num_nodes = adj.shape[0]

        candidates = np.random.randint(0, num_nodes, [n_candidates * 5, 2])
        candidates = candidates[candidates[:, 0] < candidates[:, 1]]
        candidates = candidates[adj[candidates[:, 0], candidates[:, 1]].A1 == 0]
        candidates = np.array(list(set(map(tuple, candidates))))
        candidates = candidates[:n_candidates]
        assert len(candidates) == n_candidates
        return candidates

    def flip_candidates(self, adj, candidates):
        """Flip the edges in the candidate set to non-edges and vice versa.

        :param adj: sp.csr_matrix, shape [n_nodes, n_nodes]
            Adjacency matrix of the graph
        :param candidates: np.ndarray, shape [?, 2]
            Candidate set of edge flips
        :return: sp.csr_matrix, shape [n_nodes, n_nodes]
            Adjacency matrix of the graph with the flipped edges/non-edges.
        """
        adj_flipped = adj.copy().tolil()
        adj_flipped[candidates[:, 0], candidates[:, 1]] = 1 - adj[candidates[:, 0], candidates[:, 1]]
        adj_flipped[candidates[:, 1], candidates[:, 0]] = 1 - adj[candidates[:, 1], candidates[:, 0]]
        adj_flipped = adj_flipped.tocsr()
        adj_flipped.eliminate_zeros()
        return adj_flipped
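

# Illustrative sketch (not part of the original module; the helper name is
# hypothetical): a minimal check of `flip_candidates` on a toy 4-node path
# graph, assuming the symmetric binary CSR adjacency format used throughout
# this file. Flipping the existing edge (0, 1) removes it, while flipping the
# non-edge (0, 3) adds it, and both changes are applied symmetrically.
def _demo_flip_candidates():
    adj = sp.csr_matrix(np.array([[0, 1, 0, 0],
                                  [1, 0, 1, 0],
                                  [0, 1, 0, 1],
                                  [0, 0, 1, 0]], dtype=float))
    candidates = np.array([[0, 1], [0, 3]])
    flipped = NodeEmbeddingAttack().flip_candidates(adj, candidates)
    assert flipped[0, 1] == 0 and flipped[1, 0] == 0  # existing edge removed
    assert flipped[0, 3] == 1 and flipped[3, 0] == 1  # non-edge added
    return flipped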


@numba.jit(nopython=True)
def estimate_loss_with_delta_eigenvals(candidates, flip_indicator, vals_org, vecs_org,
                                       n_nodes, dim, window_size):
    """Computes the estimated loss using the change in the eigenvalues for every candidate edge flip.

    :param candidates: np.ndarray, shape [?, 2]
        Candidate set of edge flips
    :param flip_indicator: np.ndarray, shape [?]
        Vector indicating whether we are adding an edge (+1) or removing an edge (-1)
    :param vals_org: np.ndarray, shape [n]
        The generalized eigenvalues of the clean graph
    :param vecs_org: np.ndarray, shape [n, n]
        The generalized eigenvectors of the clean graph
    :param n_nodes: int
        Number of nodes
    :param dim: int
        Embedding dimension
    :param window_size: int
        Size of the window
    :return: np.ndarray, shape [?]
        Estimated loss for each candidate flip
    """
    loss_est = np.zeros(len(candidates))
    for x in range(len(candidates)):
        i, j = candidates[x]
        vals_est = vals_org + flip_indicator[x] * (
                2 * vecs_org[i] * vecs_org[j] - vals_org * (vecs_org[i] ** 2 + vecs_org[j] ** 2))

        vals_sum_powers = sum_of_powers(vals_est, window_size)

        loss_ij = np.sqrt(np.sum(np.sort(vals_sum_powers ** 2)[:n_nodes - dim]))
        loss_est[x] = loss_ij

    return loss_est


@numba.jit(nopython=True)
def estimate_delta_eigenvecs(candidates, flip_indicator, degrees, vals_org, vecs_org,
                             delta_eigvals, pinvs):
    """Computes the estimated change in the eigenvectors for every candidate edge flip.

    :param candidates: np.ndarray, shape [?, 2]
        Candidate set of edge flips
    :param flip_indicator: np.ndarray, shape [?]
        Vector indicating whether we are adding an edge (+1) or removing an edge (-1)
    :param degrees: np.ndarray, shape [n]
        Vector of node degrees.
    :param vals_org: np.ndarray, shape [n]
        The generalized eigenvalues of the clean graph
    :param vecs_org: np.ndarray, shape [n, n]
        The generalized eigenvectors of the clean graph
    :param delta_eigvals: np.ndarray, shape [?, n]
        Estimated change in the eigenvalues for all candidate edge flips
    :param pinvs: np.ndarray, shape [k, n, n]
        Precomputed pseudo-inverse matrices for every dimension
    :return: np.ndarray, shape [?, k, n]
        Estimated change in the eigenvectors for all candidate edge flips
    """
    n_nodes, dim = vecs_org.shape
    n_candidates = len(candidates)
    delta_eigvecs = np.zeros((n_candidates, dim, n_nodes))

    for k in range(dim):
        cur_eigvecs = vecs_org[:, k]
        cur_eigvals = vals_org[k]
        for c in range(n_candidates):
            degree_eigvec = (-delta_eigvals[c, k] * degrees) * cur_eigvecs
            i, j = candidates[c]

            degree_eigvec[i] += cur_eigvecs[j] - cur_eigvals * cur_eigvecs[i]
            degree_eigvec[j] += cur_eigvecs[i] - cur_eigvals * cur_eigvecs[j]

            delta_eigvecs[c, k] = np.dot(pinvs[k], flip_indicator[c] * degree_eigvec)

    return delta_eigvecs


def estimate_delta_eigvals(candidates, adj, vals_org, vecs_org):
    """Computes the estimated change in the eigenvalues for every candidate edge flip.

    :param candidates: np.ndarray, shape [?, 2]
        Candidate set of edge flips
    :param adj: sp.spmatrix
        The graph represented as a sparse scipy matrix
    :param vals_org: np.ndarray, shape [n]
        The generalized eigenvalues of the clean graph
    :param vecs_org: np.ndarray, shape [n, n]
        The generalized eigenvectors of the clean graph
    :return: np.ndarray, shape [?, n]
        Estimated change in the eigenvalues for all candidate edge flips
    """
    # vector indicating whether we are adding an edge (+1) or removing an edge (-1)
    delta_w = 1 - 2 * adj[candidates[:, 0], candidates[:, 1]].A1

    delta_eigvals = delta_w[:, None] * (2 * vecs_org[candidates[:, 0]] * vecs_org[candidates[:, 1]]
                                        - vals_org * (vecs_org[candidates[:, 0]] ** 2
                                                      + vecs_org[candidates[:, 1]] ** 2))

    return delta_eigvals
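

# Illustrative sketch (not part of the original module; the helper name is
# hypothetical): both `estimate_loss_with_delta_eigenvals` and
# `estimate_delta_eigvals` rely on the same first-order eigenvalue perturbation,
#     delta_lambda_k ~= delta_w_ij * (2 * u_ki * u_kj - lambda_k * (u_ki**2 + u_kj**2)),
# where (lambda_k, u_k) are generalized eigenpairs of (A, D). The check below
# verifies that the vectorized estimate matches the per-candidate expression
# used inside the numba loop, assuming a binary scipy CSR adjacency.
def _check_delta_eigvals_consistency(adj, candidates):
    deg_matrix = np.diag(adj.sum(1).A1)
    vals_org, vecs_org = spl.eigh(adj.toarray(), deg_matrix)
    delta_w = 1 - 2 * adj[candidates[:, 0], candidates[:, 1]].A1

    vectorized = estimate_delta_eigvals(candidates, adj, vals_org, vecs_org)
    for x in range(len(candidates)):
        i, j = candidates[x]
        per_candidate = delta_w[x] * (2 * vecs_org[i] * vecs_org[j]
                                      - vals_org * (vecs_org[i] ** 2 + vecs_org[j] ** 2))
        assert np.allclose(vectorized[x], per_candidate)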


class OtherNodeEmbeddingAttack(NodeEmbeddingAttack):
    """Baseline methods from the paper Adversarial Attacks on Node Embeddings
    via Graph Poisoning. Aleksandar Bojchevski and Stephan Günnemann, ICML 2019.
    http://proceedings.mlr.press/v97/bojchevski19a.html

    Examples
    --------
    >>> from deeprobust.graph.data import Dataset
    >>> from deeprobust.graph.global_attack import OtherNodeEmbeddingAttack
    >>> data = Dataset(root='/tmp/', name='cora_ml', seed=15)
    >>> adj, features, labels = data.adj, data.features, data.labels
    >>> model = OtherNodeEmbeddingAttack(type='degree')
    >>> model.attack(adj, attack_type="remove")
    >>> modified_adj = model.modified_adj
    >>> #
    >>> model = OtherNodeEmbeddingAttack(type='eigencentrality')
    >>> model.attack(adj, attack_type="remove")
    >>> modified_adj = model.modified_adj
    >>> #
    >>> model = OtherNodeEmbeddingAttack(type='random')
    >>> model.attack(adj, attack_type="add", n_candidates=10000)
    >>> modified_adj = model.modified_adj
    """

    def __init__(self, type):
        assert type in ["degree", "eigencentrality", "random"]
        self.type = type

    def attack(self, adj, n_perturbations=1000, attack_type="remove",
               min_span_tree=False, n_candidates=None, seed=None, **kwargs):
        """Selects the top (n_perturbations) number of flips using the chosen
        baseline attack (random, degree, or eigencentrality).

        :param adj: sp.spmatrix
            The graph represented as a sparse scipy matrix
        :param n_perturbations: int
            Number of flips to select
        :param attack_type: str
            can be chosen from ["remove", "add"]
        :param min_span_tree: bool
            Whether to disallow edges that lie on the minimum spanning tree;
            only valid when `attack_type` is "remove"
        :param n_candidates: int
            Number of candidates for addition; only valid when `attack_type` is "add"
        :param seed: int
            Random seed
        """
        assert attack_type in ["remove", "add"], \
            "attack_type can only be `remove` or `add`"

        if attack_type == "remove":
            if min_span_tree:
                candidates = self.generate_candidates_removal_minimum_spanning_tree(adj)
            else:
                candidates = self.generate_candidates_removal(adj, seed)
        elif attack_type == "add":
            assert n_candidates, "please specify the value of `n_candidates`, " \
                + "i.e. how many candidates you want to generate for addition"
            candidates = self.generate_candidates_addition(adj, n_candidates, seed)
        else:
            raise NotImplementedError

        if self.type == "random":
            top_flips = self.random_top_flips(candidates, n_perturbations, seed)
        elif self.type == "eigencentrality":
            top_flips = self.eigencentrality_top_flips(adj, candidates, n_perturbations)
        elif self.type == "degree":
            top_flips = self.degree_top_flips(adj, candidates, n_perturbations, complement=False)
        else:
            raise NotImplementedError

        assert len(top_flips) == n_perturbations

        modified_adj = self.flip_candidates(adj, top_flips)
        self.check_adj(modified_adj)
        self.modified_adj = modified_adj

    def random_top_flips(self, candidates, n_perturbations, seed=None):
        """Selects (n_perturbations) number of flips at random.

        :param candidates: np.ndarray, shape [?, 2]
            Candidate set of edge flips
        :param n_perturbations: int
            Number of flips to select
        :param seed: int
            Random seed
        :return: np.ndarray, shape [?, 2]
            The top edge flips from the candidate set
        """
        if seed is not None:
            np.random.seed(seed)
        return candidates[np.random.permutation(len(candidates))[:n_perturbations]]

    def eigencentrality_top_flips(self, adj, candidates, n_perturbations):
        """Selects the top (n_perturbations) number of flips using eigencentrality score of the edges.
        Applicable only when removing edges.

        :param adj: sp.spmatrix
            The graph represented as a sparse scipy matrix
        :param candidates: np.ndarray, shape [?, 2]
            Candidate set of edge flips
        :param n_perturbations: int
            Number of flips to select
        :return: np.ndarray, shape [?, 2]
            The top edge flips from the candidate set
        """
        edges = np.column_stack(sp.triu(adj, 1).nonzero())
        line_graph = construct_line_graph(adj)
        eigcentrality_scores = nx.eigenvector_centrality_numpy(nx.Graph(line_graph))
        eigcentrality_scores = {tuple(edges[k]): eigcentrality_scores[k]
                                for k, v in eigcentrality_scores.items()}
        eigcentrality_scores = np.array([eigcentrality_scores[tuple(cnd)] for cnd in candidates])
        scores_argsrt = eigcentrality_scores.argsort()
        return candidates[scores_argsrt[-n_perturbations:]]

    def degree_top_flips(self, adj, candidates, n_perturbations, complement):
        """Selects the top (n_perturbations) number of flips using degree centrality score of the edges.

        :param adj: sp.spmatrix
            The graph represented as a sparse scipy matrix
        :param candidates: np.ndarray, shape [?, 2]
            Candidate set of edge flips
        :param n_perturbations: int
            Number of flips to select
        :param complement: bool
            Whether to look at the complement graph
        :return: np.ndarray, shape [?, 2]
            The top edge flips from the candidate set
        """
        if complement:
            adj = sp.csr_matrix(1 - adj.toarray())
        deg = adj.sum(1).A1
        deg_argsort = (deg[candidates[:, 0]] + deg[candidates[:, 1]]).argsort()
        return candidates[deg_argsort[-n_perturbations:]]
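

# Illustrative sketch (not part of the original module; the helper name is
# hypothetical): `degree_top_flips` scores each candidate (i, j) by
# deg(i) + deg(j) and keeps the highest-scoring flips, so on a star graph the
# edge incident to the hub is picked before an edge between two leaves.
def _demo_degree_top_flips():
    # star graph: node 0 connected to nodes 1..4
    adj = sp.csr_matrix((np.ones(8),
                         ([0, 0, 0, 0, 1, 2, 3, 4], [1, 2, 3, 4, 0, 0, 0, 0])),
                        shape=(5, 5))
    candidates = np.array([[0, 1], [1, 2]])
    attacker = OtherNodeEmbeddingAttack(type='degree')
    top = attacker.degree_top_flips(adj, candidates, n_perturbations=1, complement=False)
    assert (top == np.array([[0, 1]])).all()  # hub edge has the larger degree sum
    return top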


@numba.jit(nopython=True)
def sum_of_powers(x, power):
    r"""For each x_i, computes \sum_{r=1}^{power} x_i^r (elementwise sum of powers).

    :param x: shape [?]
        Any vector
    :param power: int
        The largest power to consider
    :return: shape [?]
        Vector where each element is the sum of powers from 1 to power.
    """
    n = x.shape[0]
    sum_powers = np.zeros((power, n))
    for i, i_power in enumerate(range(1, power + 1)):
        sum_powers[i] = np.power(x, i_power)

    return sum_powers.sum(0)


def edges_to_sparse(edges, num_nodes, weights=None):
    if weights is None:
        weights = np.ones(edges.shape[0])

    return sp.coo_matrix((weights, (edges[:, 0], edges[:, 1])),
                         shape=(num_nodes, num_nodes)).tocsr()


def construct_line_graph(adj):
    """Construct a line graph from an undirected original graph.

    Parameters
    ----------
    adj : sp.spmatrix [n_samples, n_samples]
        Symmetric binary adjacency matrix.

    Returns
    -------
    L : sp.spmatrix, shape [A.nnz/2, A.nnz/2]
        Symmetric binary adjacency matrix of the line graph.
    """
    N = adj.shape[0]
    edges = np.column_stack(sp.triu(adj, 1).nonzero())
    e1, e2 = edges[:, 0], edges[:, 1]

    I = sp.eye(N).tocsr()
    E1 = I[e1]
    E2 = I[e2]

    L = E1.dot(E1.T) + E1.dot(E2.T) + E2.dot(E1.T) + E2.dot(E2.T)

    return L - 2 * sp.eye(L.shape[0])


if __name__ == "__main__":
    from deeprobust.graph.data import Dataset
    from deeprobust.graph.defense import DeepWalk
    import itertools

    # load clean graph data
    dataset_str = 'cora_ml'
    data = Dataset(root='/tmp/', name=dataset_str, seed=15)
    adj, features, labels = data.adj, data.features, data.labels
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

    comb = itertools.product(["random", "degree", "eigencentrality"], ["remove", "add"])
    for type, attack_type in comb:
        model = OtherNodeEmbeddingAttack(type=type)
        print(model.type, attack_type)
        try:
            model.attack(adj, attack_type=attack_type, n_candidates=10000)
            defender = DeepWalk()
            defender.fit(adj)
            defender.evaluate_node_classification(labels, idx_train, idx_test)
        except KeyError:
            print('eigencentrality only supports removing edges')

    model = NodeEmbeddingAttack()
    model.attack(adj, attack_type="remove")
    model.attack(adj, attack_type="remove", min_span_tree=True)
    modified_adj = model.modified_adj
    model.attack(adj, attack_type="add", n_candidates=10000)
    model.attack(adj, attack_type="add_by_remove", n_candidates=10000)
    # model.attack(adj, attack_type="add")
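

# Illustrative sketch (not part of the original module; the helper name is
# hypothetical): `construct_line_graph` connects two edges of the original
# graph whenever they share an endpoint. For a triangle, every pair of edges
# shares a node, so the line graph is again a triangle.
def _demo_construct_line_graph():
    triangle = sp.csr_matrix(np.array([[0, 1, 1],
                                       [1, 0, 1],
                                       [1, 1, 0]], dtype=float))
    L = construct_line_graph(triangle)
    expected = np.array([[0, 1, 1],
                         [1, 0, 1],
                         [1, 1, 0]], dtype=float)
    assert np.allclose(L.toarray(), expected)
    return L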