Skip to content

Commit

Permalink
Merge branch 'baoyul2/main' into HEAD
Browse files: browse the repository at this point in the history
  • Loading branch information
Boey-li committed Oct 13, 2022
2 parents ac61d5d + e7684fc commit 044c505
Show file tree
Hide file tree
Showing 27 changed files with 143,003 additions and 132 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
__pycache__
data
scripts
script.py
trained_model
configs
baseline
continuous_processed
temporal_*
raw_data
visualization_*
6 changes: 3 additions & 3 deletions configs/link_pred_cont_temporal_dblp.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,18 @@ DATASET:
TEMPORAL:
use_feat: True
val_len: 1
test_len: 2
test_len: 3

TRAIN:
max_epochs: 50
log_interval: 0
log_epoch: 1
batch_size: 32
initial_lr: 0.0001
OPTIMIZER:
type: "ADAM"

ATTACK:
method: "random"
method: "meta"
ptb_rate: 0.0
attack_data_path: "attack_data"
new_attack: False
Expand Down
6 changes: 3 additions & 3 deletions configs/link_pred_temporal_dblp.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ task: "temporal_link_prediction"
seed: 321

MODEL:
model: "EULER"
model: "DYSAT"

DATASET:
dataset: "dblp"
Expand All @@ -12,10 +12,10 @@ DATASET:
test_len: 3

TRAIN:
max_epochs: 1500
max_epochs: 100
log_interval: 10
batch_size: 64
initial_lr: 0.02
initial_lr: 0.0005
OPTIMIZER:
type: "ADAM"

Expand Down
10 changes: 5 additions & 5 deletions configs/link_pred_temporal_enron10.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ task: "temporal_link_prediction"
seed: 321

MODEL:
model: "EULER"
model: "DYSAT"

DATASET:
dataset: "enron10"
Expand All @@ -12,16 +12,16 @@ DATASET:
test_len: 3

TRAIN:
max_epochs: 1500
max_epochs: 100
log_interval: 10
batch_size: 64
initial_lr: 0.02
initial_lr: 0.0005
OPTIMIZER:
type: "ADAM"

ATTACK:
method: "temporal"
ptb_rate: 1.0
method: "random"
ptb_rate: 0.0
attack_data_path: "attack_data"
new_attack: False

Expand Down
8 changes: 4 additions & 4 deletions configs/link_pred_temporal_fb.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,19 @@ task: "temporal_link_prediction"
seed: 321

MODEL:
model: "EGCNO"
model: "EGCNH"

DATASET:
dataset: "reddit"
dataset: "fb"
TEMPORAL:
use_feat: False
test_len: 3

TRAIN:
max_epochs: 1500
max_epochs: 100
log_interval: 10
batch_size: 64
initial_lr: 0.02
initial_lr: 0.0005
OPTIMIZER:
type: "ADAM"

Expand Down
37 changes: 37 additions & 0 deletions configs/link_pred_temporal_reddit.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Experiment configuration: temporal link prediction on the "reddit" dataset.
# NOTE(review): nesting is not visible in this view; the key paths quoted in the
# comments below (e.g. cfg.DATASET.TEMPORAL.test_len) are taken from the code that reads them.
name: "link_pred_temporal"
task: "temporal_link_prediction"
seed: 321

# Model selector.
# NOTE(review): presumably resolved by modules/models/dispatcher.py — confirm "EGCNH" is a name it handles.
MODEL:
model: "EGCNH"

# Dataset selection and temporal split settings.
DATASET:
dataset: "reddit"
TEMPORAL:
use_feat: False
# Read by the temporal attack code as cfg.DATASET.TEMPORAL.test_len.
test_len: 3

# Optimisation settings.
# NOTE(review): the dblp / enron10 / fb temporal configs touched in this same commit use
# max_epochs 100 and initial_lr 0.0005 — confirm that 1500 / 0.01 is intended here.
TRAIN:
max_epochs: 1500
log_interval: 10
batch_size: 64
initial_lr: 0.01
OPTIMIZER:
type: "ADAM"

# Adversarial-attack settings.
# random_attack_temporal returns the adjacency list unchanged when ptb_rate == 0.0, so this
# configuration applies no perturbation — assuming method "random" dispatches to that function (TODO confirm).
ATTACK:
method: "random"
ptb_rate: 0.0
# Joined under the dataset root when the meta attack builds its attacked-data path.
attack_data_path: "attack_data"
# meta_attack_temporal regenerates attacked data when this is true or when the cached pickle is missing.
new_attack: False

# Model/task-specific hyperparameters.
# NOTE(review): num_features and num_nodes are both 184 — confirm these match the reddit
# dataset and were not carried over from another dataset's config.
TASK_SPECIFIC:
GEOMETRIC:
num_features: 184
num_nodes: 184
inner_prod: False
filter_size: 2

# Output file names. update_cfg_from_args overrides model_file when args.model_file is given.
LOGGING:
log_file: "log.txt"
model_file: "model.pt"
5 changes: 3 additions & 2 deletions entries/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@
def setup(cfg, args):
# get device
if args.gpu:
device = args.device
device = args.device
else:
device = torch.device('cpu')
# device = utils.guess_device()
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'

# set up dataset
data = dataset.dispatcher(cfg, device)
Expand All @@ -31,7 +32,7 @@ def setup(cfg, args):
# set up model
if cfg.MODEL.encoder != "none":
model_cls, encoder_cls = models.dispatcher(cfg)
model = model_cls(encoder_cls(cfg)).to(device)
model = model_cls(encoder_cls(cfg).to(device)).to(device)
elif cfg.MODEL.model == "DYSAT":
model_cls = models.dispatcher(cfg)
test_len = cfg.DATASET.TEMPORAL.test_len
Expand Down
17 changes: 12 additions & 5 deletions modules/attack/graph_attack.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
from xml.sax.handler import feature_external_ges
from deeprobust.graph.global_attack import Random
from deeprobust.graph.data import Dataset, Dpr2Pyg, Pyg2Dpr
import os
import torch
import pickle
import random
from tqdm import tqdm, trange
import numpy as np
from utils import get_dataset_root
import logging
import random

from utils import get_dataset_root
from scipy.sparse import csr_matrix

from deeprobust.graph.global_attack import Random
from deeprobust.graph.data import Dataset, Dpr2Pyg, Pyg2Dpr
from deeprobust.graph.defense import GCN
from deeprobust.graph.global_attack import MetaApprox
from deeprobust.graph.global_attack import Metattack
from deeprobust.graph.global_attack import NodeEmbeddingAttack
from deeprobust.graph.global_attack import DICE
from scipy.sparse import csr_matrix


def load_feat_and_label(data_name, data_path):
label = np.load(os.path.join(data_path, data_name, "label.npy"))
Expand Down Expand Up @@ -148,6 +152,9 @@ def meta_attack_temporal(cfg, adj_matrix_lst, device):
path = os.path.join(get_dataset_root(), attack_data_path, "{}_ptb_rate_{}_metaattack".format(cfg.DATASET.dataset, ptb_rate))
if cfg.ATTACK.new_attack or not os.path.exists(os.path.join(path, "adj_ptb_{}_test_{}.pickle".format(ptb_rate,test_len))):
# generate attacked data
# print(os.path.join(path, "adj_ptb_{}_test_{}.pickle".format(ptb_rate,test_len)))
# print(cfg.ATTACK.new_attack)
# exit()
logging.info("Meta attack on dataset: {} ptb_rate: {}".format(cfg.DATASET.dataset, ptb_rate))
if not os.path.exists(path):
os.mkdir(path)
Expand Down Expand Up @@ -202,7 +209,7 @@ def random_attack_temporal(cfg, adj_matrix_lst, device):
ptb_rate = cfg.ATTACK.ptb_rate
test_len = cfg.DATASET.TEMPORAL.test_len

random_method = "add" # set default random attack method here
random_method = "remove" # set default random attack method here

if ptb_rate == 0.0:
return adj_matrix_lst
Expand Down
4 changes: 4 additions & 0 deletions modules/config_guard/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
_C.meta_testing_num_classes = -1
_C.input_dim = (3, 32, 32)
_C.save_model = False
_C.device = 0

#######################
# DL System Setting
Expand Down Expand Up @@ -298,6 +299,9 @@ def update_cfg_from_args(cfg, args):

if args.model_file is not None:
cfg.LOGGING.model_file = args.model_file

if args.device is not None:
cfg.device = args.device

cfg.freeze()

Expand Down
19 changes: 12 additions & 7 deletions modules/dataset/continuous_temporal_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,15 +72,19 @@ def __init__(self, cfg, device):
attack_func = attack.dispatcher(cfg)

self.val_len = self.cfg.DATASET.TEMPORAL.val_len
self.test_len = self.cfg.DATASET.TEMPORAL.test_len
self.test_len = self.cfg.DATASET.TEMPORAL.test_len - self.val_len

self.lr = self.cfg.TRAIN.initial_lr
self.ptb_rate = self.cfg.ATTACK.ptb_rate
self.attack_method = self.cfg.ATTACK.method

# generate g_df, n_feat, e_feat
self.run(data_name, use_edge_feat, attack_flag, attack_func)

process_path = os.path.join(get_process_root())
self.g_df = pd.read_csv('{}/ml_{}.csv'.format(process_path, data_name))
self.n_feat = np.load('{}/ml_{}_node.npy'.format(process_path, data_name))
self.e_feat = np.load('{}/ml_{}.npy'.format(process_path, data_name))
self.g_df = pd.read_csv('{}/ml_{}_{}_{}_lr_{}.csv'.format(process_path, data_name, self.attack_method, self.ptb_rate, self.lr))
self.n_feat = np.load('{}/ml_{}_{}_{}_lr_{}_node.npy'.format(process_path, data_name, self.attack_method, self.ptb_rate, self.lr))
self.e_feat = np.load('{}/ml_{}_{}_{}_lr_{}.npy'.format(process_path, data_name, self.attack_method, self.ptb_rate, self.lr))

self.load_data()

Expand Down Expand Up @@ -145,9 +149,10 @@ def reindex(self, df):

def run(self, data_name, use_edge_feat, attack_flag = False, attack_func = None):
process_path = os.path.join(get_process_root())
OUT_DF = '{}/ml_{}.csv'.format(process_path, data_name)
OUT_NODE_FEAT = '{}/ml_{}_node.npy'.format(process_path, data_name)
OUT_FEAT = '{}/ml_{}.npy'.format(process_path, data_name)
OUT_DF = '{}/ml_{}_{}_{}_lr_{}.csv'.format(process_path, data_name, self.attack_method, self.ptb_rate, self.lr)
OUT_NODE_FEAT = '{}/ml_{}_{}_{}_lr_{}_node.npy'.format(process_path, data_name, self.attack_method, self.ptb_rate, self.lr)
OUT_FEAT = '{}/ml_{}_{}_{}_lr_{}.npy'.format(process_path, data_name, self.attack_method, self.ptb_rate, self.lr)
print(OUT_DF)

df = self.preprocess(data_name, attack_flag, attack_func)
new_df = self.reindex(df)
Expand Down
9 changes: 6 additions & 3 deletions modules/dataset/temporal_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,18 @@ def prepare(self, data_name, use_feat, attack_flag = False, attack_func = None):

adj_time_list_path = os.path.join(get_dataset_root(), data_name, "adj_time_list.pickle")
with open(adj_time_list_path, 'rb') as handle:
self.adj_time_list = pickle.load(handle,encoding="bytes")

assert self.adj_time_list[0].max() == 1.0
if data_name in ['wikipedia', 'reddit']:
self.adj_time_list = pickle.load(handle,encoding="bytes")
assert self.adj_time_list[0].max() == 1.0
elif data_name in ['enron10', 'dblp', 'fb']:
self.adj_time_list = pickle.load(handle, encoding='latin1')

adj_orig_dense_list_path = os.path.join(get_dataset_root(), data_name, "adj_orig_dense_list.pickle")
with open(adj_orig_dense_list_path, 'rb') as handle:
self.adj_orig_dense_list = pickle.load(handle,encoding="bytes")

self.num_nodes = self.gen_node_number(self.adj_time_list)

# self.adj_orig_dense_list = csr_matrix_to_tensor(self.adj_time_list, self.num_nodes)
# self.adj_orig_dense_list, self.adj_time_list = to_undirect(self.adj_orig_dense_list) # to undirect
# self.adj_time_list = to_undirect(self.adj_time_list) # to undirect
Expand Down
3 changes: 0 additions & 3 deletions modules/models/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,6 @@ def dispatcher(cfg):
if encoder == "linear":
from .encoders import VariationalLinearEncoder as encoder_cls
return VGAE_cls, encoder_cls
if model_name == "GCN":
from models.GCN import GCN as GCN_cls
return GCN_cls

# Discrete Temporal Network
if model_name == "VGRNN":
Expand Down
6 changes: 5 additions & 1 deletion modules/models/dysat.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def __init__(self, num_features, time_length):
self.bceloss = BCEWithLogitsLoss()

def forward(self, graphs):

torch.cuda.empty_cache()
# Structural Attention forward
structural_out = []
for t in range(0, self.num_time_steps):
Expand Down Expand Up @@ -118,6 +118,8 @@ def get_loss(self, feed_dict):
pos_loss = self.bceloss(pos_score, torch.ones_like(pos_score))
neg_loss = self.bceloss(neg_score, torch.ones_like(neg_score))
graphloss = pos_loss + self.neg_weight*neg_loss
# print("pos_loss", pos_loss)
# print("neg_loss", self.neg_weight*neg_loss)
self.graph_loss += graphloss
return self.graph_loss

Expand Down Expand Up @@ -150,6 +152,7 @@ def __init__(self,
self.xavier_init()

def forward(self, graph):
torch.cuda.empty_cache()
graph = copy.deepcopy(graph)
edge_index = graph.edge_index
edge_weight = graph.edge_weight.reshape(-1, 1)
Expand Down Expand Up @@ -209,6 +212,7 @@ def __init__(self,


def forward(self, inputs):
torch.cuda.empty_cache()
"""In: attn_outputs (of StructuralAttentionLayer at each snapshot):= [N, T, F]"""
# 1: Add position embeddings to input
position_inputs = torch.arange(0,self.num_time_steps).reshape(1, -1).repeat(inputs.shape[0], 1).long().to(inputs.device)
Expand Down
1 change: 1 addition & 0 deletions modules/models/egcn_h.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ def forward(self,A_list, Nodes_list):
Overriding their forward method to return all timesteps
instead of just the last one
'''
torch.cuda.empty_cache()
masks = [torch.zeros(A_list[0].size(0),1).to(self.device) for _ in range(len(A_list))]

for unit in self.GRCU_layers:
Expand Down
2 changes: 1 addition & 1 deletion modules/models/egcn_o.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def forward(self,A_list, Nodes_list):
Overriding their forward method to return all timesteps
instead of just the last one
'''

torch.cuda.empty_cache()
for unit in self.GRCU_layers:
Nodes_list = unit(A_list,Nodes_list)

Expand Down
3 changes: 2 additions & 1 deletion modules/models/euler.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,8 @@ def calc_loss(self, t_scores, f_scores):
EPS = 1e-6
pos_loss = -torch.log(t_scores+EPS).mean()
neg_loss = -torch.log(1-f_scores+EPS).mean()

print("pos loss:", (1-self.neg_weight) * pos_loss )
print("neg loss:", self.neg_weight * neg_loss)
return (1-self.neg_weight) * pos_loss + self.neg_weight * neg_loss


Expand Down
Loading

0 comments on commit 044c505

Please sign in to comment.