Commit

Add files via upload
ShinKyuY authored Jul 22, 2021
1 parent 453b89e commit e157eaa
Showing 5 changed files with 425 additions and 0 deletions.
102 changes: 102 additions & 0 deletions GESM.py
@@ -0,0 +1,102 @@
import numpy as np
import tensorflow as tf  # TensorFlow 1.x API (sparse_placeholder, tf.layers, tf.contrib)

from utils import *


class GESM():
    def __init__(self, hid_units, Aprop_num, n_heads, n_nodes, f_dimension, nb_classes,
                 lr, reg, prob, prob_att, beta, adj_idx, idx_size):
        # Placeholders for sparse features, labels, masks, adjacency, and dropout rates.
        self.X = tf.sparse_placeholder(tf.float32, name='X')
        self.y = tf.placeholder('float32', name='y')
        self.mask = tf.placeholder('float32', name='Mask')
        self.adj = tf.sparse_placeholder(tf.float32, name='ADJ')
        self.dropout = tf.placeholder('float32', name='dropout')
        self.dropout_att = tf.placeholder('float32', name='dropout_att')
        self.nodes = n_nodes
        self.prob = prob
        self.prob_att = prob_att
        self.reg = reg

        # Multi-head attention over the input features (NI_ATT is provided by utils).
        attns = []
        z_save = []
        for _ in range(n_heads):
            att_z, z = NI_ATT(self.X,
                              in_sz=f_dimension,
                              adj_mat=self.adj,
                              out_sz=hid_units,
                              activation=tf.nn.elu,
                              nb_nodes=n_nodes,
                              att_drop=self.dropout_att)
            attns.append(att_z)
            z_save.append(z)
        z_att = tf.concat(attns, axis=-1)
        z = tf.concat(z_save, axis=-1)
        self.recon_loss = self.recon_reg(adj_idx, idx_size, z, n_nodes, beta=beta)

        # Propagate the attended representation through the adjacency Aprop_num - 1
        # additional times and concatenate all propagation steps before classification.
        self.concat_z = [z_att]
        for i in range(Aprop_num - 1):
            self.concat_z.append(dot(self.adj, self.concat_z[i], True))
        h = tf.concat(self.concat_z, 1)
        h = tf.nn.dropout(h, self.dropout)
        logits = tf.layers.dense(h, nb_classes, activation=None,
                                 kernel_initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        self.loss(logits, lr)

    def accuracy(self, X, Y, mask, adj):
        """Get accuracy."""
        return self.sess.run(self.acc,
                             feed_dict={self.adj: adj,
                                        self.X: X,
                                        self.y: Y,
                                        self.mask: mask,
                                        self.dropout: 1.0,
                                        self.dropout_att: 1.0})

    def train(self, batch_xs, batch_ys, mask, adj):
        _ = self.sess.run(self.trains, feed_dict={self.y: batch_ys,
                                                  self.adj: adj,
                                                  self.X: batch_xs,
                                                  self.mask: mask,
                                                  self.dropout: self.prob,
                                                  self.dropout_att: self.prob_att})

    def loss(self, logits, lr):
        # L2 regularization over all non-bias trainable variables.
        var = tf.trainable_variables()
        lossL2 = tf.add_n([tf.nn.l2_loss(v) for v in var if 'bias' not in v.name]) * self.reg

        self.cost = masked_softmax_cross_entropy(logits, self.y, self.mask) + lossL2 + self.recon_loss

        self.optimizer = tf.train.AdamOptimizer(learning_rate=lr, beta1=0.9)
        self.trains = self.optimizer.minimize(self.cost)
        self.acc = masked_accuracy(logits, self.y, self.mask)

        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

    def recon_reg(self, adj_idx, idx_size, z, n_nodes, beta):
        # Margin-style reconstruction regularizer: sampled edge pairs (positives) and
        # randomly paired nodes (negatives), scored by the sigmoid of embedding dot products.
        idx_pos = np.random.choice(np.arange(len(adj_idx)), size=idx_size, replace=False)
        node_self = tf.gather(z, adj_idx[idx_pos][:, 0])
        neighborhood = tf.gather(z, adj_idx[idx_pos][:, 1])
        dot_product = node_self * neighborhood
        adj_pos = tf.nn.sigmoid(tf.reduce_sum(dot_product, axis=-1))
        adj_one = tf.ones(idx_size)
        positive = beta * (adj_one - adj_pos)

        idx_neg = np.random.choice(np.arange(n_nodes), size=idx_size, replace=False)
        negative_node = tf.gather(z, idx_neg)
        dot_product = node_self * negative_node
        adj_neg = tf.nn.sigmoid(tf.reduce_sum(dot_product, axis=-1))
        negative = (1 - beta) * (adj_one - adj_neg)
        return tf.reduce_sum(tf.maximum(0.0, positive - negative)) / tf.constant(idx_size, dtype=tf.float32)

    def get_loss(self, batch_xs, batch_ys, mask, adj):
        c = self.sess.run(self.cost, feed_dict={self.X: batch_xs,
                                                self.y: batch_ys,
                                                self.mask: mask,
                                                self.dropout: 1.0,
                                                self.dropout_att: 1.0,
                                                self.adj: adj})
        return c
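
For reference, a minimal NumPy sketch of the quantity recon_reg estimates, computed outside the TensorFlow graph. The function recon_reg_numpy and the standalone setup are illustrative only, not part of this commit:

# Illustrative NumPy version of the margin-style reconstruction regularizer in GESM.recon_reg.
import numpy as np

def recon_reg_numpy(z, adj_idx, idx_size, n_nodes, beta, rng=np.random):
    """z: (n_nodes, d) embedding matrix; adj_idx: (num_edges, 2) array of edge index pairs."""
    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))

    # Positive pairs: sampled edges. Negative pairs: the same source nodes vs. random nodes.
    idx_pos = rng.choice(len(adj_idx), size=idx_size, replace=False)
    src, dst = adj_idx[idx_pos][:, 0], adj_idx[idx_pos][:, 1]
    pos_score = sigmoid(np.sum(z[src] * z[dst], axis=-1))
    idx_neg = rng.choice(n_nodes, size=idx_size, replace=False)
    neg_score = sigmoid(np.sum(z[src] * z[idx_neg], axis=-1))
    positive = beta * (1.0 - pos_score)
    negative = (1.0 - beta) * (1.0 - neg_score)
    return np.maximum(0.0, positive - negative).sum() / idx_size

# Example with random data:
# z = np.random.randn(100, 16); edges = np.array([(i, (i + 1) % 100) for i in range(100)])
# print(recon_reg_numpy(z, edges, idx_size=20, n_nodes=100, beta=1.0))

With beta = 1.0, as used in main.py below, the negative branch vanishes and the term reduces to the mean of 1 - sigmoid(z_i . z_j) over the sampled edges.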
87 changes: 87 additions & 0 deletions data_load.py
@@ -0,0 +1,87 @@
import pickle as pkl
import scipy.sparse as sp
import numpy as np
import networkx as nx
import sys


def parse_index_file(filename):
    """Parse index file."""
    index = []
    for line in open(filename):
        index.append(int(line.strip()))
    return index


def sample_mask(idx, l):
    """Create mask."""
    mask = np.zeros(l)
    mask[idx] = 1
    return np.array(mask, dtype=np.bool)


def load_data(dataset_str):
    """
    Loads input data from the data/ directory.

    ind.dataset_str.x          => the feature vectors of the training instances as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.tx         => the feature vectors of the test instances as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.allx       => the feature vectors of both labeled and unlabeled training instances
                                  (a superset of ind.dataset_str.x) as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.y          => the one-hot labels of the labeled training instances as numpy.ndarray object;
    ind.dataset_str.ty         => the one-hot labels of the test instances as numpy.ndarray object;
    ind.dataset_str.ally       => the labels for instances in ind.dataset_str.allx as numpy.ndarray object;
    ind.dataset_str.graph      => a dict in the format {index: [index_of_neighbor_nodes]} as collections.defaultdict object;
    ind.dataset_str.test.index => the indices of test instances in graph, for the inductive setting, as list object.

    All objects above must be saved using the python pickle module.

    :param dataset_str: Dataset name
    :return: All data input files loaded (as well as the training/test data).
    """
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in range(len(names)):
        with open("data/ind.{}.{}".format(dataset_str, names[i]), 'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))

    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file("data/ind.{}.test.index".format(dataset_str))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset_str == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder) + 1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range - min(test_idx_range), :] = ty
        ty = ty_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]

    idx_test = test_idx_range.tolist()
    idx_train = range(len(y))
    idx_val = range(len(y), len(y) + 500)

    train_mask = sample_mask(idx_train, labels.shape[0])
    val_mask = sample_mask(idx_val, labels.shape[0])
    test_mask = sample_mask(idx_test, labels.shape[0])

    y_train = np.zeros(labels.shape)
    y_val = np.zeros(labels.shape)
    y_test = np.zeros(labels.shape)
    y_train[train_mask, :] = labels[train_mask, :]
    y_val[val_mask, :] = labels[val_mask, :]
    y_test[test_mask, :] = labels[test_mask, :]

    return adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask
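
As a quick, illustrative check, load_data can be exercised as below (assuming the GCN-style pickled files are present under data/); the shapes and mask sizes match the Cora run recorded in run_cora.ipynb further down:

# Illustrative usage of load_data; not part of this commit.
from data_load import load_data
import numpy as np

adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data('cora')
print(features.shape)   # (2708, 1433) sparse feature matrix
print(y_train.shape)    # (2708, 7) one-hot labels, zeroed outside the training mask
print(np.sum(train_mask), np.sum(val_mask), np.sum(test_mask))   # 140 500 1000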

62 changes: 62 additions & 0 deletions main.py
@@ -0,0 +1,62 @@
from data_load import *
from GESM import *
from utils import *

import numpy as np
import tensorflow as tf

# Hyperparameters
n = 20               # early-stopping patience (epochs without improvement)
lr = 3e-3            # learning rate
layer = 64           # hidden units per attention head
layer_num = 15       # number of adjacency propagation steps (Aprop_num)
dropout = 0.7        # value fed to the feature-dropout placeholder during training (1.0 at evaluation)
dropout_att = 0.3    # value fed to the attention-dropout placeholder during training (1.0 at evaluation)
reg = 3e-3           # L2 regularization weight
heads = 8            # number of attention heads
beta = 1.0           # weight on the positive term of the reconstruction regularizer


def train():
    adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data('cora')
    N, D = features.shape
    print('feature shape: ', (N, D))
    features = preprocess_features(features)
    print('feature preproecess ended')
    adj = markov(adj)
    print('adj preproecess ended')
    print('y_train shape: ', y_train.shape)
    print('test_set: ', np.sum(test_mask))
    print('train_set: ', np.sum(train_mask))
    print('dataset loading ended..')

    tf.reset_default_graph()
    model = GESM(layer, layer_num, heads, N, D, y_train.shape[1], lr, reg,
                 prob=dropout, prob_att=dropout_att, beta=beta,
                 adj_idx=adj[0], idx_size=int(len(adj[0]) * 0.05))

    print('start training..')
    min_val_loss = 100
    max_val_acc = 0
    val_acc_save = []
    val_loss_save = []
    for epoch in range(300):
        model.train(features, y_train, train_mask, adj)
        val_loss = model.get_loss(features, y_val, val_mask, adj)
        val_acc = model.accuracy(features, y_val, val_mask, adj)
        val_acc_save.append(val_acc)
        val_loss_save.append(val_loss)
        # Early stopping: reset the patience counter whenever either validation metric
        # improves; record the test accuracy when both improve simultaneously.
        if val_acc >= max_val_acc or val_loss <= min_val_loss:
            if val_acc >= max_val_acc and val_loss <= min_val_loss:
                test_acc = model.accuracy(features, y_test, test_mask, adj)
            max_val_acc = np.max(val_acc_save)
            min_val_loss = np.min(val_loss_save)
            step_counter = 0
        else:
            step_counter += 1
            if step_counter == n:
                break
    print(test_acc * 100)
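
The loop above implements patience-based early stopping: the counter resets whenever validation accuracy or loss improves on its best value so far, and the test accuracy is snapshotted only when both improve at once. A stripped-down sketch of just that rule, with a hypothetical evaluate callback, is:

# Illustrative skeleton of the early-stopping rule used in train(); evaluate is a stand-in.
import numpy as np

def early_stopping_demo(evaluate, max_epochs=300, patience=20):
    """evaluate(epoch) -> (val_acc, val_loss, test_acc)."""
    best_acc, best_loss = 0.0, np.inf
    counter, recorded_test = 0, None
    for epoch in range(max_epochs):
        val_acc, val_loss, test_acc = evaluate(epoch)
        if val_acc >= best_acc or val_loss <= best_loss:
            if val_acc >= best_acc and val_loss <= best_loss:
                recorded_test = test_acc   # snapshot at a joint improvement
            best_acc = max(best_acc, val_acc)
            best_loss = min(best_loss, val_loss)
            counter = 0
        else:
            counter += 1
            if counter == patience:
                break
    return recorded_test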



66 changes: 66 additions & 0 deletions run_cora.ipynb
@@ -0,0 +1,66 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import main "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "feature shape: (2708, 1433)\n",
      "feature preproecess ended\n",
      "adj preproecess ended\n",
      "y_train shape: (2708, 7)\n",
      "test_set: 1000\n",
      "train_set: 140\n",
      "dataset loading ended..\n",
      "start training..\n",
      "84.60000157356262\n"
     ]
    }
   ],
   "source": [
    "main.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}