Merge pull request #16 from Julian-Theis/development
Sampling hyperparameter k, variant log size, and biased variant log investigations
Julian-Theis authored Jul 1, 2021
2 parents 9f9a746 + 25aa0a1 commit ca3febe
Showing 97 changed files with 1,572 additions and 203 deletions.
52 changes: 52 additions & 0 deletions .gitignore
@@ -1,4 +1,5 @@
.idea/
conf/settings.py

data/pns/*
!data/pns/.gitkeep
@@ -14,3 +15,54 @@ data/avatar/variants/*

data/avatar/train_data/*
!data/avatar/train_data/.gitkeep

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]

# C extensions
*.so

# Distribution / packaging
bin/
build/
develop-eggs/
dist/
eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
.tox/
.coverage
.cache
nosetests.xml
coverage.xml

# Translations
*.mo

# Mr Developer
.mr.developer.cfg
.project
.pydevproject

# Rope
.ropeproject

# Django stuff:
*.log
*.pot

# Sphinx documentation
docs/_build/
6 changes: 5 additions & 1 deletion README.md
@@ -34,6 +34,10 @@ This leads conclusively to a closer expected generalization score when assessing
</center>

# How To
## Install
AVATAR has been developed using Python 3.6. The required Python libraries are listed in [requirements-cpu.txt](requirements-cpu.txt) and [requirements-gpu.txt](requirements-gpu.txt), depending on whether you plan to use a CPU or a GPU environment.
Please run [install/install_script.py](install/install_script.py) after creating your Python environment.

## Systems
### System Variant Playout
To play out a unique system variant log from, e.g., *PA System 11 3*, run the following command from the base directory. The script creates the system's variant log, a train variant log, and a test variant log as txt files. The train variants are additionally used to create a CSV-based and an XES-based event log suitable for process discovery.
@@ -91,7 +95,7 @@ python -m avatar.sampling --system <...> --job <0/1> --gpu <...> --suffix <...>
Evaluate the sampling.

```python
python -m avatar.evaluation --system <...> --job <0/1> --gpu <...> --suffix <...> --strategy <naive/mh>
python -m avatar.evaluation --system <...> --job <0/1> --suffix <...> --strategy <naive/mh>
```

### Generalization
46 changes: 46 additions & 0 deletions avatar/cleanup.py
@@ -0,0 +1,46 @@
import numpy as np
from tqdm import tqdm
import os
from os import listdir
from os.path import isfile, join
import argparse
from conf.settings import DATA_PATH

WORK_PATH = os.path.abspath(os.getcwd())

if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('-sfx', '--suffix', help='Suffix (selected epoch, e.g. 1981, to keep)', required=True)
parser.add_argument('-s', '--system', help='System name', required=True)
parser.add_argument('-j', '--job', help='Job (0,1)', required=True)

args = parser.parse_args()

system = args.system
suffix = int(args.suffix)
job = int(args.job)

if DATA_PATH is None:
directory = os.path.join(WORK_PATH, "data", "avatar", "sgans", system, str(job), "tf_logs", "ckpt")
else:
directory = os.path.join(DATA_PATH, "avatar", "sgans", system, str(job), "tf_logs", "ckpt")

allfiles = [f for f in listdir(directory) if isfile(join(directory, f))]
to_delete = []
for f in allfiles:
if f == "checkpoint" or "iw_dict_" in f:
continue

if ".pre_model-" in f:
to_delete.append(f)
continue

if system in f and ".adv_model-" in f:
index = int(str(f.split(system + ".adv_model-")[1]).split(".")[0])
if index != suffix:
to_delete.append(f)

for f in tqdm(to_delete):
os.remove(os.path.join(directory, f))

print("Done.")
28 changes: 21 additions & 7 deletions avatar/evaluation.py
@@ -1,6 +1,10 @@
import numpy as np
import os
import argparse
from util.playout import readVariantFile
from conf.settings import DATA_PATH

WORK_PATH = os.path.abspath(os.getcwd())

def readFile(f_name, unique=False):
traces = []
@@ -38,15 +42,25 @@ def intersection(lst1, lst2):
return list(set(ls1) & set(ls2))

def load_data(system, suffix, job, strategy=None):
f_train = "data/variants/" + system + "_train.txt"
f_test = "data/variants/" + system + "_test.txt"
f_pop = "data/variants/" + system + "_pop.txt"
f_eval = "data/avatar/train_data/" + system + "_eval.txt"

if strategy is None:
f_gan = "data/avatar/variants/" + system + "_relgan_" + str(suffix) + "_j" + str(job) + ".txt"
if DATA_PATH is None:
f_train = os.path.join(WORK_PATH, "data", "variants", system + "_train.txt")
f_test = os.path.join(WORK_PATH, "data", "variants", system + "_test.txt")
f_pop = os.path.join(WORK_PATH, "data", "variants", system + "_pop.txt")
f_eval = os.path.join(WORK_PATH, "data", "avatar", "train_data", system + "_eval.txt")
if strategy is None:
f_gan = os.path.join(WORK_PATH, "data", "avatar", "variants", system + "_relgan_" + str(suffix) + "_j" + str(job) + ".txt")
else:
f_gan = os.path.join(WORK_PATH, "data", "avatar", "variants", system + "_relgan_" + str(suffix) + "_j" + str(job) + "_" + strategy + ".txt")
else:
f_gan = "data/avatar/variants/" + system + "_relgan_" + str(suffix) + "_j" + str(job) + "_" + strategy + ".txt"
f_train = os.path.join(DATA_PATH, "variants", system + "_train.txt")
f_test = os.path.join(DATA_PATH, "variants", system + "_test.txt")
f_pop = os.path.join(DATA_PATH, "variants", system + "_pop.txt")
f_eval = os.path.join(DATA_PATH, "avatar", "train_data", system + "_eval.txt")
if strategy is None:
f_gan = os.path.join(DATA_PATH, "avatar", "variants", system + "_relgan_" + str(suffix) + "_j" + str(job) + ".txt")
else:
f_gan = os.path.join(DATA_PATH, "avatar", "variants", system + "_relgan_" + str(suffix) + "_j" + str(job) + "_" + strategy + ".txt")


train = readVariantFile(f_train, unique=False)
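The `load_data` change replaces hard-coded relative paths with a DATA_PATH-aware lookup: if `conf.settings.DATA_PATH` is unset, files are resolved under `data/` in the working directory, otherwise directly under DATA_PATH. A minimal sketch of that fallback pattern, assuming the directory layout shown in the diff (`resolve_path` is an illustrative helper, not a function from the repository):

```python
# Sketch of the DATA_PATH fallback pattern introduced across the scripts.
# `resolve_path` is an illustrative helper, not part of the repository.
import os

WORK_PATH = os.path.abspath(os.getcwd())

def resolve_path(data_path, *parts):
    """Join path parts under DATA_PATH when it is set, else under <working directory>/data."""
    if data_path is None:
        return os.path.join(WORK_PATH, "data", *parts)
    return os.path.join(data_path, *parts)

# Example: evaluation inputs for system "pa_system_11_3", suffix 1981, job 0, strategy "mh".
system, suffix, job, strategy = "pa_system_11_3", 1981, 0, "mh"
f_train = resolve_path(None, "variants", system + "_train.txt")
f_gan = resolve_path(None, "avatar", "variants",
                     "{}_relgan_{}_j{}_{}.txt".format(system, suffix, job, strategy))
print(f_train)
print(f_gan)
```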
21 changes: 16 additions & 5 deletions avatar/generalization.py
@@ -9,6 +9,10 @@
from pm4py.evaluation.replay_fitness import factory as replay_factory
from pm4py.evaluation.precision import factory as precision_factory

from conf.settings import DATA_PATH

WORK_PATH = os.path.abspath(os.getcwd())

def readFile(f_name1, f_name2, unique=False):
traces = []

@@ -93,12 +97,19 @@ def convertToCsv(traces, to_path):
pn = args.pn
strategy = args.strategy

train_file = "data/variants/" + system + "_train.txt"
gen_file = "data/avatar/variants/" + system + "_relgan_" + str(suffix) + "_j" + str(job) + "_" + strategy + ".txt"
csv_file = "data/avatar/variants/" + system + "_relgan_" + str(suffix) + "_j" + str(job) + "_" + strategy + "_generalization.csv"
xes_file = "data/avatar/variants/" + system + "_relgan_" + str(suffix) + "_j" + str(job) + "_" + strategy + "_generalization.xes"
if DATA_PATH is None:
train_file = os.path.join(WORK_PATH, "data", "variants", system + "_train.txt")
gen_file = os.path.join(WORK_PATH, "data", "avatar", "variants", system + "_relgan_" + str(suffix) + "_j" + str(job) + "_" + strategy + ".txt")
csv_file = os.path.join(WORK_PATH, "data", "avatar", "variants", system + "_relgan_" + str(suffix) + "_j" + str(job) + "_" + strategy + "_generalization.csv")
xes_file = os.path.join(WORK_PATH, "data", "avatar", "variants", system + "_relgan_" + str(suffix) + "_j" + str(job) + "_" + strategy + "_generalization.xes")
pn_file = os.path.join(WORK_PATH, "data", "pns", system, pn)
else:
train_file = os.path.join(DATA_PATH, "variants", system + "_train.txt")
gen_file = os.path.join(DATA_PATH, "avatar", "variants", system + "_relgan_" + str(suffix) + "_j" + str(job) + "_" + strategy + ".txt")
csv_file = os.path.join(DATA_PATH, "avatar", "variants", system + "_relgan_" + str(suffix) + "_j" + str(job) + "_" + strategy + "_generalization.csv")
xes_file = os.path.join(DATA_PATH, "avatar", "variants", system + "_relgan_" + str(suffix) + "_j" + str(job) + "_" + strategy + "_generalization.xes")
pn_file = os.path.join(DATA_PATH, "pns", system, pn)

pn_file = os.path.join('data/pns', system, pn)

""" READ FILES AND CONVERT TO XES """
traces = readFile(train_file,gen_file, unique=True)
59 changes: 2 additions & 57 deletions avatar/relgan/real/real_gan/real_train.py
@@ -40,63 +40,8 @@ def real_train(generator, discriminator, oracle_loader, config, prob_discriminat
gen_file = os.path.join(sample_dir, 'generator.txt')
gen_text_file = os.path.join(sample_dir, 'generator_text.txt')
csv_file = os.path.join(log_dir, 'experiment-log-rmcgan.csv')
data_file = os.path.join(data_dir, '{}.txt'.format(dataset))
if dataset == 'image_coco':
test_file = os.path.join(data_dir, 'testdata/test_coco.txt')
elif dataset == 'emnlp_news':
test_file = os.path.join(data_dir, 'testdata/test_emnlp.txt')
elif dataset == 'logic1':
test_file = os.path.join(data_dir, 'logic1.txt')
elif dataset == 'pb_system_1_5':
test_file = os.path.join(data_dir, 'pb_system_1_5.txt')
elif dataset == 'pb_system_2_4':
test_file = os.path.join(data_dir, 'pb_system_2_4.txt')
elif dataset == 'pb_system_3_6':
test_file = os.path.join(data_dir, 'pb_system_3_6.txt')
elif dataset == 'pb_system_4_1':
test_file = os.path.join(data_dir, 'pb_system_4_1.txt')
elif dataset == 'pb_system_4_1_10':
test_file = os.path.join(data_dir, 'pb_system_4_1_10.txt')
elif dataset == 'pb_system_4_1_30':
test_file = os.path.join(data_dir, 'pb_system_4_1_30.txt')
elif dataset == 'pb_system_4_1_50':
test_file = os.path.join(data_dir, 'pb_system_4_1_50.txt')
elif dataset == 'pb_system_5_3':
test_file = os.path.join(data_dir, 'pb_system_5_3.txt')

elif dataset == 'pb_system_5_3_b':
test_file = os.path.join(data_dir, 'pb_system_5_3_b.txt')
elif dataset == 'pb_system_1_5_b':
test_file = os.path.join(data_dir, 'pb_system_1_5_b.txt')
elif dataset == 'pb_system_2_4_b':
test_file = os.path.join(data_dir, 'pb_system_2_4_b.txt')
elif dataset == 'pb_system_3_6_b':
test_file = os.path.join(data_dir, 'pb_system_3_6_b.txt')
elif dataset == 'pb_system_4_1_b':
test_file = os.path.join(data_dir, 'pb_system_4_1_b.txt')

elif dataset == 'pa_system_1_5':
test_file = os.path.join(data_dir, 'pa_system_1_5.txt')
elif dataset == 'pa_system_2_3':
test_file = os.path.join(data_dir, 'pa_system_2_3.txt')
elif dataset == 'pa_system_4_3':
test_file = os.path.join(data_dir, 'pa_system_4_3.txt')
elif dataset == 'pa_system_5_5':
test_file = os.path.join(data_dir, 'pa_system_5_5.txt')
elif dataset == 'pa_system_6_3':
test_file = os.path.join(data_dir, 'pa_system_6_3.txt')
elif dataset == 'pa_system_7_7':
test_file = os.path.join(data_dir, 'pa_system_7_7.txt')
elif dataset == 'pa_system_8_3':
test_file = os.path.join(data_dir, 'pa_system_8_3.txt')
elif dataset == 'pa_system_9_1':
test_file = os.path.join(data_dir, 'pa_system_9_1.txt')
elif dataset == 'pa_system_10_2':
test_file = os.path.join(data_dir, 'pa_system_10_2.txt')
elif dataset == 'pa_system_11_3':
test_file = os.path.join(data_dir, 'pa_system_11_3.txt')
else:
raise NotImplementedError('Unknown dataset!')
data_file = os.path.join(data_dir, "..", "train_data", '{}.txt'.format(dataset))
test_file = os.path.join(data_dir, "..", "train_data", '{}.txt'.format(dataset))

# create necessary directories
if not os.path.exists(data_dir):
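The per-dataset elif chain is collapsed into a single generic path: both `data_file` and `test_file` now point to `../train_data/{dataset}.txt` relative to `data_dir`, so adding a new system no longer requires editing this function. A small sketch of the resulting resolution (the wrapper function is illustrative; the join expressions mirror the diff):

```python
# Sketch of the simplified dataset path resolution from real_train.py.
# The wrapper function is illustrative; the join expressions mirror the diff.
import os

def dataset_files(data_dir: str, dataset: str):
    """Resolve the train/test text files for any dataset without a per-dataset elif chain."""
    data_file = os.path.join(data_dir, "..", "train_data", "{}.txt".format(dataset))
    test_file = os.path.join(data_dir, "..", "train_data", "{}.txt".format(dataset))
    return os.path.normpath(data_file), os.path.normpath(test_file)

# Any new system name works without touching real_train.py:
print(dataset_files("data/avatar/sgans", "pa_system_11_3"))
```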
24 changes: 20 additions & 4 deletions avatar/relgan/run.py
@@ -6,6 +6,9 @@
from avatar.relgan.utils.text_process import text_precess
from avatar.relgan.utils.utils import pp
import avatar.relgan.models as models
from conf.settings import DATA_PATH

WORK_PATH = os.path.abspath(os.getcwd())

def main(given_args=None):
parser = argparse.ArgumentParser(description='Train and run a RelGAN')
@@ -60,7 +63,15 @@ def main(given_args=None):
parser.add_argument('--gen-emb-dim', default=32, type=int, help="generator embedding dimension")
parser.add_argument('--dis-emb-dim', default=64, type=int, help="TOTAL discriminator embedding dimension")
parser.add_argument('--num-rep', default=64, type=int, help="number of discriminator embedded representations")
parser.add_argument('--data-dir', default='/data/julian/data/relgan/data', type=str,

if DATA_PATH is None:
def_dir = os.path.join(WORK_PATH, "data", "avatar", "sgans")
else:
def_dir = os.path.join(DATA_PATH, "avatar", "sgans")

parser.add_argument('--data-dir',
default=def_dir,
type=str,
help='Where data is stored')

if given_args is None:
@@ -72,12 +83,17 @@

print(config)

data_file = os.path.join(args.data_dir, '{}.txt'.format(args.dataset))
#data_file = os.path.join(args.data_dir, "..", "train_data", '{}.txt'.format(args.dataset))

if DATA_PATH is None:
seq_vocab_file = os.path.join(WORK_PATH, "data", "variants", str(args.dataset) + "_train.txt")
else:
seq_vocab_file = os.path.join(DATA_PATH, "variants", str(args.dataset) + "_train.txt")

if args.dataset == 'pb_system_4_1_10':
if args.dataset == 'pb_system_4_1_10' or '_s10' in args.dataset:
args.batch_size = 32

seq_len, vocab_size = text_precess(data_file)
seq_len, vocab_size = text_precess(seq_vocab_file)
config['seq_len'] = seq_len
config['vocab_size'] = vocab_size
print('seq_len: %d, vocab_size: %d' % (seq_len, vocab_size))
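run.py now derives `seq_len` and `vocab_size` from the system's train variant file (resolved via DATA_PATH) rather than from the GAN data file, and drops the batch size to 32 for `_s10` datasets (presumably the variant-log-size experiments). A hedged sketch of what a `text_precess`-style computation could look like for a whitespace-separated variant file; this is an assumption about its behaviour, not the actual RelGAN utility:

```python
# Hedged sketch: an approximation of what a text_precess-style routine computes
# (maximum sequence length and vocabulary size) from a variant file in which each
# line is a whitespace-separated trace. This is NOT the actual RelGAN utility.

def sequence_stats(path: str):
    """Return (max sequence length, vocabulary size) for a whitespace-separated variant file."""
    seq_len, vocab = 0, set()
    with open(path) as handle:
        for line in handle:
            tokens = line.split()
            if not tokens:
                continue
            seq_len = max(seq_len, len(tokens))
            vocab.update(tokens)
    return seq_len, len(vocab)

# Usage (assuming the train variant file produced by the playout step exists):
# seq_len, vocab_size = sequence_stats("data/variants/pa_system_11_3_train.txt")
# print(seq_len, vocab_size)
```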
19 changes: 15 additions & 4 deletions avatar/sampling.py
@@ -6,6 +6,9 @@
from avatar.util.LoadRelgan import LoadRelgan
from avatar.util.MHGAN import MHGAN
from avatar.util.util import writeToFile, readTraces
from conf.settings import DATA_PATH

WORK_PATH = os.path.abspath(os.getcwd())

if __name__ == "__main__":
np.random.seed(seed=1234)
@@ -43,7 +46,7 @@
mh_count = int(args.mh_count)
mh_patience = int(args.mh_patience)
mh_k = int(args.mh_count)
mh_maxiter = int(args.mh_mi)
mh_maxiter = int(args.mh_maxiter)

os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

@@ -52,16 +55,24 @@
print("****** SAMPLE FOR SUFFIX ", suffix, " ******")
relgan = LoadRelgan(system=system, suffix=suffix, job=job)

f_out = "data/avatar/variants/" + system + "_relgan_" + str(suffix) + "_j" + str(job) + "_naive.txt"
if DATA_PATH is None:
f_out = os.path.join(WORK_PATH, "data", "avatar", "variants", system + "_relgan_" + str(suffix) + "_j" + str(job) + "_naive.txt")
else:
f_out = os.path.join(DATA_PATH, "avatar", "variants",system + "_relgan_" + str(suffix) + "_j" + str(job) + "_naive.txt")

print("Start NAIVE SAMPLING")
gen_samples = relgan.generate(n_samples=n_samples)
print("Generated samples - shape:", gen_samples.shape)
print("Writing to file", f_out)
writeToFile(relgan, f_out, gen_samples)

elif strategy == "mh":
eval_path = "data/avatar/train_data/" + system + "_eval.txt"
f_out = "data/avatar/variants/" + system + "_relgan_" + str(suffix) + "_j" + str(job) + "_mh.txt"
if DATA_PATH is None:
eval_path = os.path.join(WORK_PATH, "data", "avatar", "train_data", system + "_eval.txt")
f_out = os.path.join(WORK_PATH, "data", "avatar", "variants", system + "_relgan_" + str(suffix) + "_j" + str(job) + "_mh.txt")
else:
eval_path = os.path.join(DATA_PATH, "avatar", "train_data", system + "_eval.txt")
f_out = os.path.join(DATA_PATH, "avatar", "variants", system + "_relgan_" + str(suffix) + "_j" + str(job) + "_mh.txt")

tf.reset_default_graph()
print("****** SAMPLE FOR SUFFIX ", suffix, " ******")
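sampling.py now writes its output under the same DATA_PATH-aware layout, with the strategy (`naive` or `mh`) encoded in the file name; only the Metropolis-Hastings branch additionally loads the `_eval.txt` training data. A small sketch of the per-strategy inputs and outputs (the helper `sampling_io` is illustrative, not part of the repository):

```python
# Sketch of the per-strategy inputs/outputs in avatar/sampling.py, based on the diff above.
# `sampling_io` is an illustrative helper, not part of the repository.
import os

def sampling_io(data_root: str, system: str, suffix: int, job: int, strategy: str):
    """Return (eval_path, output_path); eval_path is only needed for the 'mh' strategy."""
    out = os.path.join(data_root, "avatar", "variants",
                       "{}_relgan_{}_j{}_{}.txt".format(system, suffix, job, strategy))
    if strategy == "mh":
        eval_path = os.path.join(data_root, "avatar", "train_data", system + "_eval.txt")
        return eval_path, out
    return None, out

print(sampling_io("data", "pa_system_11_3", 1981, 0, "naive"))
print(sampling_io("data", "pa_system_11_3", 1981, 0, "mh"))
```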
