init

yusufsahin99 · May 24, 2022 · 016956a · 016956a
commit 016956a
Show file tree

Hide file tree

Showing 192 changed files with 15,788 additions and 0 deletions.
diff --git a/.gitattributes b/.gitattributes
@@ -0,0 +1,2 @@
+# Exclude notebook from language statistics
+*.ipynb linguist-documentation
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,162 @@
+# CUSTOM
+.vscode/
+scripts/pcqm4m/**/*.zip
+scripts/pcqm4m/**/*.sdf
+scripts/pcqm4m/**/*.xyz
+scripts/pcqm4m/**/*.csv
+!scripts/pcqm4m/**/periodic_table.csv
+scripts/pcqm4m/**/*.gz
+scripts/pcqm4m/**/*.tsv
+scripts/pcqm4m/pcqm4m-v2/
+slurm_history/
+datasets/
+pretrained/
+results/
+vocprep/benchmark_RELEASE/
+vocprep/voc_viz_files/
+vocprep/VOC/benchmark_RELEASE/
+vocprep/VOC/*.tgz
+vocprep/VOC/*.pickle
+vocprep/VOC/*.pkl
+vocprep/VOC/*.zip
+splits/
+wandb/
+__pycache__/
+.idea
+*.log
+*.bak
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# vim edit buffer
+*.swp
+
+
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2022 Ladislav Rampášek, Michael Galkin, Vijay Prakash Dwivedi, Dominique Beaini
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
@@ -0,0 +1,79 @@
+# GraphGPS: General Powerful Scalable Graph Transformers
+
+How to build a graph Transformer? We provide a 3-part recipe on how to build graph Transformers with linear complexity. Our GPS recipe consists of choosing 3 main ingredients:
+1. positional/structural encoding,
+2. local message-passing mechanism,
+3. global attention mechanism.
+
+In this *GraphGPS* package we provide several positional/structural encodings and model choices, implementing the GPS recipe. GraphGPS is built using [PyG](https://www.pyg.org/) and [GraphGym from PyG2](https://pytorch-geometric.readthedocs.io/en/2.0.0/notes/graphgym.html).
+Specifically, PyG v2.0.2 is required.
+
+
+### Python environment setup with Conda
+
+```bash
+conda create -n graphgps python=3.9
+conda activate graphgps
+
+conda install pytorch=1.9 torchvision torchaudio -c pytorch -c nvidia
+conda install pyg=2.0.2 -c pyg -c conda-forge
+conda install pandas scikit-learn
+
+# RDKit is required for OGB-LSC PCQM4Mv2 and datasets derived from it.  
+conda install openbabel fsspec rdkit -c conda-forge
+
+pip install performer-pytorch
+pip install torchmetrics==0.7.2
+pip install ogb
+pip install wandb
+
+conda clean --all
+```
+
+
+### Running GraphGPS
+```bash
+conda activate graphgps
+
+# Running GPS with RWSE and tuned hyperparameters for ZINC.
+python main.py --cfg configs/GPS/zinc-GPS+RWSE.yaml  wandb.use False
+
+# Running config with tuned SAN hyperparams for ZINC.
+python main.py --cfg configs/SAN/zinc-SAN.yaml  wandb.use False
+
+# Running a debug/dev config for ZINC.
+python main.py --cfg tests/configs/graph/zinc.yaml  wandb.use False
+```
+
+
+### Benchmarking GPS on 11 datasets
+See the `run/run_experiments.sh` script to run multiple random seeds for each of the 11 datasets. We rely on the Slurm job scheduling system.
+
+Alternatively, you can run them in a terminal following the example below. Configs for all 11 datasets are in `configs/GPS/`.
+```bash
+conda activate graphgps
+# Run 10 repeats with 10 different random seeds (0..9):
+python main.py --cfg configs/GPS/zinc-GPS+RWSE.yaml  --repeat 10  wandb.use False
+# Run a particular random seed:
+python main.py --cfg configs/GPS/zinc-GPS+RWSE.yaml  --repeat 1  seed 42  wandb.use False
+```
+
+
+### W&B logging
+To use W&B logging, set `wandb.use True` and have a `gtransformers` entity set up in your W&B account (or change it to whatever else you like by setting `wandb.entity`).
+
+
+
+## Unit tests
+
+To run all unit tests, execute from the project root directory:
+
+```bash
+python -m unittest -v
+```
+
+Or specify a particular test module, e.g.:
+
+```bash
+python -m unittest -v unittests.test_eigvecs
+```
diff --git a/configs/GPS/cifar10-GPS+RWSE.yaml b/configs/GPS/cifar10-GPS+RWSE.yaml
@@ -0,0 +1,61 @@
+out_dir: results  # `results/` is listed in the repository .gitignore
+metric_best: accuracy
+wandb:  # W&B logging; the README examples switch it off on the CLI with `wandb.use False`
+  use: True
+  project: CIFAR10
+dataset:
+  format: PyG-GNNBenchmarkDataset
+  name: CIFAR10
+  task: graph
+  task_type: classification
+  transductive: False
+  node_encoder: True
+  node_encoder_name: RWSE  # NOTE(review): presumably selects the encoder configured under `posenc_RWSE` below; confirm
+  node_encoder_bn: False
+  edge_encoder: True
+  edge_encoder_name: LinearEdge
+  edge_encoder_bn: False
+posenc_RWSE:
+  enable: True
+  kernel:
+    times_func: range(1,17)  # NOTE(review): looks like a Python range expression (1..16); confirm how the consumer evaluates it
+  model: Linear
+  dim_pe: 16
+  raw_norm_type: BatchNorm
+train:
+  mode: custom
+  batch_size: 16
+  eval_period: 1
+  ckpt_period: 100  # same value as `optim.max_epoch` in this file
+model:
+  type: GPSModel
+  loss_fun: cross_entropy
+  edge_decoding: dot
+  graph_pooling: mean
+gt:  # Hyperparameters optimized for ~100k budget.
+  layer_type: CustomGatedGCN+Transformer
+  layers: 3
+  n_heads: 4
+  dim_hidden: 52  # `gt.dim_hidden` must match `gnn.dim_inner`
+  dropout: 0.0
+  attn_dropout: 0.5
+  layer_norm: False
+  batch_norm: True
+gnn:
+  head: default
+  layers_pre_mp: 0
+  layers_post_mp: 2
+  dim_inner: 52  # `gt.dim_hidden` must match `gnn.dim_inner`
+  batchnorm: False
+  act: relu
+  dropout: 0.0
+  agg: mean
+  normalize_adj: False
+optim:  # Optimizer and learning-rate schedule settings for this config.
+  clip_grad_norm: True
+  optimizer: adamW
+  weight_decay: 1.0e-5  # keep the ".0": YAML 1.1 loaders (e.g. PyYAML) resolve a bare `1e-5` as a string, not a float
+  base_lr: 0.001
+  max_epoch: 100
+  scheduler: cosine_with_warmup
+  num_warmup_epochs: 5
diff --git a/configs/GPS/cifar10-GPS+SNDS.yaml b/configs/GPS/cifar10-GPS+SNDS.yaml
@@ -0,0 +1,66 @@
+out_dir: results  # `results/` is listed in the repository .gitignore
+metric_best: accuracy
+wandb:  # W&B logging; the README examples switch it off on the CLI with `wandb.use False`
+  use: True
+  project: CIFAR10
+dataset:
+  format: PyG-GNNBenchmarkDataset
+  name: CIFAR10
+  task: graph
+  task_type: classification
+  transductive: False
+  node_encoder: True
+  node_encoder_name: SignNet  # NOTE(review): presumably selects the encoder configured under `posenc_SignNet` below; confirm
+  node_encoder_bn: False
+  edge_encoder: True
+  edge_encoder_name: LinearEdge
+  edge_encoder_bn: False
+posenc_SignNet:
+  enable: True
+  eigen:
+    laplacian_norm: none
+    eigvec_norm: L2
+    max_freqs: 16  # Max graph size in CIFAR10 is 150, but they are 8-NN graphs
+  model: DeepSet
+  dim_pe: 16  # Note: In original SignNet codebase dim_pe is always equal to max_freq
+  layers: 8  # Num. layers in \phi model
+  post_layers: 3  # Num. layers in \rho model; The original uses the same as in \phi
+  phi_hidden_dim: 64
+  phi_out_dim: 64
+train:
+  mode: custom
+  batch_size: 16
+  eval_period: 1
+  ckpt_period: 100  # same value as `optim.max_epoch` in this file
+model:
+  type: GPSModel
+  loss_fun: cross_entropy
+  edge_decoding: dot
+  graph_pooling: mean
+gt:
+  layer_type: CustomGatedGCN+Transformer
+  layers: 3
+  n_heads: 4
+  dim_hidden: 52  # `gt.dim_hidden` must match `gnn.dim_inner`
+  dropout: 0.0
+  attn_dropout: 0.5
+  layer_norm: False
+  batch_norm: True
+gnn:
+  head: default
+  layers_pre_mp: 0
+  layers_post_mp: 2
+  dim_inner: 52  # `gt.dim_hidden` must match `gnn.dim_inner`
+  batchnorm: False
+  act: relu
+  dropout: 0.0
+  agg: mean
+  normalize_adj: False
+optim:  # Optimizer and learning-rate schedule settings for this config.
+  clip_grad_norm: True
+  optimizer: adamW
+  weight_decay: 1.0e-5  # keep the ".0": YAML 1.1 loaders (e.g. PyYAML) resolve a bare `1e-5` as a string, not a float
+  base_lr: 0.001
+  max_epoch: 100
+  scheduler: cosine_with_warmup
+  num_warmup_epochs: 5
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# Exclude notebook from language statistics
		*.ipynb linguist-documentation