Skip to content

Commit

Permalink
Release 1.0.0rc1
Browse files Browse the repository at this point in the history
  • Loading branch information
habiba-h committed Apr 22, 2020
2 parents 55a4ba3 + 682513a commit 625a99b
Show file tree
Hide file tree
Showing 218 changed files with 12,563 additions and 3,942 deletions.
26 changes: 22 additions & 4 deletions .buildkite/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@ services:
dockerfile: docker/stellargraph-ci-runner/Dockerfile
args:
PYTHON_VERSION: "3.6"
# this can be set at the build level to have the whole build use pre-release versions of
# dependencies
PRERELEASE_VERSIONS: &prerelease "${PRERELEASE_VERSIONS:-0}"
working_dir: /workdir
volumes:
volumes: &runner-volumes
- $PWD:/workdir
# We need to manually bring in buildkite-agent
# (https://github.com/buildkite-plugins/docker-compose-buildkite-plugin/issues/158)
- ${BUILDKITE_AGENT_BINARY_PATH:-/usr/bin/buildkite-agent}:/usr/bin/buildkite-agent
environment:
environment: &runner-environment
- PYTHONDONTWRITEBYTECODE=1
# make sure that no new uses of the legacy constructor are added (see also: test in
# test_graph.py, filterwarnings in pytest.ini)
Expand All @@ -37,18 +40,33 @@ services:
- BUILDKITE_COMMIT
- BUILDKITE_TAG
- CODECOV_TOKEN
- NEO4J_VERSION
depends_on:
- neo4j
- ${RUNNER_DEPENDS_ON:-empty-service}

runner-3_7:
<<: *runner-config
build:
<<: *runner-build
args:
PYTHON_VERSION: "3.7"
PRERELEASE_VERSIONS: *prerelease

# a service that does nothing to use as a default depends_on for the runners
empty-service:
image: alpine:3.10

neo4j:
build: docker/stellargraph-neo4j
build:
context: docker/stellargraph-neo4j
args:
- NEO4J_VERSION
environment:
# this is running entirely locally on a CI machine, so authentication is unnecessary
NEO4J_AUTH: "none"

conda:
image: continuumio/anaconda3
working_dir: /workdir
volumes: *runner-volumes
environment: *runner-environment
35 changes: 32 additions & 3 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ steps:
<<: *timeout
key: "test-notebooks"
depends_on: "runner-3_6"
parallelism: 43
parallelism: 45
command: ".buildkite/steps/test-demo-notebooks.sh"
plugins:
<<: *plugins
Expand All @@ -95,9 +95,9 @@ steps:
soft_fail:
- exit_status: 2

- label: ":python::book: test neo4j notebooks"
- &test-neo4j-notebooks
label: ":python::book: test neo4j notebooks against neo4j 4.0"
<<: *timeout
key: "test-neo4j-notebooks"
depends_on: "runner-3_6"
command: ".buildkite/steps/test-neo4j-notebooks.sh"
plugins:
Expand All @@ -107,9 +107,18 @@ steps:
run: runner-3_6
env:
- STELLARGRAPH_NEO4J_HOST=neo4j
env:
RUNNER_DEPENDS_ON: neo4j
NEO4J_VERSION: "4.0"
agents:
queue: "t2large"

- <<: *test-neo4j-notebooks
label: ":python::book: test neo4j notebooks against neo4j 3.5"
env:
RUNNER_DEPENDS_ON: neo4j
NEO4J_VERSION: "3.5"

- label: ":python-black: format"
<<: *timeout
plugins:
Expand Down Expand Up @@ -188,6 +197,26 @@ steps:
<<: *timeout
command: "scripts/whitespace.sh --ci"

- label: ":book: check demo table"
command: "python scripts/demo_table.py --action=compare"
<<: *timeout
plugins:
<<: *plugins
docker#v3.5.0:
image: "python:3.6"
propagate_environment: true

- label: ":snake: conda build"
<<: *timeout
command: ".buildkite/steps/conda-build.sh"
plugins:
<<: *plugins
docker-compose#v3.2.0:
<<: *compose-config
run: conda
agents:
queue: "t2medium"

- label: ":docker: build image"
<<: *timeout
plugins:
Expand Down
10 changes: 10 additions & 0 deletions .buildkite/steps/conda-build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash
# CI step: build the conda package from the recipe in the current directory and
# upload the resulting package file as a build artifact via buildkite-agent.

# -x: trace each command; -e: abort on the first failing command;
# -o pipefail: a pipeline fails if any command in it fails
set -xeo pipefail

echo "+++ :snake: :construction_worker: conda build"
# build only; do not upload the result to anaconda
conda build . --no-anaconda-upload

echo "+++ :snake::buildkite: upload package"
# ask conda for the path of the package file for this recipe, so that exact
# file can be handed to buildkite-agent below
conda_package="$(conda build . --output)"
buildkite-agent artifact upload "${conda_package}"
7 changes: 3 additions & 4 deletions .buildkite/steps/test-demo-notebooks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,17 +34,16 @@ f=${NOTEBOOKS[$INDEX]}
case $(basename "$f") in
'attacks_clustering_analysis.ipynb' | 'hateful-twitters-interpretability.ipynb' | 'hateful-twitters.ipynb' | 'stellargraph-attri2vec-DBLP.ipynb' | \
'node-link-importance-demo-gat.ipynb' | 'node-link-importance-demo-gcn.ipynb' | 'node-link-importance-demo-gcn-sparse.ipynb' | 'rgcn-aifb-node-classification-example.ipynb' | \
'stellargraph-metapath2vec.ipynb')
'stellargraph-metapath2vec.ipynb' | 'gcn-lstm-LA.ipynb')
# These notebooks do not yet work on CI:
# FIXME #818: datasets can't be downloaded
# FIXME #819: out-of-memory
# FIXME #849: CI does not have neo4j
# FIXME #907: socialcomputing.asu.edu is down
# FIXME #1303: METR_LA dataset can't be downloaded automatically
echo "+++ :python: :skull_and_crossbones: skipping $f"
exit 2 # this will be a soft-fail for buildkite
;;

'directed-graphsage-on-cora-neo4j-example.ipynb' | 'undirected-graphsage-on-cora-neo4j-example.ipynb' | 'load-cora-into-neo4j.ipynb')
'loading-saving-neo4j.ipynb' | 'directed-graphsage-on-cora-neo4j-example.ipynb' | 'undirected-graphsage-on-cora-neo4j-example.ipynb' | 'load-cora-into-neo4j.ipynb')
# these are tested separately (see test-neo4j-notebooks.sh)
echo "+++ :python: skipping Neo4j notebook $f"
exit 0
Expand Down
9 changes: 7 additions & 2 deletions .buildkite/steps/test-neo4j-notebooks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,13 @@ echo "--- listing dependency versions"
pip freeze

directory="$PWD/demos/connector/neo4j"
notebooks=('load-cora-into-neo4j.ipynb' 'directed-graphsage-on-cora-neo4j-example.ipynb' 'undirected-graphsage-on-cora-neo4j-example.ipynb')
notebooks=(
"../../basics/loading-saving-neo4j.ipynb"
"load-cora-into-neo4j.ipynb"
"directed-graphsage-on-cora-neo4j-example.ipynb"
"undirected-graphsage-on-cora-neo4j-example.ipynb"
)

for name in "${notebooks[@]}"; do
.buildkite/steps/test-single-notebook.sh "$directory/$name"
.buildkite/steps/test-single-notebook.sh "$directory/$name" " using Neo4j ${NEO4J_VERSION}"
done
5 changes: 3 additions & 2 deletions .buildkite/steps/test-single-notebook.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ set -xeo pipefail

stellargraph_dir="$PWD"
f="$1"
extra_info="${2-}"

echo "+++ :python: running $f"
cd "$(dirname "$f")"
Expand Down Expand Up @@ -36,8 +37,8 @@ echo "This notebook can be viewed at <$url>"

if [ "$exitCode" -ne 0 ]; then
# the notebook failed, so let's flag that more obviously, with helpful links
buildkite-agent annotate --style "error" --context "$filename" << EOF
Notebook \`$filename\` had an error: [failed job](#${BUILDKITE_JOB_ID}), [rendered notebook]($url)
buildkite-agent annotate --style "error" --context "$filename-${BUILDKITE_JOB_ID}" << EOF
Notebook \`$filename\` had an error${extra_info}: [failed job](#${BUILDKITE_JOB_ID}), [rendered notebook]($url)
EOF
fi

Expand Down
4 changes: 1 addition & 3 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,7 @@ site
.buildkite/
.github/

# docker configuration isn't needed inside the image
docker/
# buildkite creates a docker-compose.buildkite-...-override.yml file
# that changes each build, that also needs to be ignored
# that changes each build, that needs to be ignored
docker-compose*.yml
.dockerignore
68 changes: 68 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,73 @@
# Change Log

## [1.0.0rc1](https://github.com/stellargraph/stellargraph/tree/v1.0.0rc1)

[Full Changelog](https://github.com/stellargraph/stellargraph/compare/v0.11.0...v1.0.0rc1)

This is the first release candidate for StellarGraph 1.0. The 1.0 release will be the culmination of 2 years of active development, and this release candidate is the first milestone for that release.

Jump in to this release, with the new demos and examples:

- [More helpful indexing and guidance in demo READMEs](demos/)
- [Loading from Neo4j][neo4j]
- [More explanatory Node2Vec link prediction][n2v-lp]
- [Unsupervised `GraphSAGE` and `HinSAGE` via `DeepGraphInfomax`][dgi]
- [Graph classification with `GCNSupervisedGraphClassification`][gc]
- [Time series prediction using spatial information, using `GraphConvolutionLSTM`][gcn-lstm] (experimental)

[neo4j]: demos/basics/loading-saving-neo4j.ipynb
[n2v-lp]: demos/link-prediction/random-walks/cora-lp-demo.ipynb
[dgi]: demos/embeddings/deep-graph-infomax-cora.ipynb
[gc]: demos/graph-classification/supervised-graph-classification.ipynb
[gcn-lstm]: demos/spatio-temporal/gcn-lstm-LA.ipynb

### Major features and improvements

- Better demonstration notebooks and documentation to make the library more accessible to new and existing users:
- The [demos READMEs](demos/) now contain more guidance and explanation to make it easier to find a relevant example [\#1200](https://github.com/stellargraph/stellargraph/pull/1200)
- A [demo for loading data from Neo4j][neo4j] has been added [\#1184](https://github.com/stellargraph/stellargraph/pull/1184)
- The [demo for link prediction using Node2Vec][n2v-lp] has been rewritten to be clearer [\#1190](https://github.com/stellargraph/stellargraph/pull/1190)
- Notebooks are [now included in the API documentation](https://stellargraph.readthedocs.io/en/latest/demos/index.html), for more convenient access [\#1279](https://github.com/stellargraph/stellargraph/pull/1279)
- Notebooks now detect if they're being used with an incorrect version of the StellarGraph library, eliminating confusion about version mismatches [\#1242](https://github.com/stellargraph/stellargraph/pull/1242)
- New algorithms:
- `GCNSupervisedGraphClassification`: supervised graph classification model based on Graph Convolutional layers (GCN) [\#929](https://github.com/stellargraph/stellargraph/issues/929), [demo][gc].
- `DeepGraphInfomax` can be used to train almost any model in an unsupervised way, via the `corrupt_index_groups` parameter to `CorruptedGenerator` [\#1243](https://github.com/stellargraph/stellargraph/pull/1243), [demo][dgi]. Additionally, many algorithms provide defaults and so can be used with `DeepGraphInfomax` without specifying this parameter:
- any model using `FullBatchNodeGenerator`, including models supported in StellarGraph 0.11: `GCN`, `GAT`, `PPNP` and `APPNP`
- `GraphSAGE` [\#1162](https://github.com/stellargraph/stellargraph/pull/1162)
- `HinSAGE` for heterogeneous graphs with node features [\#1254](https://github.com/stellargraph/stellargraph/pull/1254)
- `UnsupervisedSampler` supports a `walker` parameter to use other random walking algorithms such as `BiasedRandomWalk`, in addition to the default `UniformRandomWalk`. [\#1187](https://github.com/stellargraph/stellargraph/pull/1187)
- The `StellarGraph` class is now smaller, faster and easier to construct:
- The `StellarGraph(..., edge_type_column=...)` parameter can be used to construct a heterogeneous graph from a single flat `DataFrame`, containing a column of the edge types [\#1284](https://github.com/stellargraph/stellargraph/pull/1284). This avoids the need to build separate `DataFrame`s for each type, and is significantly faster when there are many types. Using `edge_type_column` gives a 2.6× speedup for loading the `stellargraph.datasets.FB15k` dataset (with almost 600 thousand edges across 1345 types).
- `StellarGraph`'s internal cache of node adjacencies now uses the smallest integer type it can [\#1289](https://github.com/stellargraph/stellargraph/pull/1289). This reduces memory use by 31% on the `FB15k` dataset, and 36% on a reddit dataset (with 11.6 million edges).

### Breaking changes

- Edge weights are now validated to be numeric when creating a `StellarGraph`. Previously, edge weights could be any type, but all algorithms that use them would fail. [\#1191](https://github.com/stellargraph/stellargraph/pull/1191)
- Full batch layers no longer support an "output indices" tensor to filter the output rows to a selected set of nodes [\#1204](https://github.com/stellargraph/stellargraph/pull/1204) (this does **not** affect models like `GCN`, only the layers within them: `APPNPPropagationLayer`, `ClusterGraphConvolution`, `GraphConvolution`, `GraphAttention`, `GraphAttentionSparse`, `PPNPPropagationLayer`, `RelationalGraphConvolution`). Migration: post-process the output using `tf.gather` manually or the new `sg.layer.misc.GatherIndices` layer.
- `GraphConvolution` has been generalised to work with batch size > 1, subsuming the functionality of the now-deprecated `ClusterGraphConvolution` (and `GraphClassificationConvolution`) [\#1205](https://github.com/stellargraph/stellargraph/pull/1205). Migration: replace `stellargraph.layer.ClusterGraphConvolution` with `stellargraph.layer.GraphConvolution`.

### Experimental features

Some new algorithms and features are still under active development, and are available as an experimental preview. However, they may not be easy to use: their documentation or testing may be incomplete, and they may change dramatically from release to release. The experimental status is noted in the documentation and at runtime via prominent warnings.

- `SortPooling` layer: the node pooling layer introduced in [Zhang et al](https://www.cse.wustl.edu/~muhan/papers/AAAI_2018_DGCNN.pdf) [\#1210](https://github.com/stellargraph/stellargraph/pull/1210)
- `DeepGraphConvolutionalNeuralNetwork` (DGCNN): supervised graph classification using a stack of graph convolutional layers followed by `SortPooling`, and standard convolutional and pooling (such as `Conv1D` and `MaxPool1D`) [\#1212](https://github.com/stellargraph/stellargraph/pull/1212) [\#1265](https://github.com/stellargraph/stellargraph/pull/1265)
- `GraphConvolutionLSTM`: time series prediction on spatio-temporal data, combining GCN with a [LSTM](https://en.wikipedia.org/wiki/Long_short-term_memory) model to augment the conventional time-series model with information from nearby data points [\#1085](https://github.com/stellargraph/stellargraph/pull/1085), [demo][gcn-lstm]

### Bug fixes and other changes

- Random walk classes like `UniformRandomWalk` and `BiasedRandomWalk` can have their hyperparameters set on construction, in addition to in each call to `run` [\#1179](https://github.com/stellargraph/stellargraph/pull/1179)
- Node feature sampling was made ~4× faster by ensuring a better data layout; this makes some configurations of `GraphSAGE` (and `HinSAGE`) noticeably faster [\#1225](https://github.com/stellargraph/stellargraph/pull/1225)
- The `PROTEINS` dataset has been added to `stellargraph.datasets`, for graph classification [\#1282](https://github.com/stellargraph/stellargraph/pull/1282)
- The `BlogCatalog3` dataset can now be successfully downloaded again [\#1283](https://github.com/stellargraph/stellargraph/pull/1283)
- Knowledge graph model evaluation via `rank_edges_against_all_nodes` now defaults to the `random` strategy for breaking ties, and supports `top` (previous default) and `bottom` as alternatives [\#1223](https://github.com/stellargraph/stellargraph/pull/1223)
- Creating a `RelationalFullBatchNodeGenerator` is now significantly faster and requires much less memory (18× speedup and 560× smaller for the `stellargraph.datasets.AIFB` dataset) [\#1274](https://github.com/stellargraph/stellargraph/pull/1274)
- `StellarGraph.info` now shows a summary of the edge weights for each edge type [\#1240](https://github.com/stellargraph/stellargraph/pull/1240)
- Various documentation, demo and error message fixes and improvements: [\#1141](https://github.com/stellargraph/stellargraph/pull/1141), [\#1219](https://github.com/stellargraph/stellargraph/pull/1219), [\#1246](https://github.com/stellargraph/stellargraph/pull/1246), [\#1260](https://github.com/stellargraph/stellargraph/pull/1260), [\#1266](https://github.com/stellargraph/stellargraph/pull/1266)
- DevOps changes:
- CI: [\#1161](https://github.com/stellargraph/stellargraph/pull/1161), [\#1189](https://github.com/stellargraph/stellargraph/pull/1189), [\#1230](https://github.com/stellargraph/stellargraph/pull/1230), [\#1122](https://github.com/stellargraph/stellargraph/pull/1122)
- Other: [\#1197](https://github.com/stellargraph/stellargraph/pull/1197)

## [0.11.1](https://github.com/stellargraph/stellargraph/tree/v0.11.1)

[Full Changelog](https://github.com/stellargraph/stellargraph/compare/v0.11.0...v0.11.1)
Expand Down
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ The StellarGraph library offers state-of-the-art algorithms for [graph machine l
- Link prediction;
- [Interpretation of node classification](https://medium.com/stellargraph/https-medium-com-stellargraph-saliency-maps-for-graph-machine-learning-5cca536974da) [8].

Graph-structured data represent entities as nodes (or vertices) and relationships between them as edges (or links), and can data include data associated with either as attributes. For example, a graph can contain people as nodes and friendships between them as links, with data like a person's age and the date a friendship was established. StellarGraph supports analysis of many kinds of graphs:
Graph-structured data represent entities as nodes (or vertices) and relationships between them as edges (or links), and can include data associated with either as attributes. For example, a graph can contain people as nodes and friendships between them as links, with data like a person's age and the date a friendship was established. StellarGraph supports analysis of many kinds of graphs:

- homogeneous (with nodes and links of one type),
- heterogeneous (with more than one type of nodes and/or links)
Expand Down Expand Up @@ -184,6 +184,7 @@ The StellarGraph library currently includes the following algorithms for graph m
| Deep Graph Infomax [15] | Deep Graph Infomax trains unsupervised GNNs to maximize the shared information between node level and graph level features. |
| Continuous-Time Dynamic Network Embeddings (CTDNE) [16] | Supports time-respecting random walks which can be used in a similar way as in Node2Vec for unsupervised representation learning. |
| DistMult [17] | The DistMult algorithm computes embeddings for nodes (entities) and edge types (relations) in knowledge graphs, and can use these for link prediction |
| DGCNN [18] | The Deep Graph Convolutional Neural Network (DGCNN) algorithm for supervised graph classification. |

## Installation

Expand Down Expand Up @@ -298,3 +299,5 @@ International Conference on Machine Learning (ICML), 2019. ([link](https://arxiv
16. Continuous-Time Dynamic Network Embeddings. Giang Hoang Nguyen, John Boaz Lee, Ryan A. Rossi, Nesreen K. Ahmed, Eunyee Koh, and Sungchul Kim. Proceedings of the 3rd International Workshop on Learning Representations for Big Networks (WWW BigNet) 2018. ([link](https://dl.acm.org/doi/10.1145/3184558.3191526))

17. Embedding Entities and Relations for Learning and Inference in Knowledge Bases. Bishan Yang, Wen-tau Yih, Xiaodong He, Jianfeng Gao, and Li Deng, ICLR, 2015. arXiv:1412.6575 ([link](https://arxiv.org/pdf/1412.6575))

18. An End-to-End Deep Learning Architecture for Graph Classification. Muhan Zhang, Zhicheng Cui, Marion Neumann, and Yixin Chen, AAAI, 2018. ([link](https://www.cse.wustl.edu/~muhan/papers/AAAI_2018_DGCNN.pdf))
Loading

0 comments on commit 625a99b

Please sign in to comment.