diff --git a/docs/source/reference/core/properties.rst b/docs/source/reference/core/properties.rst index e1c10772a1..42c4f5f718 100644 --- a/docs/source/reference/core/properties.rst +++ b/docs/source/reference/core/properties.rst @@ -9,7 +9,7 @@ Properties :private-members: :inherited-members: -.. autoclass:: raphtory.ConstProperties +.. autoclass:: raphtory.ConstantProperties :autosummary: :members: :undoc-members: diff --git a/python/tests/graphql/misc/test_graphql_vectors.py b/python/tests/graphql/misc/test_graphql_vectors.py index 91bf5250e5..b88787a23d 100644 --- a/python/tests/graphql/misc/test_graphql_vectors.py +++ b/python/tests/graphql/misc/test_graphql_vectors.py @@ -72,8 +72,9 @@ def setup_server(work_dir): server = server.set_embeddings( cache="/tmp/graph-cache", embedding=embedding, - node_template="{{ name }}", - graph_template="{{ props.name }}", + nodes="{{ name }}", + graphs="{{ properties.name }}", + edges=False, ) return server diff --git a/python/tests/test_vectors.py b/python/tests/test_vectors.py index 6662a13763..734b30bfbf 100644 --- a/python/tests/test_vectors.py +++ b/python/tests/test_vectors.py @@ -49,7 +49,7 @@ def create_graph() -> VectorisedGraph: g.add_edge(4, "node3", "node4", {"name": "edge3"}) vg = g.vectorise( - embedding, node_template="{{ name }}", edge_template="{{ props.name }}" + embedding, nodes="{{ name }}", edges="{{ properties.name }}", graph=False ) return vg @@ -217,6 +217,24 @@ def test_filtering_by_entity_type(): contents = [doc.content for doc in selection.get_documents()] assert contents == ["edge1", "edge2", "edge3"] +def constant_embedding(texts): + return [[1, 0, 0] for text in texts] + +def test_default_template(): + g = Graph() + g.add_node(1, "node1") + g.add_edge(2, "node1", "node1") + + vg = g.vectorise(constant_embedding) + + node_docs = vg.nodes_by_similarity(query="whatever", limit=10).get_documents() + assert len(node_docs) == 1 + assert node_docs[0].content == "Node node1 has the following 
properties:\n" + + edge_docs = vg.edges_by_similarity(query="whatever", limit=10).get_documents() + assert len(edge_docs) == 1 + assert edge_docs[0].content == "There is an edge from node1 to node1 with events at:\n- Jan 1 1970 00:00\n" + ### MULTI-DOCUMENT VERSION TO BE RE-ENABLED diff --git a/raphtory-graphql/src/model/graph/property.rs b/raphtory-graphql/src/model/graph/property.rs index 07ea20ae2c..c095475f5a 100644 --- a/raphtory-graphql/src/model/graph/property.rs +++ b/raphtory-graphql/src/model/graph/property.rs @@ -61,8 +61,12 @@ fn prop_to_gql(prop: &Prop) -> GqlValue { Prop::I64(u) => GqlValue::Number(Number::from(*u)), Prop::U32(u) => GqlValue::Number(Number::from(*u)), Prop::U64(u) => GqlValue::Number(Number::from(*u)), - Prop::F32(u) => GqlValue::Number(Number::from_f64(*u as f64).unwrap()), - Prop::F64(u) => GqlValue::Number(Number::from_f64(*u).unwrap()), + Prop::F32(u) => Number::from_f64(*u as f64) + .map(|number| GqlValue::Number(number)) + .unwrap_or(GqlValue::Null), + Prop::F64(u) => Number::from_f64(*u as f64) + .map(|number| GqlValue::Number(number)) + .unwrap_or(GqlValue::Null), Prop::Bool(b) => GqlValue::Boolean(*b), Prop::List(l) => GqlValue::List(l.iter().map(|pp| prop_to_gql(pp)).collect()), Prop::Map(m) => GqlValue::Object( diff --git a/raphtory-graphql/src/paths.rs b/raphtory-graphql/src/paths.rs index 48d579b10c..7b8e70fc16 100644 --- a/raphtory-graphql/src/paths.rs +++ b/raphtory-graphql/src/paths.rs @@ -10,7 +10,7 @@ use raphtory::{ serialise::GraphFolder, }; -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct ExistingGraphFolder { folder: ValidGraphFolder, } diff --git a/raphtory-graphql/src/python/server/server.rs b/raphtory-graphql/src/python/server/server.rs index 83da81068c..a8f524163a 100644 --- a/raphtory-graphql/src/python/server/server.rs +++ b/raphtory-graphql/src/python/server/server.rs @@ -23,8 +23,14 @@ use pyo3::{ IntoPyObjectExt, }; use raphtory::{ - python::types::wrappers::document::PyDocument, - 
vectors::{embeddings::openai_embedding, template::DocumentTemplate, EmbeddingFunction}, + python::{packages::vectors::TemplateConfig, types::wrappers::document::PyDocument}, + vectors::{ + embeddings::openai_embedding, + template::{ + DocumentTemplate, DEFAULT_EDGE_TEMPLATE, DEFAULT_GRAPH_TEMPLATE, DEFAULT_NODE_TEMPLATE, + }, + EmbeddingFunction, + }, }; use std::{collections::HashMap, path::PathBuf, sync::Arc, thread}; @@ -43,17 +49,17 @@ impl<'py> IntoPyObject<'py> for GraphServer { } fn template_from_python( - graph_template: Option, - node_template: Option, - edge_template: Option, + graphs: TemplateConfig, + nodes: TemplateConfig, + edges: TemplateConfig, ) -> Option { - if graph_template.is_none() && node_template.is_none() && edge_template.is_none() { + if graphs.is_disabled() && nodes.is_disabled() && edges.is_disabled() { None } else { Some(DocumentTemplate { - graph_template, - node_template, - edge_template, + graph_template: graphs.get_template_or(DEFAULT_GRAPH_TEMPLATE), + node_template: nodes.get_template_or(DEFAULT_NODE_TEMPLATE), + edge_template: edges.get_template_or(DEFAULT_EDGE_TEMPLATE), }) } } @@ -67,11 +73,11 @@ impl PyGraphServer { slf: PyRefMut, cache: String, embedding: F, - graph_template: Option, - node_template: Option, - edge_template: Option, + graphs: TemplateConfig, + nodes: TemplateConfig, + edges: TemplateConfig, ) -> PyResult { - let global_template = template_from_python(graph_template, node_template, edge_template); + let global_template = template_from_python(graphs, nodes, edges); let server = take_server_ownership(slf)?; let cache = PathBuf::from(cache); Ok(server.set_embeddings(embedding, &cache, global_template)) @@ -208,43 +214,31 @@ impl PyGraphServer { /// Arguments: /// cache (str): the directory to use as cache for the embeddings. /// embedding (Callable, optional): the embedding function to translate documents to embeddings. - /// graph_template (str, optional): the template to use for graphs. 
- /// node_template (str, optional): the template to use for nodes. - /// edge_template (str, optional): the template to use for edges. + /// graphs (bool | str): if graphs have to be embedded or not or the custom template to use if a str is provided (defaults to True) + /// nodes (bool | str): if nodes have to be embedded or not or the custom template to use if a str is provided (defaults to True) + /// edges (bool | str): if edges have to be embedded or not or the custom template to use if a str is provided (defaults to True) /// /// Returns: /// GraphServer: A new server object with embeddings setup. #[pyo3( - signature = (cache, embedding = None, graph_template = None, node_template = None, edge_template = None) + signature = (cache, embedding = None, graphs = TemplateConfig::Bool(true), nodes = TemplateConfig::Bool(true), edges = TemplateConfig::Bool(true)) )] fn set_embeddings( slf: PyRefMut, cache: String, embedding: Option>, - graph_template: Option, - node_template: Option, - edge_template: Option, + graphs: TemplateConfig, + nodes: TemplateConfig, + edges: TemplateConfig, ) -> PyResult { match embedding { Some(embedding) => { let embedding: Arc = Arc::new(embedding); - Self::set_generic_embeddings( - slf, - cache, - embedding, - graph_template, - node_template, - edge_template, - ) + Self::set_generic_embeddings(slf, cache, embedding, graphs, nodes, edges) + } + None => { + Self::set_generic_embeddings(slf, cache, openai_embedding, graphs, nodes, edges) } - None => Self::set_generic_embeddings( - slf, - cache, - openai_embedding, - graph_template, - node_template, - edge_template, - ), } } @@ -252,28 +246,27 @@ impl PyGraphServer { /// /// Arguments: /// graph_names (list[str]): the names of the graphs to vectorise. All by default. - /// graph_template (str, optional): the template to use for graphs. - /// node_template (str, optional): the template to use for nodes. - /// edge_template (str, optional): the template to use for edges. 
+ /// graphs (bool | str): if graphs have to be embedded or not or the custom template to use if a str is provided (defaults to True) + /// nodes (bool | str): if nodes have to be embedded or not or the custom template to use if a str is provided (defaults to True) + /// edges (bool | str): if edges have to be embedded or not or the custom template to use if a str is provided (defaults to True) /// /// Returns: /// GraphServer: A new server object containing the vectorised graphs. #[pyo3( - signature = (graph_names, graph_template = None, node_template = None, edge_template = None) + signature = (graph_names, graphs = TemplateConfig::Bool(true), nodes = TemplateConfig::Bool(true), edges = TemplateConfig::Bool(true)) )] fn with_vectorised_graphs( slf: PyRefMut, graph_names: Vec, // TODO: support more models by just providing a string, e.g. "openai", here and in the VectorisedGraph API - graph_template: Option, - node_template: Option, - edge_template: Option, + graphs: TemplateConfig, + nodes: TemplateConfig, + edges: TemplateConfig, ) -> PyResult { - let template = template_from_python(graph_template, node_template, edge_template).ok_or( - PyAttributeError::new_err( + let template = + template_from_python(graphs, nodes, edges).ok_or(PyAttributeError::new_err( "some of graph_template, node_template, edge_template has to be set", - ), - )?; + ))?; let server = take_server_ownership(slf)?; Ok(server.with_vectorised_graphs(graph_names, template)) } diff --git a/raphtory/src/db/api/properties/constant_props.rs b/raphtory/src/db/api/properties/constant_props.rs index a40d995ce3..ca9dcea92d 100644 --- a/raphtory/src/db/api/properties/constant_props.rs +++ b/raphtory/src/db/api/properties/constant_props.rs @@ -5,12 +5,12 @@ use crate::{ use raphtory_api::core::storage::arc_str::ArcStr; use std::collections::HashMap; -pub struct ConstProperties<'a, P: ConstPropertiesOps> { +pub struct ConstantProperties<'a, P: ConstPropertiesOps> { pub(crate) props: P, _marker: 
std::marker::PhantomData<&'a P>, } -impl<'a, P: ConstPropertiesOps + Sync> ConstProperties<'a, P> { +impl<'a, P: ConstPropertiesOps + Sync> ConstantProperties<'a, P> { pub(crate) fn new(props: P) -> Self { Self { props, @@ -47,7 +47,7 @@ impl<'a, P: ConstPropertiesOps + Sync> ConstProperties<'a, P> { } } -impl<'a, P: ConstPropertiesOps + Sync + 'a> IntoIterator for ConstProperties<'a, P> { +impl<'a, P: ConstPropertiesOps + Sync + 'a> IntoIterator for ConstantProperties<'a, P> { type Item = (ArcStr, Prop); type IntoIter = BoxedLIter<'a, Self::Item>; @@ -60,7 +60,7 @@ impl<'a, P: ConstPropertiesOps + Sync + 'a> IntoIterator for ConstProperties<'a, } } -impl<'a, P: ConstPropertiesOps + Sync> IntoIterator for &'a ConstProperties<'a, P> { +impl<'a, P: ConstPropertiesOps + Sync> IntoIterator for &'a ConstantProperties<'a, P> { type Item = (ArcStr, Prop); type IntoIter = Box + 'a>; @@ -71,7 +71,7 @@ impl<'a, P: ConstPropertiesOps + Sync> IntoIterator for &'a ConstProperties<'a, } } -impl<'a, P: ConstPropertiesOps + Sync> PartialEq for ConstProperties<'a, P> { +impl<'a, P: ConstPropertiesOps + Sync> PartialEq for ConstantProperties<'a, P> { fn eq(&self, other: &Self) -> bool { self.as_map() == other.as_map() } diff --git a/raphtory/src/db/api/properties/dyn_props.rs b/raphtory/src/db/api/properties/dyn_props.rs index 50f6435fad..962a6f31e1 100644 --- a/raphtory/src/db/api/properties/dyn_props.rs +++ b/raphtory/src/db/api/properties/dyn_props.rs @@ -1,7 +1,7 @@ use crate::db::api::{ properties::{ internal::{InheritPropertiesOps, PropertiesOps}, - ConstProperties, Properties, TemporalProperties, TemporalPropertyView, + ConstantProperties, Properties, TemporalProperties, TemporalPropertyView, }, view::{internal::Static, DynamicGraph}, }; @@ -26,13 +26,13 @@ impl From> for DynProperties { } } -pub type DynConstProperties = ConstProperties<'static, DynProps>; +pub type DynConstProperties = ConstantProperties<'static, DynProps>; -impl From> +impl From> for DynConstProperties { - 
fn from(value: ConstProperties

) -> Self { - ConstProperties::new(Arc::new(value.props)) + fn from(value: ConstantProperties

) -> Self { + ConstantProperties::new(Arc::new(value.props)) } } diff --git a/raphtory/src/db/api/properties/props.rs b/raphtory/src/db/api/properties/props.rs index 58f513fc2e..21429432bc 100644 --- a/raphtory/src/db/api/properties/props.rs +++ b/raphtory/src/db/api/properties/props.rs @@ -1,7 +1,7 @@ use crate::{ core::Prop, db::api::properties::{ - constant_props::ConstProperties, internal::*, temporal_props::TemporalProperties, + constant_props::ConstantProperties, internal::*, temporal_props::TemporalProperties, }, }; use raphtory_api::core::storage::arc_str::ArcStr; @@ -69,8 +69,8 @@ impl Properties

{ } /// Get a view of the constant properties (meta-data) only. - pub fn constant<'a>(&self) -> ConstProperties<'a, P> { - ConstProperties::new(self.props.clone()) + pub fn constant<'a>(&self) -> ConstantProperties<'a, P> { + ConstantProperties::new(self.props.clone()) } /// Collect properties into vector diff --git a/raphtory/src/db/graph/edge.rs b/raphtory/src/db/graph/edge.rs index afa9a0dd29..bc20e6c241 100644 --- a/raphtory/src/db/graph/edge.rs +++ b/raphtory/src/db/graph/edge.rs @@ -277,7 +277,7 @@ impl /// /// # Arguments /// - /// * `props` - Property key-value pairs to add + /// * `properties` - Property key-value pairs to add /// * `layer` - The layer to which properties should be added. If the edge view is restricted to a /// single layer, 'None' will add the properties to that layer and 'Some("name")' /// fails unless the layer matches the edge view. If the edge view is not restricted @@ -285,7 +285,7 @@ impl /// sets the properties on layer '"name"' and fails if that layer doesn't exist. 
pub fn add_constant_properties( &self, - props: C, + properties: C, layer: Option<&str>, ) -> Result<(), GraphError> { let input_layer_id = self.resolve_layer(layer, false)?; @@ -300,7 +300,7 @@ impl dst: self.dst().name(), }); } - let properties: Vec<(usize, Prop)> = props.collect_properties(|name, dtype| { + let properties: Vec<(usize, Prop)> = properties.collect_properties(|name, dtype| { Ok(self.graph.resolve_edge_property(name, dtype, true)?.inner()) })?; diff --git a/raphtory/src/db/graph/node.rs b/raphtory/src/db/graph/node.rs index e5d9592d82..e72be8fda7 100644 --- a/raphtory/src/db/graph/node.rs +++ b/raphtory/src/db/graph/node.rs @@ -368,9 +368,9 @@ impl<'graph, G: GraphViewOps<'graph>, GH: GraphViewOps<'graph>> BaseNodeViewOps< impl NodeView { pub fn add_constant_properties( &self, - props: C, + properties: C, ) -> Result<(), GraphError> { - let properties: Vec<(usize, Prop)> = props.collect_properties(|name, dtype| { + let properties: Vec<(usize, Prop)> = properties.collect_properties(|name, dtype| { Ok(self.graph.resolve_node_property(name, dtype, true)?.inner()) })?; self.graph diff --git a/raphtory/src/db/graph/views/node_subgraph.rs b/raphtory/src/db/graph/views/node_subgraph.rs index a50a2e9324..5f66f94ea8 100644 --- a/raphtory/src/db/graph/views/node_subgraph.rs +++ b/raphtory/src/db/graph/views/node_subgraph.rs @@ -60,6 +60,7 @@ impl<'graph, G: GraphViewOps<'graph>> NodeSubgraph { nodes.collect() }; nodes.sort(); + nodes.dedup(); let nodes = Index::new(nodes, graph.unfiltered_num_nodes()); Self { graph, nodes } } @@ -132,7 +133,7 @@ mod subgraph_tests { graph.add_node(2, 2, NO_PROPS, None).unwrap(); test_storage!(&graph, |graph| { - let sg = graph.subgraph([1, 2]); + let sg = graph.subgraph([1, 2, 1]); // <- duplicated nodes should have no effect let actual = sg.materialize().unwrap().into_events().unwrap(); assert_graph_equal(&actual, &sg); diff --git a/raphtory/src/python/graph/properties/constant_props.rs 
b/raphtory/src/python/graph/properties/constant_props.rs index 9f843f7564..5c4f208d34 100644 --- a/raphtory/src/python/graph/properties/constant_props.rs +++ b/raphtory/src/python/graph/properties/constant_props.rs @@ -1,7 +1,7 @@ use crate::{ core::Prop, db::api::properties::{ - dyn_props::DynConstProperties, internal::PropertiesOps, ConstProperties, + dyn_props::DynConstProperties, internal::PropertiesOps, ConstantProperties, }, python::{ graph::properties::{ @@ -21,33 +21,33 @@ use raphtory_api::core::storage::arc_str::ArcStr; use std::{collections::HashMap, sync::Arc}; impl<'py, P: PropertiesOps + Send + Sync + 'static> IntoPyObject<'py> - for ConstProperties<'static, P> + for ConstantProperties<'static, P> { - type Target = PyConstProperties; + type Target = PyConstantProperties; type Output = Bound<'py, Self::Target>; type Error = >::Error; fn into_pyobject(self, py: Python<'py>) -> Result { - PyConstProperties::from(self).into_pyobject(py) + PyConstantProperties::from(self).into_pyobject(py) } } -impl<'a, P: PropertiesOps> Repr for ConstProperties<'a, P> { +impl<'a, P: PropertiesOps> Repr for ConstantProperties<'a, P> { fn repr(&self) -> String { format!("StaticProperties({{{}}})", iterator_dict_repr(self.iter())) } } /// A view of constant properties of an entity -#[pyclass(name = "ConstProperties", module = "raphtory", frozen)] -pub struct PyConstProperties { +#[pyclass(name = "ConstantProperties", module = "raphtory", frozen)] +pub struct PyConstantProperties { props: DynConstProperties, } -py_eq!(PyConstProperties, PyPropsComp); +py_eq!(PyConstantProperties, PyPropsComp); #[pymethods] -impl PyConstProperties { +impl PyConstantProperties { /// keys() -> list[str] /// /// lists the available property keys @@ -125,23 +125,23 @@ impl PyConstProperties { } } -impl From> - for PyConstProperties +impl From> + for PyConstantProperties { - fn from(value: ConstProperties

) -> Self { - PyConstProperties { - props: ConstProperties::new(Arc::new(value.props)), + fn from(value: ConstantProperties

) -> Self { + PyConstantProperties { + props: ConstantProperties::new(Arc::new(value.props)), } } } -impl Repr for PyConstProperties { +impl Repr for PyConstantProperties { fn repr(&self) -> String { self.props.repr() } } -py_iterable_base!(PyConstPropsList, DynConstProperties, PyConstProperties); +py_iterable_base!(PyConstPropsList, DynConstProperties, PyConstantProperties); py_eq!(PyConstPropsList, PyPropsListCmp); #[pymethods] @@ -196,7 +196,11 @@ impl PyConstPropsList { } } -py_nested_iterable_base!(PyConstPropsListList, DynConstProperties, PyConstProperties); +py_nested_iterable_base!( + PyConstPropsListList, + DynConstProperties, + PyConstantProperties +); py_eq!(PyConstPropsListList, PyConstPropsListListCmp); #[pymethods] diff --git a/raphtory/src/python/graph/properties/props.rs b/raphtory/src/python/graph/properties/props.rs index 71222b9be7..441e013451 100644 --- a/raphtory/src/python/graph/properties/props.rs +++ b/raphtory/src/python/graph/properties/props.rs @@ -10,7 +10,7 @@ use crate::{ }, python::{ graph::properties::{ - PyConstProperties, PyConstPropsList, PyConstPropsListList, PyTemporalPropsList, + PyConstPropsList, PyConstPropsListList, PyConstantProperties, PyTemporalPropsList, PyTemporalPropsListList, }, types::{ @@ -39,7 +39,7 @@ impl PartialEq for PyPropsComp { impl<'source> FromPyObject<'source> for PyPropsComp { fn extract_bound(ob: &Bound<'source, PyAny>) -> PyResult { - if let Ok(sp) = ob.extract::>() { + if let Ok(sp) = ob.extract::>() { Ok(sp.deref().into()) } else if let Ok(p) = ob.extract::>() { Ok(p.deref().into()) @@ -51,8 +51,8 @@ impl<'source> FromPyObject<'source> for PyPropsComp { } } -impl From<&PyConstProperties> for PyPropsComp { - fn from(value: &PyConstProperties) -> Self { +impl From<&PyConstantProperties> for PyPropsComp { + fn from(value: &PyConstantProperties) -> Self { Self(value.as_dict()) } } diff --git a/raphtory/src/python/packages/base_modules.rs b/raphtory/src/python/packages/base_modules.rs index 
28cb0b52f2..f5553fc283 100644 --- a/raphtory/src/python/packages/base_modules.rs +++ b/raphtory/src/python/packages/base_modules.rs @@ -14,7 +14,9 @@ use crate::{ graph_with_deletions::PyPersistentGraph, index::GraphIndex, node::{PyMutableNode, PyNode, PyNodes}, - properties::{PyConstProperties, PyProperties, PyTemporalProp, PyTemporalProperties}, + properties::{ + PyConstantProperties, PyProperties, PyTemporalProp, PyTemporalProperties, + }, views::graph_view::PyGraphView, }, packages::{ @@ -47,7 +49,7 @@ pub fn add_raphtory_classes(m: &Bound) -> PyResult<()> { PyEdges, PyMutableEdge, PyProperties, - PyConstProperties, + PyConstantProperties, PyTemporalProperties, PyTemporalProp, PyPropertyRef, diff --git a/raphtory/src/python/packages/vectors.rs b/raphtory/src/python/packages/vectors.rs index 7d72b7f37e..39cf5f1198 100644 --- a/raphtory/src/python/packages/vectors.rs +++ b/raphtory/src/python/packages/vectors.rs @@ -8,7 +8,9 @@ use crate::{ utils::{execute_async_task, PyNodeRef, PyTime}, }, vectors::{ - template::DocumentTemplate, + template::{ + DocumentTemplate, DEFAULT_EDGE_TEMPLATE, DEFAULT_GRAPH_TEMPLATE, DEFAULT_NODE_TEMPLATE, + }, vector_selection::DynamicVectorSelection, vectorisable::Vectorisable, vectorised_graph::{DynamicVectorisedGraph, VectorisedGraph}, @@ -157,6 +159,34 @@ pub fn into_py_document( Ok(doc) } +#[derive(FromPyObject)] +pub enum TemplateConfig { + Bool(bool), + String(String), + // re-enable the code below to be able to customise the error message + // #[pyo3(transparent)] + // CatchAll(Bound<'py, PyAny>), // This extraction never fails +} + +impl TemplateConfig { + pub fn get_template_or(self, default: &str) -> Option { + match self { + Self::Bool(vectorise) => { + if vectorise { + Some(default.to_owned()) + } else { + None + } + } + Self::String(custom_template) => Some(custom_template), + } + } + + pub fn is_disabled(&self) -> bool { + matches!(self, Self::Bool(false)) + } +} + #[pymethods] impl PyGraphView { /// Create a 
VectorisedGraph from the current graph @@ -165,33 +195,33 @@ impl PyGraphView { /// embedding (Callable[[list], list]): the embedding function to translate documents to embeddings /// cache (str): the file to be used as a cache to avoid calling the embedding function (optional) /// overwrite_cache (bool): whether or not to overwrite the cache if there are new embeddings (optional) - /// graph_template (str): the document template for the graphs (optional) - /// node_template (str): the document template for the nodes (optional) - /// edge_template (str): the document template for the edges (optional) + /// graph (bool | str): if the graph has to be embedded or not or the custom template to use if a str is provided (defaults to True) + /// nodes (bool | str): if nodes have to be embedded or not or the custom template to use if a str is provided (defaults to True) + /// edges (bool | str): if edges have to be embedded or not or the custom template to use if a str is provided (defaults to True) /// verbose (bool): whether or not to print logs reporting the progress /// /// Returns: /// A VectorisedGraph with all the documents/embeddings computed and with an initial empty selection - #[pyo3(signature = (embedding, cache = None, overwrite_cache = false, graph_template = None, node_template = None, edge_template = None, graph_name = None, verbose = false))] + #[pyo3(signature = (embedding, cache = None, overwrite_cache = false, graph = TemplateConfig::Bool(true), nodes = TemplateConfig::Bool(true), edges = TemplateConfig::Bool(true), graph_name = None, verbose = false))] fn vectorise( &self, embedding: Bound, cache: Option, overwrite_cache: bool, - graph_template: Option, - node_template: Option, - edge_template: Option, + graph: TemplateConfig, + nodes: TemplateConfig, + edges: TemplateConfig, graph_name: Option, verbose: bool, ) -> PyResult { - let graph = self.graph.clone(); - let embedding = embedding.unbind(); - let cache = cache.map(|cache| cache.into()).into(); 
let template = DocumentTemplate { - graph_template, - node_template, - edge_template, + graph_template: graph.get_template_or(DEFAULT_GRAPH_TEMPLATE), + node_template: nodes.get_template_or(DEFAULT_NODE_TEMPLATE), + edge_template: edges.get_template_or(DEFAULT_EDGE_TEMPLATE), }; + let embedding = embedding.unbind(); + let cache = cache.map(|cache| cache.into()).into(); + let graph = self.graph.clone(); execute_async_task(move || async move { Ok(graph .vectorise( diff --git a/raphtory/src/vectors/mod.rs b/raphtory/src/vectors/mod.rs index ee34bf012f..f66efabf51 100644 --- a/raphtory/src/vectors/mod.rs +++ b/raphtory/src/vectors/mod.rs @@ -142,7 +142,9 @@ mod vector_tests { fn custom_template() -> DocumentTemplate { DocumentTemplate { graph_template: None, - node_template: Some("{{ name}} is a {{ node_type }} aged {{ props.age }}".to_owned()), + node_template: Some( + "{{ name}} is a {{ node_type }} aged {{ properties.age }}".to_owned(), + ), edge_template: Some( "{{ src.name }} appeared with {{ dst.name}} in lines: {{ history|join(', ') }}" .to_owned(), diff --git a/raphtory/src/vectors/template.rs b/raphtory/src/vectors/template.rs index ff1ffb0212..69d4916183 100644 --- a/raphtory/src/vectors/template.rs +++ b/raphtory/src/vectors/template.rs @@ -67,9 +67,9 @@ impl Object for PropUpdate { struct NodeTemplateContext { name: String, node_type: Option, - props: Value, - constant_props: Value, - temporal_props: Value, + properties: Value, + constant_properties: Value, + temporal_properties: Value, } impl<'graph, G: GraphViewOps<'graph>> From> for NodeTemplateContext { @@ -77,18 +77,18 @@ impl<'graph, G: GraphViewOps<'graph>> From> for NodeTemplateContext Self { name: value.name(), node_type: value.node_type(), - props: value + properties: value .properties() .iter() .map(|(key, value)| (key.to_string(), value.clone())) .collect(), - constant_props: value + constant_properties: value .properties() .constant() .iter() .map(|(key, value)| (key.to_string(), 
value.clone())) .collect(), - temporal_props: value + temporal_properties: value .properties() .temporal() .iter() @@ -100,27 +100,27 @@ impl<'graph, G: GraphViewOps<'graph>> From> for NodeTemplateContext #[derive(Serialize)] struct GraphTemplateContext { - props: Value, - constant_props: Value, - temporal_props: Value, + properties: Value, + constant_properties: Value, + temporal_properties: Value, } // FIXME: boilerplate for the properties impl<'graph, G: GraphViewOps<'graph>> From for GraphTemplateContext { fn from(value: G) -> Self { Self { - props: value + properties: value .properties() .iter() .map(|(key, value)| (key.to_string(), value.clone())) .collect(), - constant_props: value + constant_properties: value .properties() .constant() .iter() .map(|(key, value)| (key.to_string(), value.clone())) .collect(), - temporal_props: value + temporal_properties: value .properties() .temporal() .iter() @@ -180,7 +180,10 @@ impl DocumentTemplate { let mut env = Environment::new(); let template = build_template(&mut env, template); match template.render(GraphTemplateContext::from(graph)) { - Ok(document) => Box::new(std::iter::once(document.into())), + Ok(mut document) => { + truncate(&mut document); + Box::new(std::iter::once(document.into())) + } Err(error) => { error!("Template render failed for a node, skipping: {error}"); empty_iter() @@ -201,7 +204,10 @@ impl DocumentTemplate { let mut env = Environment::new(); let template = build_template(&mut env, template); match template.render(NodeTemplateContext::from(node)) { - Ok(document) => Box::new(std::iter::once(document.into())), + Ok(mut document) => { + truncate(&mut document); + Box::new(std::iter::once(document.into())) + } Err(error) => { error!("Template render failed for a node, skipping: {error}"); empty_iter() @@ -222,7 +228,10 @@ impl DocumentTemplate { let mut env = Environment::new(); let template = build_template(&mut env, template); match template.render(EdgeTemplateContext::from(edge)) { - 
Ok(document) => Box::new(std::iter::once(document.into())), + Ok(mut document) => { + truncate(&mut document); + Box::new(std::iter::once(document.into())) + } Err(error) => { error!("Template render failed for an edge, skipping: {error}"); empty_iter() @@ -234,6 +243,13 @@ impl DocumentTemplate { } } +fn truncate(text: &mut String) { + let limit = text.char_indices().nth(1000); + if let Some((index, _)) = limit { + text.truncate(index); + } +} + fn build_template<'a>(env: &'a mut Environment<'a>, template: &'a str) -> Template<'a, 'a> { minijinja_contrib::add_to_environment(env); env.add_filter("datetimeformat", datetimeformat); @@ -251,7 +267,7 @@ struct EdgeTemplateContext { dst: NodeTemplateContext, history: Vec, layers: Vec, - props: Value, + properties: Value, } impl<'graph, G: GraphViewOps<'graph>> From> for EdgeTemplateContext { @@ -265,7 +281,7 @@ impl<'graph, G: GraphViewOps<'graph>> From> for EdgeTemplateContext .into_iter() .map(|name| name.into()) .collect(), - props: value // FIXME: boilerplate + properties: value // FIXME: boilerplate .properties() .iter() .map(|(key, value)| (key.to_string(), value.clone())) @@ -274,14 +290,102 @@ impl<'graph, G: GraphViewOps<'graph>> From> for EdgeTemplateContext } } +pub const DEFAULT_NODE_TEMPLATE: &str = "Node {{ name }} {% if node_type is none %} has the following properties:{% else %} is a {{ node_type }} with the following properties:{% endif %} + +{% for (key, value) in constant_properties|items %} +{{ key }}: {{ value }} +{% endfor %} +{% for (key, values) in temporal_properties|items %} +{{ key }}: +{% for (time, value) in values %} + - changed to {{ value }} at {{ time|datetimeformat }} +{% endfor %} +{% endfor %}"; + +pub const DEFAULT_EDGE_TEMPLATE: &str = + "There is an edge from {{ src.name }} to {{ dst.name }} with events at: +{% for time in history %} +- {{ time|datetimeformat }} +{% endfor %}"; + +pub const DEFAULT_GRAPH_TEMPLATE: &str = "Graph with the following properties: +{% for (key, value) in 
constant_properties|items %} +{{ key }}: {{ value }} +{% endfor %} +{% for (key, values) in temporal_properties|items %} +{{ key }}: +{% for (time, value) in values %} + - changed to {{ value }} at {{ time|datetimeformat }} +{% endfor %} +{% endfor %}"; + #[cfg(test)] mod template_tests { use indoc::indoc; - use crate::prelude::{AdditionOps, Graph, GraphViewOps, NO_PROPS}; + use crate::prelude::{AdditionOps, Graph, GraphViewOps, PropertyAdditionOps, NO_PROPS}; use super::*; + #[test] + fn test_default_templates() { + let graph = Graph::new(); + graph + .add_constant_properties([("name", "test-name")]) + .unwrap(); + + let node1 = graph + .add_node(0, "node1", [("temp_test", "value_at_0")], None) + .unwrap(); + graph + .add_node(1, "node1", [("temp_test", "value_at_1")], None) + .unwrap(); + node1 + .add_constant_properties([("key1", "value1"), ("key2", "value2")]) + .unwrap(); + + for time in [0, 60_000] { + graph + .add_edge(time, "node1", "node2", NO_PROPS, Some("fancy-layer")) + .unwrap(); + } + + let template = DocumentTemplate { + node_template: Some(DEFAULT_NODE_TEMPLATE.to_owned()), + graph_template: Some(DEFAULT_GRAPH_TEMPLATE.to_owned()), + edge_template: Some(DEFAULT_EDGE_TEMPLATE.to_owned()), + }; + + let mut docs = template.node(graph.node("node1").unwrap()); + let rendered = docs.next().unwrap().content; + let expected = indoc! {" + Node node1 has the following properties: + key1: value1 + key2: value2 + temp_test: + - changed to value_at_0 at Jan 1 1970 00:00 + - changed to value_at_1 at Jan 1 1970 00:00 + "}; + assert_eq!(&rendered, expected); + + let mut docs = template.edge(graph.edge("node1", "node2").unwrap()); + let rendered = docs.next().unwrap().content; + let expected = indoc! {" + There is an edge from node1 to node2 with events at: + - Jan 1 1970 00:00 + - Jan 1 1970 00:01 + "}; + assert_eq!(&rendered, expected); + + let mut docs = template.graph(graph); + let rendered = docs.next().unwrap().content; + let expected = indoc! 
{" + Graph with the following properties: + name: test-name + "}; + assert_eq!(&rendered, expected); + } + #[test] fn test_node_template() { let graph = Graph::new(); @@ -302,20 +406,20 @@ mod template_tests { .add_constant_properties([("const_test", "const_test_value")]) .unwrap(); - // I should be able to iterate over props without doing props|items, which would be solved by implementing Object for Properties + // I should be able to iterate over properties without doing properties|items, which would be solved by implementing Object for Properties let node_template = indoc! {" - node {{ name }} is {% if node_type is none %}an unknown entity{% else %}a {{ node_type }}{% endif %} with the following props: - {% if props.const_test is defined %}const_test: {{ props.const_test }} {% endif %} - {% if temporal_props.temp_test is defined and temporal_props.temp_test|length > 0 %} + node {{ name }} is {% if node_type is none %}an unknown entity{% else %}a {{ node_type }}{% endif %} with the following properties: + {% if properties.const_test is defined %}const_test: {{ properties.const_test }} {% endif %} + {% if temporal_properties.temp_test is defined and temporal_properties.temp_test|length > 0 %} temp_test: - {% for (time, value) in temporal_props.temp_test %} + {% for (time, value) in temporal_properties.temp_test %} - changed to {{ value }} at {{ time }} {% endfor %} {% endif %} - {% for (key, value) in props|items if key != \"temp_test\" and key != \"const_test\" %} + {% for (key, value) in properties|items if key != \"temp_test\" and key != \"const_test\" %} {{ key }}: {{ value }} {% endfor %} - {% for (key, value) in constant_props|items if key != \"const_test\" %} + {% for (key, value) in constant_properties|items if key != \"const_test\" %} {{ key }}: {{ value }} {% endfor %} "}; @@ -328,7 +432,7 @@ mod template_tests { let mut docs = template.node(graph.node("node1").unwrap()); let rendered = docs.next().unwrap().content; let expected = indoc! 
{" - node node1 is an unknown entity with the following props: + node node1 is an unknown entity with the following properties: temp_test: - changed to value_at_0 at 0 - changed to value_at_1 at 1 @@ -342,7 +446,7 @@ mod template_tests { let mut docs = template.node(graph.node("node2").unwrap()); let rendered = docs.next().unwrap().content; let expected = indoc! {" - node node2 is a person with the following props: + node node2 is a person with the following properties: const_test: const_test_value"}; assert_eq!(&rendered, expected); } @@ -354,9 +458,9 @@ mod template_tests { .add_node("2024-09-09T09:08:01", "node1", [("temp", "value")], None) .unwrap(); - // I should be able to iteate over props without doing props|items, which would be solved by implementing Object for Properties + // I should be able to iterate over properties without doing properties|items, which would be solved by implementing Object for Properties let node_template = - "{{ (temporal_props.temp|first).time|datetimeformat(format=\"long\") }}"; + "{{ (temporal_properties.temp|first).time|datetimeformat(format=\"long\") }}"; let template = DocumentTemplate { node_template: Some(node_template.to_owned()), graph_template: None,