diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 5ef2b10..81d9415 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,10 +1,12 @@ +# Rely on pre-commit.ci instead name: Lint via pre-commit on: - pull_request: - push: - branches-ignore: - - main + workflow_dispatch: + # pull_request: + # push: + # branches-ignore: + # - main permissions: contents: read diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 892b0c7..7b06df1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,7 +25,7 @@ repos: - id: mixed-line-ending - id: trailing-whitespace - repo: https://github.com/abravalheri/validate-pyproject - rev: v0.12.2 + rev: v0.13 hooks: - id: validate-pyproject name: Validate pyproject.toml @@ -40,7 +40,7 @@ repos: hooks: - id: isort - repo: https://github.com/asottile/pyupgrade - rev: v3.3.2 + rev: v3.4.0 hooks: - id: pyupgrade args: [--py38-plus] @@ -55,7 +55,7 @@ repos: - id: black # - id: black-jupyter - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.264 + rev: v0.0.269 hooks: - id: ruff args: [--fix-only, --show-fixes] @@ -66,7 +66,7 @@ repos: additional_dependencies: &flake8_dependencies # These versions need updated manually - flake8==6.0.0 - - flake8-bugbear==23.3.23 + - flake8-bugbear==23.5.9 - flake8-simplify==0.20.0 - repo: https://github.com/asottile/yesqa rev: v1.4.0 @@ -81,7 +81,7 @@ repos: additional_dependencies: [tomli] files: ^(graphblas_algorithms|docs)/ - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.264 + rev: v0.0.269 hooks: - id: ruff # `pyroma` may help keep our package standards up to date if best practices change. diff --git a/README.md b/README.md index adc236a..ed66df3 100644 --- a/README.md +++ b/README.md @@ -112,93 +112,138 @@ The following NetworkX algorithms have been implemented by graphblas-algorithms and can be used following the dispatch pattern shown above. -- Boundary - - edge_boundary - - node_boundary -- Centrality - - degree_centrality - - eigenvector_centrality - - in_degree_centrality - - katz_centrality - - out_degree_centrality -- Cluster - - average_clustering - - clustering - - generalized_degree - - square_clustering - - transitivity - - triangles -- Community - - inter_community_edges - - intra_community_edges -- Components - - is_connected - - is_weakly_connected - - node_connected_component -- Core - - k_truss -- Cuts - - boundary_expansion - - conductance - - cut_size - - edge_expansion - - mixing_expansion - - node_expansion - - normalized_cut_size - - volume -- DAG - - ancestors - - descendants -- Dominating - - is_dominating_set -- Generators - - ego_graph -- Isolate - - is_isolate - - isolates - - number_of_isolates -- Link Analysis - - google_matrix - - hits - - pagerank -- Operators - - compose - - difference - - disjoint_union - - full_join - - intersection - - symmetric_difference - - union -- Reciprocity - - overall_reciprocity - - reciprocity -- Regular - - is_k_regular - - is_regular -- Shortest Paths - - all_pairs_bellman_ford_path_length - - all_pairs_shortest_path_length - - bellman_ford_path - - floyd_warshall - - floyd_warshall_numpy - - floyd_warshall_predecessor_and_distance - - has_path - - negative_edge_cycle - - single_source_bellman_ford_path_length - - single_source_shortest_path_length - - single_target_shortest_path_length -- Simple Paths - - is_simple_path -- S Metric - - s_metric -- Structural Holes - - mutual_weight -- Tournament - - is_tournament - - score_sequence - - tournament_matrix -- Traversal - - bfs_layers - - descendants_at_distance -- Triads - - is_triad +[//]: # (Begin auto-generated code) + +``` +graphblas_algorithms.nxapi +├── boundary +│ ├── edge_boundary +│ └── node_boundary +├── centrality +│ ├── degree_alg +│ │ ├── degree_centrality +│ │ ├── in_degree_centrality +│ │ └── out_degree_centrality +│ ├── eigenvector +│ │ └── eigenvector_centrality +│ └── katz +│ └── katz_centrality +├── cluster +│ ├── average_clustering +│ ├── clustering +│ ├── generalized_degree +│ ├── square_clustering +│ ├── transitivity +│ └── triangles +├── community +│ └── quality +│ ├── inter_community_edges +│ └── intra_community_edges +├── components +│ ├── connected +│ │ ├── is_connected +│ │ └── node_connected_component +│ └── weakly_connected +│ └── is_weakly_connected +├── core +│ └── k_truss +├── cuts +│ ├── boundary_expansion +│ ├── conductance +│ ├── cut_size +│ ├── edge_expansion +│ ├── mixing_expansion +│ ├── node_expansion +│ ├── normalized_cut_size +│ └── volume +├── dag +│ ├── ancestors +│ └── descendants +├── dominating +│ └── is_dominating_set +├── efficiency_measures +│ └── efficiency +├── generators +│ └── ego +│ └── ego_graph +├── isolate +│ ├── is_isolate +│ ├── isolates +│ └── number_of_isolates +├── isomorphism +│ └── isomorph +│ ├── fast_could_be_isomorphic +│ └── faster_could_be_isomorphic +├── linalg +│ ├── bethehessianmatrix +│ │ └── bethe_hessian_matrix +│ ├── graphmatrix +│ │ └── adjacency_matrix +│ ├── laplacianmatrix +│ │ ├── laplacian_matrix +│ │ └── normalized_laplacian_matrix +│ └── modularitymatrix +│ ├── directed_modularity_matrix +│ └── modularity_matrix +├── link_analysis +│ ├── hits_alg +│ │ └── hits +│ └── pagerank_alg +│ ├── google_matrix +│ └── pagerank +├── lowest_common_ancestors +│ └── lowest_common_ancestor +├── operators +│ ├── binary +│ │ ├── compose +│ │ ├── difference +│ │ ├── disjoint_union +│ │ ├── full_join +│ │ ├── intersection +│ │ ├── symmetric_difference +│ │ └── union +│ └── unary +│ ├── complement +│ └── reverse +├── reciprocity +│ ├── overall_reciprocity +│ └── reciprocity +├── regular +│ ├── is_k_regular +│ └── is_regular +├── shortest_paths +│ ├── dense +│ │ ├── floyd_warshall +│ │ ├── floyd_warshall_numpy +│ │ └── floyd_warshall_predecessor_and_distance +│ ├── generic +│ │ └── has_path +│ ├── unweighted +│ │ ├── all_pairs_shortest_path_length +│ │ ├── single_source_shortest_path_length +│ │ └── single_target_shortest_path_length +│ └── weighted +│ ├── all_pairs_bellman_ford_path_length +│ ├── bellman_ford_path +│ ├── bellman_ford_path_length +│ ├── negative_edge_cycle +│ └── single_source_bellman_ford_path_length +├── simple_paths +│ └── is_simple_path +├── smetric +│ └── s_metric +├── structuralholes +│ └── mutual_weight +├── tournament +│ ├── is_tournament +│ ├── score_sequence +│ └── tournament_matrix +├── traversal +│ └── breadth_first_search +│ ├── bfs_layers +│ └── descendants_at_distance +└── triads + └── is_triad +``` + +[//]: # (End auto-generated code) diff --git a/environment.yml b/environment.yml index 9342aa4..06142d1 100644 --- a/environment.yml +++ b/environment.yml @@ -42,6 +42,8 @@ dependencies: - pydot - pygraphviz - sympy + # For updating algorithm list in README + - rich # For linting - pre-commit # For testing diff --git a/graphblas_algorithms/__init__.py b/graphblas_algorithms/__init__.py index 09418f5..e86efa9 100644 --- a/graphblas_algorithms/__init__.py +++ b/graphblas_algorithms/__init__.py @@ -1,9 +1,10 @@ import importlib.metadata from .classes import * +from .generators import * +from .linalg import * from .algorithms import * # isort:skip -from .generators import * # isort:skip try: __version__ = importlib.metadata.version("graphblas-algorithms") diff --git a/graphblas_algorithms/algorithms/__init__.py b/graphblas_algorithms/algorithms/__init__.py index 37a47a3..be06324 100644 --- a/graphblas_algorithms/algorithms/__init__.py +++ b/graphblas_algorithms/algorithms/__init__.py @@ -8,8 +8,11 @@ from .cuts import * from .dag import * from .dominating import * +from .efficiency_measures import * from .isolate import * +from .isomorphism import * from .link_analysis import * +from .lowest_common_ancestors import * from .operators import * from .reciprocity import * from .regular import * diff --git a/graphblas_algorithms/algorithms/_bfs.py b/graphblas_algorithms/algorithms/_bfs.py index 49ba735..8189aae 100644 --- a/graphblas_algorithms/algorithms/_bfs.py +++ b/graphblas_algorithms/algorithms/_bfs.py @@ -11,16 +11,25 @@ def _get_cutoff(n, cutoff): return cutoff + 1 # Inclusive -def _bfs_plain(G, source=None, target=None, *, index=None, cutoff=None): +# Push-pull optimization is possible, but annoying to implement +def _bfs_plain( + G, source=None, target=None, *, index=None, cutoff=None, transpose=False, name="bfs_plain" +): if source is not None: + if source not in G._key_to_id: + raise KeyError(f"The node {source} is not in the graph") index = G._key_to_id[source] if target is not None: + if target not in G._key_to_id: + raise KeyError(f"The node {target} is not in the graph") dst_id = G._key_to_id[target] else: dst_id = None A = G.get_property("offdiag") + if transpose and G.is_directed(): + A = A.T # TODO: should we use "AT" instead? n = A.nrows - v = Vector(bool, n, name="bfs_plain") + v = Vector(bool, n, name=name) q = Vector(bool, n, name="q") v[index] = True q[index] = True @@ -30,16 +39,22 @@ def _bfs_plain(G, source=None, target=None, *, index=None, cutoff=None): q(~v.S, replace) << any_pair_bool(q @ A) if q.nvals == 0: break + v(q.S) << True if dst_id is not None and dst_id in q: break - v(q.S) << True return v -def _bfs_level(G, source, cutoff=None, *, transpose=False, dtype=int): +def _bfs_level(G, source, target=None, *, cutoff=None, transpose=False, dtype=int): if dtype == bool: dtype = int index = G._key_to_id[source] + if target is not None: + if target not in G._key_to_id: + raise KeyError(f"The node {target} is not in the graph") + dst_id = G._key_to_id[target] + else: + dst_id = None A = G.get_property("offdiag") if transpose and G.is_directed(): A = A.T # TODO: should we use "AT" instead? @@ -55,10 +70,12 @@ def _bfs_level(G, source, cutoff=None, *, transpose=False, dtype=int): if q.nvals == 0: break v(q.S) << i + if dst_id is not None and dst_id in q: + break return v -def _bfs_levels(G, nodes, cutoff=None, *, dtype=int): +def _bfs_levels(G, nodes, *, cutoff=None, dtype=int): if dtype == bool: dtype = int A = G.get_property("offdiag") @@ -90,7 +107,7 @@ def _bfs_levels(G, nodes, cutoff=None, *, dtype=int): return D -def _bfs_parent(G, source, cutoff=None, *, target=None, transpose=False, dtype=int): +def _bfs_parent(G, source, target=None, *, cutoff=None, transpose=False, dtype=int): if dtype == bool: dtype = int index = G._key_to_id[source] diff --git a/graphblas_algorithms/algorithms/dag.py b/graphblas_algorithms/algorithms/dag.py index c75dae1..63eb560 100644 --- a/graphblas_algorithms/algorithms/dag.py +++ b/graphblas_algorithms/algorithms/dag.py @@ -1,41 +1,17 @@ -from graphblas import Vector, replace -from graphblas.semiring import any_pair +from ._bfs import _bfs_plain __all__ = ["descendants", "ancestors"] -# Push-pull optimization is possible, but annoying to implement def descendants(G, source): - if source not in G._key_to_id: - raise KeyError(f"The node {source} is not in the graph") + rv = _bfs_plain(G, source, name="descendants") index = G._key_to_id[source] - A = G.get_property("offdiag") - q = Vector(bool, size=A.nrows, name="q") - q[index] = True - rv = q.dup(name="descendants") - any_pair_bool = any_pair[bool] - for _i in range(A.nrows): - q(~rv.S, replace) << any_pair_bool(q @ A) - if q.nvals == 0: - break - rv(q.S) << True del rv[index] return rv def ancestors(G, source): - if source not in G._key_to_id: - raise KeyError(f"The node {source} is not in the graph") + rv = _bfs_plain(G, source, transpose=True, name="ancestors") index = G._key_to_id[source] - A = G.get_property("offdiag") - q = Vector(bool, size=A.nrows, name="q") - q[index] = True - rv = q.dup(name="descendants") - any_pair_bool = any_pair[bool] - for _i in range(A.nrows): - q(~rv.S, replace) << any_pair_bool(A @ q) - if q.nvals == 0: - break - rv(q.S) << True del rv[index] return rv diff --git a/graphblas_algorithms/algorithms/efficiency_measures.py b/graphblas_algorithms/algorithms/efficiency_measures.py new file mode 100644 index 0000000..3d922ee --- /dev/null +++ b/graphblas_algorithms/algorithms/efficiency_measures.py @@ -0,0 +1,12 @@ +from .exceptions import NoPath +from .shortest_paths.unweighted import bidirectional_shortest_path_length + +__all__ = ["efficiency"] + + +def efficiency(G, u, v): + try: + eff = 1 / bidirectional_shortest_path_length(G, u, v) + except NoPath: + eff = 0 + return eff diff --git a/graphblas_algorithms/algorithms/isomorphism/__init__.py b/graphblas_algorithms/algorithms/isomorphism/__init__.py new file mode 100644 index 0000000..e701b70 --- /dev/null +++ b/graphblas_algorithms/algorithms/isomorphism/__init__.py @@ -0,0 +1 @@ +from .isomorph import * diff --git a/graphblas_algorithms/algorithms/isomorphism/isomorph.py b/graphblas_algorithms/algorithms/isomorphism/isomorph.py new file mode 100644 index 0000000..12e5af4 --- /dev/null +++ b/graphblas_algorithms/algorithms/isomorphism/isomorph.py @@ -0,0 +1,56 @@ +import numpy as np +from graphblas import binary + +from ..cluster import triangles + +__all__ = [ + "fast_could_be_isomorphic", + "faster_could_be_isomorphic", +] + + +def fast_could_be_isomorphic(G1, G2): + if len(G1) != len(G2): + return False + d1 = G1.get_property("total_degrees+" if G1.is_directed() else "degrees+") + d2 = G2.get_property("total_degrees+" if G2.is_directed() else "degrees+") + if d1.nvals != d2.nvals: + return False + t1 = triangles(G1) + t2 = triangles(G2) + if t1.nvals != t2.nvals: + return False + # Make ds and ts the same shape as numpy arrays so we can sort them lexicographically. + if t1.nvals != d1.nvals: + # Assign 0 to t1 where present in d1 but not t1 + t1(~t1.S) << binary.second(d1, 0) + if t2.nvals != d2.nvals: + # Assign 0 to t2 where present in d2 but not t2 + t2(~t2.S) << binary.second(d2, 0) + d1 = d1.to_coo(indices=False)[1] + d2 = d2.to_coo(indices=False)[1] + t1 = t1.to_coo(indices=False)[1] + t2 = t2.to_coo(indices=False)[1] + ind1 = np.lexsort((d1, t1)) + ind2 = np.lexsort((d2, t2)) + if not np.array_equal(d1[ind1], d2[ind2]): + return False + if not np.array_equal(t1[ind1], t2[ind2]): + return False + return True + + +def faster_could_be_isomorphic(G1, G2): + if len(G1) != len(G2): + return False + d1 = G1.get_property("total_degrees+" if G1.is_directed() else "degrees+") + d2 = G2.get_property("total_degrees+" if G2.is_directed() else "degrees+") + if d1.nvals != d2.nvals: + return False + d1 = d1.to_coo(indices=False)[1] + d2 = d2.to_coo(indices=False)[1] + d1.sort() + d2.sort() + if not np.array_equal(d1, d2): + return False + return True diff --git a/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py b/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py index d665e98..7391dbe 100644 --- a/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py +++ b/graphblas_algorithms/algorithms/link_analysis/pagerank_alg.py @@ -1,4 +1,3 @@ -import numpy as np from graphblas import Matrix, Vector, binary, monoid from graphblas.semiring import plus_first, plus_times @@ -113,7 +112,6 @@ def google_matrix( A = G._A ids = G.list_to_ids(nodelist) if ids is not None: - ids = np.array(ids, np.uint64) A = A[ids, ids].new(float, name=name) else: A = A.dup(float, name=name) diff --git a/graphblas_algorithms/algorithms/lowest_common_ancestors.py b/graphblas_algorithms/algorithms/lowest_common_ancestors.py new file mode 100644 index 0000000..0dfac19 --- /dev/null +++ b/graphblas_algorithms/algorithms/lowest_common_ancestors.py @@ -0,0 +1,21 @@ +from graphblas import binary, replace +from graphblas.semiring import any_pair + +from ._bfs import _bfs_plain + +__all__ = ["lowest_common_ancestor"] + + +def lowest_common_ancestor(G, node1, node2, default=None): + common_ancestors = _bfs_plain(G, node1, name="common_ancestors", transpose=True) + other_ancestors = _bfs_plain(G, node2, name="other_ancestors", transpose=True) + common_ancestors << binary.pair(common_ancestors & other_ancestors) + if common_ancestors.nvals == 0: + return default + # Take one BFS step along predecessors. The lowest common ancestor is one we don't visit. + # An alternative strategy would be to walk along successors until there are no more. + other_ancestors(common_ancestors.S, replace) << any_pair[bool](G._A @ common_ancestors) + common_ancestors(~other_ancestors.S, replace) << common_ancestors + index = common_ancestors.to_coo(values=False)[0][0] + # XXX: should we return index or key? + return G.id_to_key[index] diff --git a/graphblas_algorithms/algorithms/operators/__init__.py b/graphblas_algorithms/algorithms/operators/__init__.py index 6e308b5..c2742b9 100644 --- a/graphblas_algorithms/algorithms/operators/__init__.py +++ b/graphblas_algorithms/algorithms/operators/__init__.py @@ -1 +1,2 @@ from .binary import * +from .unary import * diff --git a/graphblas_algorithms/algorithms/operators/binary.py b/graphblas_algorithms/algorithms/operators/binary.py index b2b19ca..11b5b19 100644 --- a/graphblas_algorithms/algorithms/operators/binary.py +++ b/graphblas_algorithms/algorithms/operators/binary.py @@ -1,4 +1,3 @@ -import numpy as np from graphblas import Matrix, binary, dtypes, unary from ..exceptions import GraphBlasAlgorithmException @@ -63,9 +62,9 @@ def intersection(G, H, *, name="intersection"): if G.is_multigraph(): raise NotImplementedError("Not yet implemented for multigraphs") keys = sorted(G._key_to_id.keys() & H._key_to_id.keys(), key=G._key_to_id.__getitem__) - ids = np.array(G.list_to_ids(keys), np.uint64) + ids = G.list_to_ids(keys) A = G._A[ids, ids].new() - ids = np.array(H.list_to_ids(keys), np.uint64) + ids = H.list_to_ids(keys) B = H._A[ids, ids].new(dtypes.unify(A.dtype, H._A.dtype), mask=A.S, name=name) B << unary.one(B) return type(G)(B, key_to_id=dict(zip(keys, range(len(keys))))) @@ -84,7 +83,7 @@ def difference(G, H, *, name="difference"): else: # Need to perform a permutation keys = sorted(G._key_to_id, key=G._key_to_id.__getitem__) - ids = np.array(H.list_to_ids(keys), np.uint64) + ids = H.list_to_ids(keys) B = H._A[ids, ids].new() C = unary.one(A).new(mask=~B.S, name=name) return type(G)(C, key_to_id=G._key_to_id) @@ -103,7 +102,7 @@ def symmetric_difference(G, H, *, name="symmetric_difference"): else: # Need to perform a permutation keys = sorted(G._key_to_id, key=G._key_to_id.__getitem__) - ids = np.array(H.list_to_ids(keys), np.uint64) + ids = H.list_to_ids(keys) B = H._A[ids, ids].new() Mask = binary.pair[bool](A & B).new(name="mask") C = binary.pair(A | B, left_default=True, right_default=True).new(mask=~Mask.S, name=name) @@ -121,7 +120,7 @@ def compose(G, H, *, name="compose"): if G._key_to_id != H._key_to_id: # Need to perform a permutation keys = sorted(G._key_to_id, key=G._key_to_id.__getitem__) - ids = np.array(H.list_to_ids(keys), np.uint64) + ids = H.list_to_ids(keys) B = B[ids, ids].new() C = binary.second(A | B).new(name=name) key_to_id = G._key_to_id @@ -135,11 +134,11 @@ def compose(G, H, *, name="compose"): name=name, ) C[: A.nrows, : A.ncols] = A - ids1 = np.array(G.list_to_ids(keys), np.uint64) - ids2 = np.array(H.list_to_ids(keys), np.uint64) + ids1 = G.list_to_ids(keys) + ids2 = H.list_to_ids(keys) C[ids1, ids1] = B[ids2, ids2] newkeys = sorted(H._key_to_id.keys() - G._key_to_id.keys(), key=H._key_to_id.__getitem__) - ids = np.array(H.list_to_ids(newkeys), np.uint64) + ids = H.list_to_ids(newkeys) C[A.nrows :, A.ncols :] = B[ids, ids] # Now make new `key_to_id` ids += A.nrows diff --git a/graphblas_algorithms/algorithms/operators/unary.py b/graphblas_algorithms/algorithms/operators/unary.py new file mode 100644 index 0000000..e7c46d6 --- /dev/null +++ b/graphblas_algorithms/algorithms/operators/unary.py @@ -0,0 +1,18 @@ +from graphblas import select + +from ..exceptions import GraphBlasAlgorithmException + +__all__ = ["complement", "reverse"] + + +def complement(G, *, name="complement"): + A = G._A + R = (~A.S).new(A.dtype, name=name) + R << select.offdiag(R) + return type(G)(R, key_to_id=G._key_to_id) + + +def reverse(G, copy=True): + if not G.is_directed(): + raise GraphBlasAlgorithmException("Cannot reverse an undirected graph.") + return G.reverse(copy=copy) diff --git a/graphblas_algorithms/algorithms/shortest_paths/generic.py b/graphblas_algorithms/algorithms/shortest_paths/generic.py index b92f7d6..ef86f89 100644 --- a/graphblas_algorithms/algorithms/shortest_paths/generic.py +++ b/graphblas_algorithms/algorithms/shortest_paths/generic.py @@ -1,36 +1,12 @@ -from graphblas import Vector, replace -from graphblas.semiring import any_pair +from ..exceptions import NoPath +from .unweighted import bidirectional_shortest_path_length __all__ = ["has_path"] def has_path(G, source, target): - # Perform bidirectional BFS from source to target and target to source - src = G._key_to_id[source] - dst = G._key_to_id[target] - if src == dst: - return True - A = G.get_property("offdiag") - q_src = Vector(bool, size=A.nrows, name="q_src") - q_src[src] = True - seen_src = q_src.dup(name="seen_src") - q_dst = Vector(bool, size=A.nrows, name="q_dst") - q_dst[dst] = True - seen_dst = q_dst.dup(name="seen_dst", clear=True) - any_pair_bool = any_pair[bool] - for _i in range(A.nrows // 2): - q_src(~seen_src.S, replace) << any_pair_bool(q_src @ A) - if q_src.nvals == 0: - return False - if any_pair_bool(q_src @ q_dst): - return True - - seen_dst(q_dst.S) << True - q_dst(~seen_dst.S, replace) << any_pair_bool(A @ q_dst) - if q_dst.nvals == 0: - return False - if any_pair_bool(q_src @ q_dst): - return True - - seen_src(q_src.S) << True - return False + try: + bidirectional_shortest_path_length(G, source, target) + except NoPath: + return False + return True diff --git a/graphblas_algorithms/algorithms/shortest_paths/unweighted.py b/graphblas_algorithms/algorithms/shortest_paths/unweighted.py index 5062e87..ec87b65 100644 --- a/graphblas_algorithms/algorithms/shortest_paths/unweighted.py +++ b/graphblas_algorithms/algorithms/shortest_paths/unweighted.py @@ -1,6 +1,8 @@ -from graphblas import Matrix +from graphblas import Matrix, Vector, replace +from graphblas.semiring import any_pair from .._bfs import _bfs_level, _bfs_levels +from ..exceptions import NoPath __all__ = [ "single_source_shortest_path_length", @@ -10,18 +12,53 @@ def single_source_shortest_path_length(G, source, cutoff=None): - return _bfs_level(G, source, cutoff) + return _bfs_level(G, source, cutoff=cutoff) def single_target_shortest_path_length(G, target, cutoff=None): - return _bfs_level(G, target, cutoff, transpose=True) + return _bfs_level(G, target, cutoff=cutoff, transpose=True) def all_pairs_shortest_path_length(G, cutoff=None, *, nodes=None, expand_output=False): - D = _bfs_levels(G, nodes, cutoff) + D = _bfs_levels(G, nodes, cutoff=cutoff) if nodes is not None and expand_output and D.ncols != D.nrows: ids = G.list_to_ids(nodes) rv = Matrix(D.dtype, D.ncols, D.ncols, name=D.name) rv[ids, :] = D return rv return D + + +def bidirectional_shortest_path_length(G, source, target): + # Perform bidirectional BFS from source to target and target to source + # TODO: have this raise NodeNotFound? + if source not in G or target not in G: + raise KeyError(f"Either source {source} or target {target} is not in G") # NodeNotFound + src = G._key_to_id[source] + dst = G._key_to_id[target] + if src == dst: + return 0 + A = G.get_property("offdiag") + q_src = Vector(bool, size=A.nrows, name="q_src") + q_src[src] = True + seen_src = q_src.dup(name="seen_src") + q_dst = Vector(bool, size=A.nrows, name="q_dst") + q_dst[dst] = True + seen_dst = q_dst.dup(name="seen_dst", clear=True) + any_pair_bool = any_pair[bool] + for i in range(1, A.nrows + 1, 2): + q_src(~seen_src.S, replace) << any_pair_bool(q_src @ A) + if q_src.nvals == 0: + raise NoPath(f"No path between {source} and {target}.") + if any_pair_bool(q_src @ q_dst): + return i + + seen_dst(q_dst.S) << True + q_dst(~seen_dst.S, replace) << any_pair_bool(A @ q_dst) + if q_dst.nvals == 0: + raise NoPath(f"No path between {source} and {target}.") + if any_pair_bool(q_src @ q_dst): + return i + 1 + + seen_src(q_src.S) << True + raise NoPath(f"No path between {source} and {target}.") diff --git a/graphblas_algorithms/algorithms/shortest_paths/weighted.py b/graphblas_algorithms/algorithms/shortest_paths/weighted.py index d2a9f0e..5afa0f4 100644 --- a/graphblas_algorithms/algorithms/shortest_paths/weighted.py +++ b/graphblas_algorithms/algorithms/shortest_paths/weighted.py @@ -3,40 +3,55 @@ from graphblas.semiring import any_pair, min_plus from .._bfs import _bfs_level, _bfs_levels, _bfs_parent, _bfs_plain -from ..exceptions import Unbounded +from ..exceptions import NoPath, Unbounded __all__ = [ "single_source_bellman_ford_path_length", "bellman_ford_path", + "bellman_ford_path_length", "bellman_ford_path_lengths", "negative_edge_cycle", ] -def single_source_bellman_ford_path_length(G, source, *, cutoff=None): +def _bellman_ford_path_length(G, source, target=None, *, cutoff=None, name): # No need for `is_weighted=` keyword, b/c this is assumed to be weighted (I think) - index = G._key_to_id[source] + src_id = G._key_to_id[source] + if target is not None: + dst_id = G._key_to_id[target] + else: + dst_id = None + if G.get_property("is_iso"): # If the edges are iso-valued (and positive), then we can simply do level BFS is_negative, iso_value = G.get_properties("has_negative_edges+ iso_value") if not is_negative: if cutoff is not None: cutoff = int(cutoff // iso_value) - d = _bfs_level(G, source, cutoff, dtype=iso_value.dtype) + d = _bfs_level(G, source, target, cutoff=cutoff, dtype=iso_value.dtype) + if dst_id is not None: + d = d.get(dst_id) + if d is None: + raise NoPath(f"node {target} not reachable from {source}") if iso_value != 1: d *= iso_value return d # It's difficult to detect negative cycles with BFS - if G._A[index, index].get() is not None: + if G._A[src_id, src_id].get() is not None: raise Unbounded("Negative cycle detected.") - if not G.is_directed() and G._A[index, :].nvals > 0: + if not G.is_directed() and G._A[src_id, :].nvals > 0: # For undirected graphs, any negative edge is a cycle raise Unbounded("Negative cycle detected.") # Use `offdiag` instead of `A`, b/c self-loops don't contribute to the result, # and negative self-loops are easy negative cycles to avoid. # We check if we hit a self-loop negative cycle at the end. - A, has_negative_diagonal = G.get_properties("offdiag has_negative_diagonal") + if dst_id is None: + A, has_negative_diagonal = G.get_properties("offdiag has_negative_diagonal") + else: + A, is_negative, has_negative_diagonal = G.get_properties( + "offdiag has_negative_edges- has_negative_diagonal" + ) if A.dtype == bool: # Should we upcast e.g. INT8 to INT64 as well? dtype = int @@ -44,7 +59,7 @@ def single_source_bellman_ford_path_length(G, source, *, cutoff=None): dtype = A.dtype n = A.nrows d = Vector(dtype, n, name="single_source_bellman_ford_path_length") - d[index] = 0 + d[src_id] = 0 cur = d.dup(name="cur") mask = Vector(bool, n, name="mask") one = unary.one[bool] @@ -66,13 +81,16 @@ def single_source_bellman_ford_path_length(G, source, *, cutoff=None): break # Update `d` with values that improved d(cur.S) << cur + if dst_id is not None and not is_negative: + # Limit exploration if we have a target + cutoff = cur.get(dst_id, cutoff) else: # Check for negative cycle when for loop completes without breaking cur << min_plus(cur @ A) if cutoff is not None: cur << select.valuele(cur, cutoff) mask << binary.lt(cur & d) - if mask.reduce(monoid.lor): + if dst_id is None and mask.reduce(monoid.lor) or dst_id is not None and mask.get(dst_id): raise Unbounded("Negative cycle detected.") if has_negative_diagonal: # We removed diagonal entries above, so check if we visited one with a negative weight @@ -80,9 +98,23 @@ def single_source_bellman_ford_path_length(G, source, *, cutoff=None): cur << select.valuelt(diag, 0) if any_pair(d @ cur): raise Unbounded("Negative cycle detected.") + if dst_id is not None: + d = d.get(dst_id) + if d is None: + raise NoPath(f"node {target} not reachable from {source}") return d +def single_source_bellman_ford_path_length( + G, source, *, cutoff=None, name="single_source_bellman_ford_path_length" +): + return _bellman_ford_path_length(G, source, cutoff=cutoff, name=name) + + +def bellman_ford_path_length(G, source, target): + return _bellman_ford_path_length(G, source, target, name="bellman_ford_path_length") + + def bellman_ford_path_lengths(G, nodes=None, *, expand_output=False): """Extra parameter: expand_output @@ -185,7 +217,7 @@ def bellman_ford_path(G, source, target): # If the edges are iso-valued (and positive), then we can simply do level BFS is_negative = G.get_property("has_negative_edges+") if not is_negative: - p = _bfs_parent(G, source, target=target) + p = _bfs_parent(G, source, target) return _reconstruct_path_from_parents(G, p, src_id, dst_id) raise Unbounded("Negative cycle detected.") A, is_negative, has_negative_diagonal = G.get_properties( diff --git a/graphblas_algorithms/algorithms/traversal/breadth_first_search.py b/graphblas_algorithms/algorithms/traversal/breadth_first_search.py index e9be539..a761134 100644 --- a/graphblas_algorithms/algorithms/traversal/breadth_first_search.py +++ b/graphblas_algorithms/algorithms/traversal/breadth_first_search.py @@ -11,7 +11,7 @@ def bfs_layers(G, sources): if sources in G: sources = [sources] ids = G.list_to_ids(sources) - if not ids: + if ids is None or len(ids) == 0: return A = G.get_property("offdiag") n = A.nrows diff --git a/graphblas_algorithms/classes/_utils.py b/graphblas_algorithms/classes/_utils.py index 65ae010..d15a188 100644 --- a/graphblas_algorithms/classes/_utils.py +++ b/graphblas_algorithms/classes/_utils.py @@ -85,7 +85,7 @@ def list_to_ids(self, nodes): if nodes is None: return None key_to_id = self._key_to_id - return [key_to_id[key] for key in nodes] + return np.fromiter((key_to_id[key] for key in nodes), np.uint64) def list_to_keys(self, indices): diff --git a/graphblas_algorithms/classes/digraph.py b/graphblas_algorithms/classes/digraph.py index bae66ae..8da9c8a 100644 --- a/graphblas_algorithms/classes/digraph.py +++ b/graphblas_algorithms/classes/digraph.py @@ -1,4 +1,5 @@ from collections import defaultdict +from copy import deepcopy import graphblas as gb from graphblas import Matrix, binary, replace, select, unary @@ -609,6 +610,15 @@ def to_undirected(self, reciprocal=False, as_view=False, *, name=None): B = binary.any(A | A.T).new(name=name) return Graph(B, key_to_id=self._key_to_id) + def reverse(self, copy=True): + # We could even re-use many of the cached values + A = self._A.T # This probably mostly works, but does not yet support assignment + if copy: + A = A.new() + rv = type(self)(A, key_to_id=self._key_to_id) + rv.graph.update(deepcopy(self.graph)) + return rv + class MultiDiGraph(DiGraph): def is_multigraph(self): diff --git a/graphblas_algorithms/interface.py b/graphblas_algorithms/interface.py index d0a8365..a43b520 100644 --- a/graphblas_algorithms/interface.py +++ b/graphblas_algorithms/interface.py @@ -1,111 +1,174 @@ from . import nxapi ####### -# NOTE: Remember to update README.md when adding or removing algorithms from Dispatcher +# NOTE: Remember to run `python scripts/maketree.py` when adding or removing algorithms +# to automatically add it to README.md. You must still add algorithms below. ####### class Dispatcher: - # Boundary - edge_boundary = nxapi.boundary.edge_boundary - node_boundary = nxapi.boundary.node_boundary - # Centrality - degree_centrality = nxapi.centrality.degree_alg.degree_centrality - eigenvector_centrality = nxapi.centrality.eigenvector.eigenvector_centrality - in_degree_centrality = nxapi.centrality.degree_alg.in_degree_centrality - katz_centrality = nxapi.centrality.katz.katz_centrality - out_degree_centrality = nxapi.centrality.degree_alg.out_degree_centrality - # Cluster - average_clustering = nxapi.cluster.average_clustering - clustering = nxapi.cluster.clustering - generalized_degree = nxapi.cluster.generalized_degree - square_clustering = nxapi.cluster.square_clustering - transitivity = nxapi.cluster.transitivity - triangles = nxapi.cluster.triangles - # Community - inter_community_edges = nxapi.community.quality.inter_community_edges - intra_community_edges = nxapi.community.quality.intra_community_edges - # Components - is_connected = nxapi.components.connected.is_connected - node_connected_component = nxapi.components.connected.node_connected_component - is_weakly_connected = nxapi.components.weakly_connected.is_weakly_connected - # Core - k_truss = nxapi.core.k_truss - # Cuts - boundary_expansion = nxapi.cuts.boundary_expansion - conductance = nxapi.cuts.conductance - cut_size = nxapi.cuts.cut_size - edge_expansion = nxapi.cuts.edge_expansion - mixing_expansion = nxapi.cuts.mixing_expansion - node_expansion = nxapi.cuts.node_expansion - normalized_cut_size = nxapi.cuts.normalized_cut_size - volume = nxapi.cuts.volume - # DAG - ancestors = nxapi.dag.ancestors - descendants = nxapi.dag.descendants - # Dominating - is_dominating_set = nxapi.dominating.is_dominating_set - # Generators - ego_graph = nxapi.generators.ego.ego_graph - # Isolate - is_isolate = nxapi.isolate.is_isolate - isolates = nxapi.isolate.isolates - number_of_isolates = nxapi.isolate.number_of_isolates - # Link Analysis - hits = nxapi.link_analysis.hits_alg.hits - google_matrix = nxapi.link_analysis.pagerank_alg.google_matrix - pagerank = nxapi.link_analysis.pagerank_alg.pagerank - # Operators - compose = nxapi.operators.binary.compose - difference = nxapi.operators.binary.difference - disjoint_union = nxapi.operators.binary.disjoint_union - full_join = nxapi.operators.binary.full_join - intersection = nxapi.operators.binary.intersection - symmetric_difference = nxapi.operators.binary.symmetric_difference - union = nxapi.operators.binary.union - # Reciprocity + # Begin auto-generated code: dispatch + mod = nxapi.boundary + # ================== + edge_boundary = mod.edge_boundary + node_boundary = mod.node_boundary + + mod = nxapi.centrality + # ==================== + degree_centrality = mod.degree_alg.degree_centrality + in_degree_centrality = mod.degree_alg.in_degree_centrality + out_degree_centrality = mod.degree_alg.out_degree_centrality + eigenvector_centrality = mod.eigenvector.eigenvector_centrality + katz_centrality = mod.katz.katz_centrality + + mod = nxapi.cluster + # ================= + average_clustering = mod.average_clustering + clustering = mod.clustering + generalized_degree = mod.generalized_degree + square_clustering = mod.square_clustering + transitivity = mod.transitivity + triangles = mod.triangles + + mod = nxapi.community + # =================== + inter_community_edges = mod.quality.inter_community_edges + intra_community_edges = mod.quality.intra_community_edges + + mod = nxapi.components + # ==================== + is_connected = mod.connected.is_connected + node_connected_component = mod.connected.node_connected_component + is_weakly_connected = mod.weakly_connected.is_weakly_connected + + mod = nxapi.core + # ============== + k_truss = mod.k_truss + + mod = nxapi.cuts + # ============== + boundary_expansion = mod.boundary_expansion + conductance = mod.conductance + cut_size = mod.cut_size + edge_expansion = mod.edge_expansion + mixing_expansion = mod.mixing_expansion + node_expansion = mod.node_expansion + normalized_cut_size = mod.normalized_cut_size + volume = mod.volume + + mod = nxapi.dag + # ============= + ancestors = mod.ancestors + descendants = mod.descendants + + mod = nxapi.dominating + # ==================== + is_dominating_set = mod.is_dominating_set + + mod = nxapi.efficiency_measures + # ============================= + efficiency = mod.efficiency + + mod = nxapi.generators + # ==================== + ego_graph = mod.ego.ego_graph + + mod = nxapi.isolate + # ================= + is_isolate = mod.is_isolate + isolates = mod.isolates + number_of_isolates = mod.number_of_isolates + + mod = nxapi.isomorphism + # ===================== + fast_could_be_isomorphic = mod.isomorph.fast_could_be_isomorphic + faster_could_be_isomorphic = mod.isomorph.faster_could_be_isomorphic + + mod = nxapi.linalg + # ================ + bethe_hessian_matrix = mod.bethehessianmatrix.bethe_hessian_matrix + adjacency_matrix = mod.graphmatrix.adjacency_matrix + laplacian_matrix = mod.laplacianmatrix.laplacian_matrix + normalized_laplacian_matrix = mod.laplacianmatrix.normalized_laplacian_matrix + directed_modularity_matrix = mod.modularitymatrix.directed_modularity_matrix + modularity_matrix = mod.modularitymatrix.modularity_matrix + + mod = nxapi.link_analysis + # ======================= + hits = mod.hits_alg.hits + google_matrix = mod.pagerank_alg.google_matrix + pagerank = mod.pagerank_alg.pagerank + + mod = nxapi.lowest_common_ancestors + # ================================= + lowest_common_ancestor = mod.lowest_common_ancestor + + mod = nxapi.operators + # =================== + compose = mod.binary.compose + difference = mod.binary.difference + disjoint_union = mod.binary.disjoint_union + full_join = mod.binary.full_join + intersection = mod.binary.intersection + symmetric_difference = mod.binary.symmetric_difference + union = mod.binary.union + complement = mod.unary.complement + reverse = mod.unary.reverse + + mod = nxapi.reciprocity + # ===================== overall_reciprocity = nxapi.overall_reciprocity reciprocity = nxapi.reciprocity - # Regular - is_k_regular = nxapi.regular.is_k_regular - is_regular = nxapi.regular.is_regular - # Shortest Paths - floyd_warshall = nxapi.shortest_paths.dense.floyd_warshall - floyd_warshall_numpy = nxapi.shortest_paths.dense.floyd_warshall_numpy - floyd_warshall_predecessor_and_distance = ( - nxapi.shortest_paths.dense.floyd_warshall_predecessor_and_distance - ) - has_path = nxapi.shortest_paths.generic.has_path - single_source_shortest_path_length = ( - nxapi.shortest_paths.unweighted.single_source_shortest_path_length - ) - single_target_shortest_path_length = ( - nxapi.shortest_paths.unweighted.single_target_shortest_path_length - ) - all_pairs_shortest_path_length = nxapi.shortest_paths.unweighted.all_pairs_shortest_path_length - bellman_ford_path = nxapi.shortest_paths.weighted.bellman_ford_path - all_pairs_bellman_ford_path_length = ( - nxapi.shortest_paths.weighted.all_pairs_bellman_ford_path_length - ) - negative_edge_cycle = nxapi.shortest_paths.weighted.negative_edge_cycle - single_source_bellman_ford_path_length = ( - nxapi.shortest_paths.weighted.single_source_bellman_ford_path_length - ) - # Simple Paths - is_simple_path = nxapi.simple_paths.is_simple_path - # S Metric - s_metric = nxapi.smetric.s_metric - # Structural Holes - mutual_weight = nxapi.structuralholes.mutual_weight - # Tournament - is_tournament = nxapi.tournament.is_tournament - score_sequence = nxapi.tournament.score_sequence - tournament_matrix = nxapi.tournament.tournament_matrix - # Traversal - bfs_layers = nxapi.traversal.breadth_first_search.bfs_layers - descendants_at_distance = nxapi.traversal.breadth_first_search.descendants_at_distance - # Triads - is_triad = nxapi.triads.is_triad + + mod = nxapi.regular + # ================= + is_k_regular = mod.is_k_regular + is_regular = mod.is_regular + + mod = nxapi.shortest_paths + # ======================== + floyd_warshall = mod.dense.floyd_warshall + floyd_warshall_numpy = mod.dense.floyd_warshall_numpy + floyd_warshall_predecessor_and_distance = mod.dense.floyd_warshall_predecessor_and_distance + has_path = mod.generic.has_path + all_pairs_shortest_path_length = mod.unweighted.all_pairs_shortest_path_length + single_source_shortest_path_length = mod.unweighted.single_source_shortest_path_length + single_target_shortest_path_length = mod.unweighted.single_target_shortest_path_length + all_pairs_bellman_ford_path_length = mod.weighted.all_pairs_bellman_ford_path_length + bellman_ford_path = mod.weighted.bellman_ford_path + bellman_ford_path_length = mod.weighted.bellman_ford_path_length + negative_edge_cycle = mod.weighted.negative_edge_cycle + single_source_bellman_ford_path_length = mod.weighted.single_source_bellman_ford_path_length + + mod = nxapi.simple_paths + # ====================== + is_simple_path = mod.is_simple_path + + mod = nxapi.smetric + # ================= + s_metric = mod.s_metric + + mod = nxapi.structuralholes + # ========================= + mutual_weight = mod.mutual_weight + + mod = nxapi.tournament + # ==================== + is_tournament = mod.is_tournament + score_sequence = mod.score_sequence + tournament_matrix = mod.tournament_matrix + + mod = nxapi.traversal + # =================== + bfs_layers = mod.breadth_first_search.bfs_layers + descendants_at_distance = mod.breadth_first_search.descendants_at_distance + + mod = nxapi.triads + # ================ + is_triad = mod.is_triad + + del mod + # End auto-generated code: dispatch @staticmethod def convert_from_nx(graph, weight=None, *, name=None): @@ -125,14 +188,30 @@ def convert_from_nx(graph, weight=None, *, name=None): @staticmethod def convert_to_nx(obj, *, name=None): - from graphblas import Matrix + from graphblas import Matrix, io from .classes import Graph if isinstance(obj, Graph): obj = obj.to_networkx() elif isinstance(obj, Matrix): - obj = obj.to_dense(fill_value=False) + if name in { + "adjacency_matrix", + "bethe_hessian_matrix", + "laplacian_matrix", + "normalized_laplacian_matrix", + "tournament_matrix", + }: + obj = io.to_scipy_sparse(obj) + elif name in { + "directed_modularity_matrix", + "floyd_warshall_numpy", + "google_matrix", + "modularity_matrix", + }: + obj = obj.to_dense(fill_value=False) + else: # pragma: no cover + raise RuntimeError(f"Should {name} return a numpy or scipy.sparse array?") return obj @staticmethod diff --git a/graphblas_algorithms/linalg/__init__.py b/graphblas_algorithms/linalg/__init__.py new file mode 100644 index 0000000..5fb0b2b --- /dev/null +++ b/graphblas_algorithms/linalg/__init__.py @@ -0,0 +1,4 @@ +from .bethehessianmatrix import * +from .graphmatrix import * +from .laplacianmatrix import * +from .modularitymatrix import * diff --git a/graphblas_algorithms/linalg/bethehessianmatrix.py b/graphblas_algorithms/linalg/bethehessianmatrix.py new file mode 100644 index 0000000..edd000f --- /dev/null +++ b/graphblas_algorithms/linalg/bethehessianmatrix.py @@ -0,0 +1,25 @@ +from graphblas import Vector, binary + +__all__ = ["bethe_hessian_matrix"] + + +def bethe_hessian_matrix(G, r=None, nodelist=None, *, name="bethe_hessian_matrix"): + A = G._A + if nodelist is not None: + ids = G.list_to_ids(nodelist) + A = A[ids, ids].new() + d = A.reduce_rowwise().new(name="d") + else: + d = G.get_property("plus_rowwise+") + if r is None: + degrees = G.get_property("degrees+") + k = degrees.reduce().get(0) + k2 = (degrees @ degrees).get(0) + r = k2 / k - 1 + n = A.nrows + # result = (r**2 - 1) * I - r * A + D + ri = Vector.from_scalar(r**2 - 1.0, n, name="ri") + ri += d + rI = ri.diag(name=name) + rI(binary.plus) << binary.times(-r, A) # rI += -r * A + return rI diff --git a/graphblas_algorithms/linalg/graphmatrix.py b/graphblas_algorithms/linalg/graphmatrix.py new file mode 100644 index 0000000..0eff6ef --- /dev/null +++ b/graphblas_algorithms/linalg/graphmatrix.py @@ -0,0 +1,19 @@ +from graphblas import unary + +__all__ = ["adjacency_matrix"] + + +def adjacency_matrix(G, nodelist=None, dtype=None, is_weighted=False, *, name="adjacency_matrix"): + if dtype is None: + dtype = G._A.dtype + if G.is_multigraph(): + is_weighted = True # XXX + if nodelist is None: + if not is_weighted: + return unary.one[dtype](G._A).new(name=name) + return G._A.dup(dtype, name=name) + ids = G.list_to_ids(nodelist) + A = G._A[ids, ids].new(dtype, name=name) + if not is_weighted: + A << unary.one(A) + return A diff --git a/graphblas_algorithms/linalg/laplacianmatrix.py b/graphblas_algorithms/linalg/laplacianmatrix.py new file mode 100644 index 0000000..18ed65a --- /dev/null +++ b/graphblas_algorithms/linalg/laplacianmatrix.py @@ -0,0 +1,54 @@ +from graphblas import monoid, unary + +__all__ = [ + "laplacian_matrix", + "normalized_laplacian_matrix", +] + + +def _laplacian_helper(G, nodelist=None, is_weighted=False): + if G.is_multigraph(): + is_weighted = True # XXX + A = G._A + if nodelist is not None: + ids = G.list_to_ids(nodelist) + A = A[ids, ids].new() + if not is_weighted: + A << unary.one(A) + d = A.reduce_rowwise(monoid.plus).new() + elif is_weighted: + d = G.get_property("plus_rowwise+") + else: + d = G.get_property("degrees+") + A = unary.one(A).new() + return d, A + + +def laplacian_matrix(G, nodelist=None, is_weighted=False, *, name="laplacian_matrix"): + d, A = _laplacian_helper(G, nodelist, is_weighted) + D = d.diag(name="D") + return (D - A).new(name=name) + + +def normalized_laplacian_matrix( + G, nodelist=None, is_weighted=False, *, name="normalized_laplacian_matrix" +): + d, A = _laplacian_helper(G, nodelist, is_weighted) + d_invsqrt = unary.sqrt(d).new(name="d_invsqrt") + d_invsqrt << unary.minv(d_invsqrt) + + # XXX: what if `d` is 0 and `d_invsqrt` is infinity? (not tested) + # d_invsqrt(unary.isinf(d_invsqrt)) << 0 + + # Calculate: A_weighted = D_invsqrt @ A @ D_invsqrt + A_weighted = d_invsqrt.outer(d_invsqrt).new(mask=A.S, name=name) + A_weighted *= A + # Alt (no idea which implementation is better) + # D_invsqrt = d_invsqrt.diag(name="D_invsqrt") + # A_weighted = (D_invsqrt @ A).new(name=name) + # A_weighted @= D_invsqrt + + d_invsqrt << unary.one(d_invsqrt) + D = d_invsqrt.diag(name="D") + A_weighted << D - A_weighted + return A_weighted diff --git a/graphblas_algorithms/linalg/modularitymatrix.py b/graphblas_algorithms/linalg/modularitymatrix.py new file mode 100644 index 0000000..1efff65 --- /dev/null +++ b/graphblas_algorithms/linalg/modularitymatrix.py @@ -0,0 +1,37 @@ +from graphblas import monoid, unary + +from .laplacianmatrix import _laplacian_helper + +__all__ = ["modularity_matrix", "directed_modularity_matrix"] + + +def modularity_matrix(G, nodelist=None, is_weighted=False, *, name="modularity_matrix"): + k, A = _laplacian_helper(G, nodelist, is_weighted) + m = k.reduce().get(0) + X = k.outer(k).new(float, name=name) + X /= m + X << A - X + return X + + +def directed_modularity_matrix( + G, nodelist=None, is_weighted=False, *, name="directed_modularity_matrix" +): + A = G._A + if nodelist is not None: + ids = G.list_to_ids(nodelist) + A = A[ids, ids].new() + if not is_weighted: + A << unary.one(A) + k_out = A.reduce_rowwise(monoid.plus).new() + k_in = A.reduce_columnwise(monoid.plus).new() + elif is_weighted: + k_out, k_in = G.get_properties("plus_rowwise+ plus_columnwise+") + else: + A = unary.one(A).new() + k_out, k_in = G.get_properties("row_degrees+ column_degrees+") + m = k_out.reduce().get(0) + X = k_out.outer(k_in).new(float, name=name) + X /= m + X << A - X + return X diff --git a/graphblas_algorithms/nxapi/__init__.py b/graphblas_algorithms/nxapi/__init__.py index 2d36017..97d4249 100644 --- a/graphblas_algorithms/nxapi/__init__.py +++ b/graphblas_algorithms/nxapi/__init__.py @@ -7,9 +7,13 @@ from .cuts import * from .dag import * from .dominating import * +from .efficiency_measures import * from .generators import * from .isolate import * +from .isomorphism import fast_could_be_isomorphic, faster_could_be_isomorphic +from .linalg import * from .link_analysis import * +from .lowest_common_ancestors import * from .operators import * from .reciprocity import * from .regular import * @@ -19,13 +23,18 @@ from .structuralholes import * from .traversal import * from .triads import * +from .tournament import is_tournament from . import centrality from . import cluster from . import community from . import components +from . import efficiency_measures from . import generators +from . import isomorphism +from . import linalg from . import link_analysis +from . import lowest_common_ancestors from . import operators from . import shortest_paths from . import tournament diff --git a/graphblas_algorithms/nxapi/efficiency_measures.py b/graphblas_algorithms/nxapi/efficiency_measures.py new file mode 100644 index 0000000..06971a2 --- /dev/null +++ b/graphblas_algorithms/nxapi/efficiency_measures.py @@ -0,0 +1,9 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.graph import to_undirected_graph +from graphblas_algorithms.utils import not_implemented_for + + +@not_implemented_for("directed") +def efficiency(G, u, v): + G = to_undirected_graph(G) + return algorithms.efficiency(G, u, v) diff --git a/graphblas_algorithms/nxapi/exception.py b/graphblas_algorithms/nxapi/exception.py index 2630384..0804bb1 100644 --- a/graphblas_algorithms/nxapi/exception.py +++ b/graphblas_algorithms/nxapi/exception.py @@ -5,6 +5,9 @@ class NetworkXError(Exception): pass + class NetworkXNoPath(Exception): + pass + class NetworkXPointlessConcept(Exception): pass @@ -20,6 +23,7 @@ class PowerIterationFailedConvergence(Exception): else: from networkx import ( NetworkXError, + NetworkXNoPath, NetworkXPointlessConcept, NetworkXUnbounded, NodeNotFound, diff --git a/graphblas_algorithms/nxapi/isomorphism/__init__.py b/graphblas_algorithms/nxapi/isomorphism/__init__.py new file mode 100644 index 0000000..e701b70 --- /dev/null +++ b/graphblas_algorithms/nxapi/isomorphism/__init__.py @@ -0,0 +1 @@ +from .isomorph import * diff --git a/graphblas_algorithms/nxapi/isomorphism/isomorph.py b/graphblas_algorithms/nxapi/isomorphism/isomorph.py new file mode 100644 index 0000000..1dedb64 --- /dev/null +++ b/graphblas_algorithms/nxapi/isomorphism/isomorph.py @@ -0,0 +1,25 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +__all__ = [ + "fast_could_be_isomorphic", + "faster_could_be_isomorphic", +] + + +def fast_could_be_isomorphic(G1, G2): + G1 = to_graph(G1) + G2 = to_graph(G2) + return algorithms.fast_could_be_isomorphic(G1, G2) + + +fast_graph_could_be_isomorphic = fast_could_be_isomorphic + + +def faster_could_be_isomorphic(G1, G2): + G1 = to_graph(G1) + G2 = to_graph(G2) + return algorithms.faster_could_be_isomorphic(G1, G2) + + +faster_graph_could_be_isomorphic = faster_could_be_isomorphic diff --git a/graphblas_algorithms/nxapi/linalg/__init__.py b/graphblas_algorithms/nxapi/linalg/__init__.py new file mode 100644 index 0000000..aada0f4 --- /dev/null +++ b/graphblas_algorithms/nxapi/linalg/__init__.py @@ -0,0 +1,5 @@ +from . import bethehessianmatrix, graphmatrix, laplacianmatrix, modularitymatrix +from .bethehessianmatrix import * +from .graphmatrix import * +from .laplacianmatrix import * +from .modularitymatrix import * diff --git a/graphblas_algorithms/nxapi/linalg/bethehessianmatrix.py b/graphblas_algorithms/nxapi/linalg/bethehessianmatrix.py new file mode 100644 index 0000000..7fa30b4 --- /dev/null +++ b/graphblas_algorithms/nxapi/linalg/bethehessianmatrix.py @@ -0,0 +1,12 @@ +from graphblas_algorithms import linalg +from graphblas_algorithms.classes.graph import to_undirected_graph +from graphblas_algorithms.utils import not_implemented_for + +__all__ = ["bethe_hessian_matrix"] + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +def bethe_hessian_matrix(G, r=None, nodelist=None): + G = to_undirected_graph(G) + return linalg.bethe_hessian_matrix(G, r=r, nodelist=nodelist) diff --git a/graphblas_algorithms/nxapi/linalg/graphmatrix.py b/graphblas_algorithms/nxapi/linalg/graphmatrix.py new file mode 100644 index 0000000..0b3e7d9 --- /dev/null +++ b/graphblas_algorithms/nxapi/linalg/graphmatrix.py @@ -0,0 +1,9 @@ +from graphblas_algorithms import linalg +from graphblas_algorithms.classes.digraph import to_graph + +__all__ = ["adjacency_matrix"] + + +def adjacency_matrix(G, nodelist=None, dtype=None, weight="weight"): + G = to_graph(G, weight=weight, dtype=dtype) + return linalg.adjacency_matrix(G, nodelist, dtype, is_weighted=weight is not None) diff --git a/graphblas_algorithms/nxapi/linalg/laplacianmatrix.py b/graphblas_algorithms/nxapi/linalg/laplacianmatrix.py new file mode 100644 index 0000000..752ca1e --- /dev/null +++ b/graphblas_algorithms/nxapi/linalg/laplacianmatrix.py @@ -0,0 +1,14 @@ +from graphblas_algorithms import linalg +from graphblas_algorithms.classes.digraph import to_graph + +__all__ = ["laplacian_matrix", "normalized_laplacian_matrix"] + + +def laplacian_matrix(G, nodelist=None, weight="weight"): + G = to_graph(G, weight=weight) + return linalg.laplacian_matrix(G, nodelist, is_weighted=weight is not None) + + +def normalized_laplacian_matrix(G, nodelist=None, weight="weight"): + G = to_graph(G, weight=weight) + return linalg.normalized_laplacian_matrix(G, nodelist, is_weighted=weight is not None) diff --git a/graphblas_algorithms/nxapi/linalg/modularitymatrix.py b/graphblas_algorithms/nxapi/linalg/modularitymatrix.py new file mode 100644 index 0000000..76e160f --- /dev/null +++ b/graphblas_algorithms/nxapi/linalg/modularitymatrix.py @@ -0,0 +1,20 @@ +from graphblas_algorithms import linalg +from graphblas_algorithms.classes.digraph import to_directed_graph +from graphblas_algorithms.classes.graph import to_undirected_graph +from graphblas_algorithms.utils import not_implemented_for + +__all__ = ["modularity_matrix", "directed_modularity_matrix"] + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +def modularity_matrix(G, nodelist=None, weight=None): + G = to_undirected_graph(G, weight=weight) + return linalg.modularity_matrix(G, nodelist, is_weighted=weight is not None) + + +@not_implemented_for("undirected") +@not_implemented_for("multigraph") +def directed_modularity_matrix(G, nodelist=None, weight=None): + G = to_directed_graph(G, weight=weight) + return linalg.directed_modularity_matrix(G, nodelist, is_weighted=weight is not None) diff --git a/graphblas_algorithms/nxapi/lowest_common_ancestors.py b/graphblas_algorithms/nxapi/lowest_common_ancestors.py new file mode 100644 index 0000000..f94e8c2 --- /dev/null +++ b/graphblas_algorithms/nxapi/lowest_common_ancestors.py @@ -0,0 +1,11 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_directed_graph +from graphblas_algorithms.utils import not_implemented_for + +__all__ = ["lowest_common_ancestor"] + + +@not_implemented_for("undirected") +def lowest_common_ancestor(G, node1, node2, default=None): + G = to_directed_graph(G) + return algorithms.lowest_common_ancestor(G, node1, node2, default=default) diff --git a/graphblas_algorithms/nxapi/operators/__init__.py b/graphblas_algorithms/nxapi/operators/__init__.py index 6e308b5..c2742b9 100644 --- a/graphblas_algorithms/nxapi/operators/__init__.py +++ b/graphblas_algorithms/nxapi/operators/__init__.py @@ -1 +1,2 @@ from .binary import * +from .unary import * diff --git a/graphblas_algorithms/nxapi/operators/unary.py b/graphblas_algorithms/nxapi/operators/unary.py new file mode 100644 index 0000000..6633b3b --- /dev/null +++ b/graphblas_algorithms/nxapi/operators/unary.py @@ -0,0 +1,22 @@ +from graphblas_algorithms import algorithms +from graphblas_algorithms.classes.digraph import to_graph + +from ..exception import NetworkXError + +__all__ = [ + "complement", + "reverse", +] + + +def complement(G): + G = to_graph(G) + return algorithms.complement(G) + + +def reverse(G, copy=True): + G = to_graph(G) + try: + return algorithms.reverse(G, copy=copy) + except algorithms.exceptions.GraphBlasAlgorithmException as e: + raise NetworkXError(*e.args) from e diff --git a/graphblas_algorithms/nxapi/shortest_paths/dense.py b/graphblas_algorithms/nxapi/shortest_paths/dense.py index cc86eb7..82c2eed 100644 --- a/graphblas_algorithms/nxapi/shortest_paths/dense.py +++ b/graphblas_algorithms/nxapi/shortest_paths/dense.py @@ -1,5 +1,3 @@ -import numpy as np - from graphblas_algorithms import algorithms from graphblas_algorithms.classes.digraph import to_graph @@ -28,7 +26,7 @@ def floyd_warshall_numpy(G, nodelist=None, weight="weight"): if nodelist is not None: if not (len(nodelist) == len(G) == len(set(nodelist))): raise NetworkXError("nodelist must contain every node in G with no repeats.") - permutation = np.array(G.list_to_ids(nodelist), np.uint64) + permutation = G.list_to_ids(nodelist) else: permutation = None try: diff --git a/graphblas_algorithms/nxapi/shortest_paths/weighted.py b/graphblas_algorithms/nxapi/shortest_paths/weighted.py index 9916e45..b08dd85 100644 --- a/graphblas_algorithms/nxapi/shortest_paths/weighted.py +++ b/graphblas_algorithms/nxapi/shortest_paths/weighted.py @@ -1,12 +1,13 @@ -from graphblas_algorithms import algorithms +from graphblas_algorithms import algorithms, exceptions from graphblas_algorithms.classes.digraph import to_graph from .._utils import normalize_chunksize, partition -from ..exception import NetworkXUnbounded, NodeNotFound +from ..exception import NetworkXNoPath, NetworkXUnbounded, NodeNotFound __all__ = [ "all_pairs_bellman_ford_path_length", "bellman_ford_path", + "bellman_ford_path_length", "negative_edge_cycle", "single_source_bellman_ford_path_length", ] @@ -65,6 +66,16 @@ def bellman_ford_path(G, source, target, weight="weight"): raise NodeNotFound(*e.args) from e +def bellman_ford_path_length(G, source, target, weight="weight"): + G = to_graph(G, weight=weight) + try: + return algorithms.bellman_ford_path_length(G, source, target) + except KeyError as e: + raise NodeNotFound(*e.args) from e + except exceptions.NoPath as e: + raise NetworkXNoPath(*e.args) from e + + def negative_edge_cycle(G, weight="weight", heuristic=True): # TODO: what if weight is a function? # TODO: use a heuristic to try to stop early diff --git a/graphblas_algorithms/nxapi/tournament.py b/graphblas_algorithms/nxapi/tournament.py index d951ade..6c1bb1f 100644 --- a/graphblas_algorithms/nxapi/tournament.py +++ b/graphblas_algorithms/nxapi/tournament.py @@ -1,5 +1,3 @@ -from graphblas import io - from graphblas_algorithms import algorithms from graphblas_algorithms.classes.digraph import to_directed_graph from graphblas_algorithms.utils import not_implemented_for @@ -28,6 +26,5 @@ def score_sequence(G): @not_implemented_for("multigraph") def tournament_matrix(G): G = to_directed_graph(G) - T = algorithms.tournament_matrix(G) # TODO: can we return a different, more native object? - return io.to_scipy_sparse(T) + return algorithms.tournament_matrix(G) diff --git a/graphblas_algorithms/tests/test_match_nx.py b/graphblas_algorithms/tests/test_match_nx.py index 6250b1d..225c970 100644 --- a/graphblas_algorithms/tests/test_match_nx.py +++ b/graphblas_algorithms/tests/test_match_nx.py @@ -162,13 +162,20 @@ def test_dispatched_funcs_in_nxapi(nx_names_to_info, gb_names_to_info): raise AssertionError +def get_fullname(info): + fullname = info.fullname + if not fullname.endswith(f".{info.dispatchname}"): + fullname += f" ({info.dispatchname})" + return fullname + + def test_print_dispatched_not_implemented(nx_names_to_info, gb_names_to_info): """It may be informative to see the results from this to identify functions to implement. $ pytest -s -k test_print_dispatched_not_implemented """ not_implemented = nx_names_to_info.keys() - gb_names_to_info.keys() - fullnames = {next(iter(nx_names_to_info[name])).fullname for name in not_implemented} + fullnames = {get_fullname(next(iter(nx_names_to_info[name]))) for name in not_implemented} print() print("=================================================================================") print("Functions dispatched in NetworkX that ARE NOT implemented in graphblas-algorithms") @@ -184,7 +191,7 @@ def test_print_dispatched_implemented(nx_names_to_info, gb_names_to_info): $ pytest -s -k test_print_dispatched_implemented """ implemented = nx_names_to_info.keys() & gb_names_to_info.keys() - fullnames = {next(iter(nx_names_to_info[name])).fullname for name in implemented} + fullnames = {get_fullname(next(iter(nx_names_to_info[name]))) for name in implemented} print() print("=============================================================================") print("Functions dispatched in NetworkX that ARE implemented in graphblas-algorithms") diff --git a/pyproject.toml b/pyproject.toml index 1772aa2..36afd28 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -95,6 +95,7 @@ packages = [ "graphblas_algorithms.algorithms.centrality", "graphblas_algorithms.algorithms.community", "graphblas_algorithms.algorithms.components", + "graphblas_algorithms.algorithms.isomorphism", "graphblas_algorithms.algorithms.link_analysis", "graphblas_algorithms.algorithms.operators", "graphblas_algorithms.algorithms.shortest_paths", @@ -102,11 +103,14 @@ packages = [ "graphblas_algorithms.algorithms.traversal", "graphblas_algorithms.classes", "graphblas_algorithms.generators", + "graphblas_algorithms.linalg", "graphblas_algorithms.nxapi", "graphblas_algorithms.nxapi.centrality", "graphblas_algorithms.nxapi.community", "graphblas_algorithms.nxapi.components", "graphblas_algorithms.nxapi.generators", + "graphblas_algorithms.nxapi.isomorphism", + "graphblas_algorithms.nxapi.linalg", "graphblas_algorithms.nxapi.link_analysis", "graphblas_algorithms.nxapi.operators", "graphblas_algorithms.nxapi.shortest_paths", @@ -231,6 +235,7 @@ ignore = [ "TID", # flake8-tidy-imports (Rely on isort and our own judgement) "TCH", # flake8-type-checking (Note: figure out type checking later) "ARG", # flake8-unused-arguments (Sometimes helpful, but too strict) + "TD", # flake8-todos (Maybe okay to add some of these) "ERA", # eradicate (We like code in comments!) "PD", # pandas-vet (Intended for scripts that use pandas, not libraries) ] @@ -238,6 +243,7 @@ ignore = [ [tool.ruff.per-file-ignores] "__init__.py" = ["F401"] # Allow unused imports (w/o defining `__all__`) "graphblas_algorithms/**/tests/*py" = ["S101", "T201", "D103", "D100"] # Allow assert, print, and no docstring +"graphblas_algorithms/interface.py" = ["PIE794"] # Allow us to use `mod = nxapi.` repeatedly "graphblas_algorithms/nxapi/exception.py" = ["F401"] # Allow unused imports (w/o defining `__all__`) "scripts/*.py" = ["INP001", "S101", "T201"] # Not a package, allow assert, allow print diff --git a/scripts/maketree.py b/scripts/maketree.py new file mode 100755 index 0000000..e4deed5 --- /dev/null +++ b/scripts/maketree.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python +"""Run this script to auto-generate API when adding or removing nxapi functions. + +This updates API tree in README.md and dispatch functions in `graphblas_algorithms/interface.py`. + +""" +from io import StringIO +from pathlib import Path + +import rich +from graphblas.core.utils import _autogenerate_code +from rich.tree import Tree + +from graphblas_algorithms.tests import test_match_nx +from graphblas_algorithms.tests.test_match_nx import get_fullname + + +def get_fixture(attr): + return getattr(test_match_nx, attr).__wrapped__ + + +def trim(name): + for prefix in ["networkx.algorithms.", "networkx."]: + if name.startswith(prefix): + return name[len(prefix) :] + raise ValueError(f"{name!r} does not begin with a recognized prefix") + + +def get_names(): + nx_names_to_info = get_fixture("nx_names_to_info")(get_fixture("nx_info")()) + gb_names_to_info = get_fixture("gb_names_to_info")(get_fixture("gb_info")()) + implemented = nx_names_to_info.keys() & gb_names_to_info.keys() + return sorted(trim(get_fullname(next(iter(nx_names_to_info[name])))) for name in implemented) + + +# Dispatched functions that are only available from `nxapi` +SHORTPATH = { + "overall_reciprocity", + "reciprocity", +} + + +def main(print_to_console=True, update_readme=True, update_interface=True): + fullnames = get_names() + # Step 1: add to README.md + tree = Tree("graphblas_algorithms.nxapi") + subtrees = {} + + def addtree(path): + if path in subtrees: + rv = subtrees[path] + elif "." not in path: + rv = subtrees[path] = tree.add(path) + else: + subpath, last = path.rsplit(".", 1) + subtree = addtree(subpath) + rv = subtrees[path] = subtree.add(last) + return rv + + for fullname in fullnames: + addtree(fullname) + if print_to_console: + rich.print(tree) + if update_readme: + s = StringIO() + rich.print(tree, file=s) + s.seek(0) + text = s.read() + _autogenerate_code( + Path(__file__).parent.parent / "README.md", + f"\n```\n{text}```\n\n", + begin="[//]: # (Begin auto-generated code)", + end="[//]: # (End auto-generated code)", + callblack=False, + ) + # Step 2: add to interface.py + lines = [] + prev_mod = None + for fullname in fullnames: + mod, subpath = fullname.split(".", 1) + if mod != prev_mod: + if prev_mod is not None: + lines.append("") + prev_mod = mod + lines.append(f" mod = nxapi.{mod}") + lines.append(" # " + "=" * (len(mod) + 10)) + if " (" in subpath: + subpath, name = subpath.rsplit(" (", 1) + name = name.split(")")[0] + else: + name = subpath.rsplit(".", 1)[-1] + if name in SHORTPATH: + subpath = subpath.rsplit(".", 1)[-1] + lines.append(f" {name} = nxapi.{subpath}") + else: + lines.append(f" {name} = mod.{subpath}") + lines.append("") + lines.append(" del mod") + lines.append("") + text = "\n".join(lines) + if update_interface: + _autogenerate_code( + Path(__file__).parent.parent / "graphblas_algorithms" / "interface.py", + text, + specializer="dispatch", + ) + return tree + + +if __name__ == "__main__": + main()