Skip to content

Commit

Permalink
[Utils] Node homophily measure (#5376)
Browse files Browse the repository at this point in the history
* Update

* lint

* lint

* r prefix

* CI

* lint

* skip TF

* Update

---------

Co-authored-by: Ubuntu <ubuntu@ip-172-31-36-188.ap-northeast-1.compute.internal>
  • Loading branch information
mufeili and Ubuntu committed Feb 24, 2023
1 parent 5ffd2a0 commit fcf5ad5
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 0 deletions.
10 changes: 10 additions & 0 deletions docs/source/api/python/dgl.rst
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,16 @@ set at each iteration. ``prop_edges_YYY`` applies traversal algorithm ``YYY`` an
prop_edges
prop_edges_dfs

Homophily Measures
-------------------------

Utilities for measuring homophily of a graph

.. autosummary::
    :toctree: ../../generated/

    node_homophily

Utilities
-----------------------------------------------

Expand Down
1 change: 1 addition & 0 deletions python/dgl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
DGLGraph,
DGLGraph as DGLHeteroGraph,
)
from .homophily import *
from .merge import *
from .subgraph import *
from .traversal import *
Expand Down
57 changes: 57 additions & 0 deletions python/dgl/homophily.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""Utils for measuring graph homophily and heterophily"""
from . import backend as F, function as fn

# Names exported by a star-import of this module; the package ``__init__``
# pulls them in with ``from .homophily import *``.
__all__ = ["node_homophily"]


def node_homophily(graph, y):
    r"""Compute the node homophily of a graph.

    The measure comes from `Geom-GCN: Geometric Graph Convolutional Networks
    <https://arxiv.org/abs/2002.05287>`__. The name "node homophily" follows
    the later paper `Large Scale Learning on Non-Homophilous Graphs: New
    Benchmarks and Strong Simple Methods
    <https://arxiv.org/abs/2110.14446>`__.

    Mathematically it is defined as follows:

    .. math::
      \frac{1}{|\mathcal{V}|} \sum_{v \in \mathcal{V}} \frac{ | \{ (u,v) : u
      \in \mathcal{N}(v) \wedge y_v = y_u \} | } { |\mathcal{N}(v)| }

    where :math:`\mathcal{V}` is the set of nodes, :math:`\mathcal{N}(v)` is
    the predecessors of node :math:`v`, and :math:`y_v` is the class of node
    :math:`v`.

    Parameters
    ----------
    graph : DGLGraph
        The graph.
    y : Tensor
        The node labels, which is a tensor of shape (|V|).

    Returns
    -------
    float
        The node homophily value.

    Examples
    --------
    Nodes 0, 1 and 2 each share the label of their single predecessor, node 3
    differs from its predecessor, and node 4 has no predecessor, so the
    result is 3/5.

    >>> import dgl
    >>> import torch

    >>> graph = dgl.graph(([1, 2, 0, 4], [0, 1, 2, 3]))
    >>> y = torch.tensor([0, 0, 0, 0, 1])
    >>> dgl.node_homophily(graph, y)
    0.6000000238418579
    """
    # The features written below are temporaries; the local scope keeps them
    # off the caller's graph.
    with graph.local_scope():
        u, v = graph.edges()
        # Graphs may be of dtype int32; cast the endpoint IDs to int64 before
        # using them to index the label tensor.
        u = F.astype(u, F.int64)
        v = F.astype(v, F.int64)
        # Per-edge indicator: 1.0 when both endpoints carry the same label.
        label_match = y[u] == y[v]
        graph.edata["same_class"] = F.astype(label_match, F.float32)
        # Average the indicator over the incoming edges of every node.
        message_func = fn.copy_e("same_class", "m")
        reduce_func = fn.mean("m", "node_value")
        graph.update_all(message_func, reduce_func)
        # Average the per-node ratios and return a Python scalar.
        per_node_ratio = graph.ndata["node_value"]
        return per_node_ratio.mean().item()
19 changes: 19 additions & 0 deletions tests/python/common/test_homophily.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import unittest

import backend as F

import dgl
from test_utils import parametrize_idtype


@unittest.skipIf(dgl.backend.backend_name == "tensorflow", reason="Skip TF")
@parametrize_idtype
def test_node_homophily(idtype):
    """Check ``dgl.node_homophily`` on the small graph from its docstring.

    The test is skipped when the backend is TensorFlow. ``idtype`` is supplied
    by ``parametrize_idtype`` (presumably the supported graph index dtypes --
    confirm against ``test_utils``).
    """
    # Function-scope import keeps this change self-contained.
    import math

    # IfChangeThenChange: python/dgl/homophily.py
    # Update the docstring example.
    device = F.ctx()
    graph = dgl.graph(
        ([1, 2, 0, 4], [0, 1, 2, 3]), idtype=idtype, device=device
    )
    y = F.tensor([0, 0, 0, 0, 1])
    # Three of the five nodes fully agree with their predecessors -> 3/5.
    # The value comes from float32 arithmetic, so compare with a tolerance
    # instead of exact equality against a float literal.
    assert math.isclose(dgl.node_homophily(graph, y), 0.6, abs_tol=1e-6)

0 comments on commit fcf5ad5

Please sign in to comment.