r"""
Edge creation for same-voltage subgraphs using the Chung-Lu Chain (CLC) model.
Implements Algorithm 2 (CLC) from `Aksoy et al. (2018)
<https://academic.oup.com/comnet/article-abstract/7/1/128/5073058>`_
(arXiv:1711.11098, Appendix A.2).
"""
import numpy as np
import random
from typing import List, Set, Tuple, Dict
[docs]
class EdgeCreator:
r"""
Generate edges for a same-voltage subgraph via the CLC model.
The CLC model creates edges in three stages:
1. A deterministic **diameter path** connecting vertices across boxes
:math:`0, 1, \dots, \delta` to guarantee the desired diameter.
2. A deterministic **subdiameter path** providing alternate routes and
long cycles.
3. **Within-box fast Chung-Lu** random graphs, where each box is an
independent Chung-Lu realization.
See Algorithm 2 in `Aksoy et al. (2018) <https://doi.org/10.1093/comnet/cny016>`_.
"""
[docs]
def generate_edges(self, d_prime: np.ndarray, v: np.ndarray, D: Set[int], S: Set[int]) -> List[Tuple[int, int]]:
r"""
Generate the edge list for one same-voltage subgraph.
Implements **CLC**\ (:math:`\mathbf{d}', \mathbf{v}, D, S`) → :math:`E`
(Algorithm 2).
Parameters
----------
d_prime : numpy.ndarray
Inflated degree sequence from :meth:`Preprocessor.run_setup`.
v : numpy.ndarray
Vertex-to-box mapping where ``v[i]`` is the box ID for node *i*.
D : set of int
Diameter path vertex indices (one per box, spanning boxes
:math:`0` to :math:`\delta`).
S : set of int
Subdiameter path vertex indices.
Returns
-------
list of tuple[int, int]
Edge list as ``(u, w)`` pairs with ``u < w``.
Notes
-----
The within-box Chung-Lu stage uses the *fast Chung-Lu*
implementation: for each box :math:`k`, :math:`m_k` edges are
generated by sampling endpoints proportionally to their desired
degree (``random.choices``). Self-loops are discarded.
"""
edges = set()
# --- Lines 3-7: Make Diameter Path ---
# The paper iterates k=1..|D|-1 and finds nodes with box k and k+1.
# Since our boxes might be 0-indexed or non-contiguous (for S),
# a robust equivalent is to sort the path nodes by their box ID
# and connect adjacent ones.
d_path_nodes = sorted(list(D), key=lambda node_idx: v[node_idx])
# Connect node in Box k to node in Box k+1
for k in range(len(d_path_nodes) - 1):
u, w = d_path_nodes[k], d_path_nodes[k+1]
# Ensure we are connecting nodes in adjacent boxes (just a sanity check)
# Logic: The path is defined by the spatial sequence of boxes.
edges.add(tuple(sorted((u, w))))
# --- Lines 8-12: Make Subdiameter Path ---
s_path_nodes = sorted(list(S), key=lambda node_idx: v[node_idx])
for k in range(len(s_path_nodes) - 1):
u, w = s_path_nodes[k], s_path_nodes[k+1]
edges.add(tuple(sorted((u, w))))
# --- Lines 13-22: Chung-Lu graph in each box ---
# "For k = 1...max(v)" -> Iterate over all active boxes
unique_boxes = np.unique(v)
# Filter out -1 (unassigned) if any exists (though Algorithm 1 should fill them)
unique_boxes = unique_boxes[unique_boxes != -1]
for box_id in unique_boxes:
# Line 15: B_k <- {j : v_j = k} (All vertices in box k)
B_k = np.where(v == box_id)[0]
if len(B_k) < 2:
continue
# Line 16: m_k <- round(0.5 * sum(d_i for i in B_k))
degrees_in_box = d_prime[B_k]
sum_degrees = np.sum(degrees_in_box)
m_k = int(round(0.5 * sum_degrees))
if m_k <= 0:
continue
# Lines 17-21: Generate m_k edges probabilistically
# We use random.choices for weighted sampling with replacement.
# Weight for node i is d_i.
# Select 2 * m_k nodes (to form m_k pairs)
# This is equivalent to "Select i proportional to d_i, Select j proportional to d_j"
# repeated m_k times.
nodes_selected = random.choices(B_k, weights=degrees_in_box, k=2 * m_k)
# Pair them up
for idx in range(0, len(nodes_selected), 2):
i = nodes_selected[idx]
j = nodes_selected[idx+1]
# Line 20: E <- E U {i, j} (Discard loops)
if i != j:
# Sort to ensure (u, v) is same as (v, u) in set
edges.add(tuple(sorted((i, j))))
return list(edges)