1""" 2altgraph.Graph - Base Graph class 3================================= 4 5.. 6 #--Version 2.1 7 #--Bob Ippolito October, 2004 8 9 #--Version 2.0 10 #--Istvan Albert June, 2004 11 12 #--Version 1.0 13 #--Nathan Denny, May 27, 1999 14""" 15 16from altgraph import GraphError 17from collections import deque 18 19class Graph(object): 20 """ 21 The Graph class represents a directed graph with *N* nodes and *E* edges. 22 23 Naming conventions: 24 25 - the prefixes such as *out*, *inc* and *all* will refer to methods 26 that operate on the outgoing, incoming or all edges of that node. 27 28 For example: :py:meth:`inc_degree` will refer to the degree of the node 29 computed over the incoming edges (the number of neighbours linking to 30 the node). 31 32 - the prefixes such as *forw* and *back* will refer to the 33 orientation of the edges used in the method with respect to the node. 34 35 For example: :py:meth:`forw_bfs` will start at the node then use the outgoing 36 edges to traverse the graph (goes forward). 37 """ 38 39 def __init__(self, edges=None): 40 """ 41 Initialization 42 """ 43 44 self.next_edge = 0 45 self.nodes, self.edges = {}, {} 46 self.hidden_edges, self.hidden_nodes = {}, {} 47 48 if edges is not None: 49 for item in edges: 50 if len(item) == 2: 51 head, tail = item 52 self.add_edge(head, tail) 53 elif len(item) == 3: 54 head, tail, data = item 55 self.add_edge(head, tail, data) 56 else: 57 raise GraphError("Cannot create edge from %s"%(item,)) 58 59 60 def __repr__(self): 61 return '<Graph: %d nodes, %d edges>' % ( 62 self.number_of_nodes(), self.number_of_edges()) 63 64 def add_node(self, node, node_data=None): 65 """ 66 Adds a new node to the graph. Arbitrary data can be attached to the 67 node via the node_data parameter. Adding the same node twice will be 68 silently ignored. 69 70 The node must be a hashable value. 71 """ 72 # 73 # the nodes will contain tuples that will store incoming edges, 74 # outgoing edges and data 75 # 76 # index 0 -> incoming edges 77 # index 1 -> outgoing edges 78 79 if node in self.hidden_nodes: 80 # Node is present, but hidden 81 return 82 83 if node not in self.nodes: 84 self.nodes[node] = ([], [], node_data) 85 86 def add_edge(self, head_id, tail_id, edge_data=1, create_nodes=True): 87 """ 88 Adds a directed edge going from head_id to tail_id. 89 Arbitrary data can be attached to the edge via edge_data. 90 It may create the nodes if adding edges between nonexisting ones. 91 92 :param head_id: head node 93 :param tail_id: tail node 94 :param edge_data: (optional) data attached to the edge 95 :param create_nodes: (optional) creates the head_id or tail_id node in case they did not exist 96 """ 97 # shorcut 98 edge = self.next_edge 99 100 # add nodes if on automatic node creation 101 if create_nodes: 102 self.add_node(head_id) 103 self.add_node(tail_id) 104 105 # update the corresponding incoming and outgoing lists in the nodes 106 # index 0 -> incoming edges 107 # index 1 -> outgoing edges 108 109 try: 110 self.nodes[tail_id][0].append(edge) 111 self.nodes[head_id][1].append(edge) 112 except KeyError: 113 raise GraphError('Invalid nodes %s -> %s' % (head_id, tail_id)) 114 115 # store edge information 116 self.edges[edge] = (head_id, tail_id, edge_data) 117 118 119 self.next_edge += 1 120 121 def hide_edge(self, edge): 122 """ 123 Hides an edge from the graph. The edge may be unhidden at some later 124 time. 125 """ 126 try: 127 head_id, tail_id, edge_data = self.hidden_edges[edge] = self.edges[edge] 128 self.nodes[tail_id][0].remove(edge) 129 self.nodes[head_id][1].remove(edge) 130 del self.edges[edge] 131 except KeyError: 132 raise GraphError('Invalid edge %s' % edge) 133 134 def hide_node(self, node): 135 """ 136 Hides a node from the graph. The incoming and outgoing edges of the 137 node will also be hidden. The node may be unhidden at some later time. 138 """ 139 try: 140 all_edges = self.all_edges(node) 141 self.hidden_nodes[node] = (self.nodes[node], all_edges) 142 for edge in all_edges: 143 self.hide_edge(edge) 144 del self.nodes[node] 145 except KeyError: 146 raise GraphError('Invalid node %s' % node) 147 148 def restore_node(self, node): 149 """ 150 Restores a previously hidden node back into the graph and restores 151 all of its incoming and outgoing edges. 152 """ 153 try: 154 self.nodes[node], all_edges = self.hidden_nodes[node] 155 for edge in all_edges: 156 self.restore_edge(edge) 157 del self.hidden_nodes[node] 158 except KeyError: 159 raise GraphError('Invalid node %s' % node) 160 161 def restore_edge(self, edge): 162 """ 163 Restores a previously hidden edge back into the graph. 164 """ 165 try: 166 head_id, tail_id, data = self.hidden_edges[edge] 167 self.nodes[tail_id][0].append(edge) 168 self.nodes[head_id][1].append(edge) 169 self.edges[edge] = head_id, tail_id, data 170 del self.hidden_edges[edge] 171 except KeyError: 172 raise GraphError('Invalid edge %s' % edge) 173 174 def restore_all_edges(self): 175 """ 176 Restores all hidden edges. 177 """ 178 for edge in list(self.hidden_edges.keys()): 179 try: 180 self.restore_edge(edge) 181 except GraphError: 182 pass 183 184 def restore_all_nodes(self): 185 """ 186 Restores all hidden nodes. 187 """ 188 for node in list(self.hidden_nodes.keys()): 189 self.restore_node(node) 190 191 def __contains__(self, node): 192 """ 193 Test whether a node is in the graph 194 """ 195 return node in self.nodes 196 197 def edge_by_id(self, edge): 198 """ 199 Returns the edge that connects the head_id and tail_id nodes 200 """ 201 try: 202 head, tail, data = self.edges[edge] 203 except KeyError: 204 head, tail = None, None 205 raise GraphError('Invalid edge %s' % edge) 206 207 return (head, tail) 208 209 def edge_by_node(self, head, tail): 210 """ 211 Returns the edge that connects the head_id and tail_id nodes 212 """ 213 for edge in self.out_edges(head): 214 if self.tail(edge) == tail: 215 return edge 216 return None 217 218 def number_of_nodes(self): 219 """ 220 Returns the number of nodes 221 """ 222 return len(self.nodes) 223 224 def number_of_edges(self): 225 """ 226 Returns the number of edges 227 """ 228 return len(self.edges) 229 230 def __iter__(self): 231 """ 232 Iterates over all nodes in the graph 233 """ 234 return iter(self.nodes) 235 236 def node_list(self): 237 """ 238 Return a list of the node ids for all visible nodes in the graph. 239 """ 240 return list(self.nodes.keys()) 241 242 def edge_list(self): 243 """ 244 Returns an iterator for all visible nodes in the graph. 245 """ 246 return list(self.edges.keys()) 247 248 def number_of_hidden_edges(self): 249 """ 250 Returns the number of hidden edges 251 """ 252 return len(self.hidden_edges) 253 254 def number_of_hidden_nodes(self): 255 """ 256 Returns the number of hidden nodes 257 """ 258 return len(self.hidden_nodes) 259 260 def hidden_node_list(self): 261 """ 262 Returns the list with the hidden nodes 263 """ 264 return list(self.hidden_nodes.keys()) 265 266 def hidden_edge_list(self): 267 """ 268 Returns a list with the hidden edges 269 """ 270 return list(self.hidden_edges.keys()) 271 272 def describe_node(self, node): 273 """ 274 return node, node data, outgoing edges, incoming edges for node 275 """ 276 incoming, outgoing, data = self.nodes[node] 277 return node, data, outgoing, incoming 278 279 def describe_edge(self, edge): 280 """ 281 return edge, edge data, head, tail for edge 282 """ 283 head, tail, data = self.edges[edge] 284 return edge, data, head, tail 285 286 def node_data(self, node): 287 """ 288 Returns the data associated with a node 289 """ 290 return self.nodes[node][2] 291 292 def edge_data(self, edge): 293 """ 294 Returns the data associated with an edge 295 """ 296 return self.edges[edge][2] 297 298 def update_edge_data(self, edge, edge_data): 299 """ 300 Replace the edge data for a specific edge 301 """ 302 self.edges[edge] = self.edges[edge][0:2] + (edge_data,) 303 304 def head(self, edge): 305 """ 306 Returns the node of the head of the edge. 307 """ 308 return self.edges[edge][0] 309 310 def tail(self, edge): 311 """ 312 Returns node of the tail of the edge. 313 """ 314 return self.edges[edge][1] 315 316 def out_nbrs(self, node): 317 """ 318 List of nodes connected by outgoing edges 319 """ 320 l = [self.tail(n) for n in self.out_edges(node)] 321 return l 322 323 def inc_nbrs(self, node): 324 """ 325 List of nodes connected by incoming edges 326 """ 327 l = [self.head(n) for n in self.inc_edges(node)] 328 return l 329 330 def all_nbrs(self, node): 331 """ 332 List of nodes connected by incoming and outgoing edges 333 """ 334 l = dict.fromkeys( self.inc_nbrs(node) + self.out_nbrs(node) ) 335 return list(l) 336 337 def out_edges(self, node): 338 """ 339 Returns a list of the outgoing edges 340 """ 341 try: 342 return list(self.nodes[node][1]) 343 except KeyError: 344 raise GraphError('Invalid node %s' % node) 345 346 return None 347 348 def inc_edges(self, node): 349 """ 350 Returns a list of the incoming edges 351 """ 352 try: 353 return list(self.nodes[node][0]) 354 except KeyError: 355 raise GraphError('Invalid node %s' % node) 356 357 return None 358 359 def all_edges(self, node): 360 """ 361 Returns a list of incoming and outging edges. 362 """ 363 return set(self.inc_edges(node) + self.out_edges(node)) 364 365 def out_degree(self, node): 366 """ 367 Returns the number of outgoing edges 368 """ 369 return len(self.out_edges(node)) 370 371 def inc_degree(self, node): 372 """ 373 Returns the number of incoming edges 374 """ 375 return len(self.inc_edges(node)) 376 377 def all_degree(self, node): 378 """ 379 The total degree of a node 380 """ 381 return self.inc_degree(node) + self.out_degree(node) 382 383 def _topo_sort(self, forward=True): 384 """ 385 Topological sort. 386 387 Returns a list of nodes where the successors (based on outgoing and 388 incoming edges selected by the forward parameter) of any given node 389 appear in the sequence after that node. 390 """ 391 topo_list = [] 392 queue = deque() 393 indeg = {} 394 395 # select the operation that will be performed 396 if forward: 397 get_edges = self.out_edges 398 get_degree = self.inc_degree 399 get_next = self.tail 400 else: 401 get_edges = self.inc_edges 402 get_degree = self.out_degree 403 get_next = self.head 404 405 for node in self.node_list(): 406 degree = get_degree(node) 407 if degree: 408 indeg[node] = degree 409 else: 410 queue.append(node) 411 412 while queue: 413 curr_node = queue.popleft() 414 topo_list.append(curr_node) 415 for edge in get_edges(curr_node): 416 tail_id = get_next(edge) 417 if tail_id in indeg: 418 indeg[tail_id] -= 1 419 if indeg[tail_id] == 0: 420 queue.append(tail_id) 421 422 if len(topo_list) == len(self.node_list()): 423 valid = True 424 else: 425 # the graph has cycles, invalid topological sort 426 valid = False 427 428 return (valid, topo_list) 429 430 def forw_topo_sort(self): 431 """ 432 Topological sort. 433 434 Returns a list of nodes where the successors (based on outgoing edges) 435 of any given node appear in the sequence after that node. 436 """ 437 return self._topo_sort(forward=True) 438 439 def back_topo_sort(self): 440 """ 441 Reverse topological sort. 442 443 Returns a list of nodes where the successors (based on incoming edges) 444 of any given node appear in the sequence after that node. 445 """ 446 return self._topo_sort(forward=False) 447 448 def _bfs_subgraph(self, start_id, forward=True): 449 """ 450 Private method creates a subgraph in a bfs order. 451 452 The forward parameter specifies whether it is a forward or backward 453 traversal. 454 """ 455 if forward: 456 get_bfs = self.forw_bfs 457 get_nbrs = self.out_nbrs 458 else: 459 get_bfs = self.back_bfs 460 get_nbrs = self.inc_nbrs 461 462 g = Graph() 463 bfs_list = get_bfs(start_id) 464 for node in bfs_list: 465 g.add_node(node) 466 467 for node in bfs_list: 468 for nbr_id in get_nbrs(node): 469 g.add_edge(node, nbr_id) 470 471 return g 472 473 def forw_bfs_subgraph(self, start_id): 474 """ 475 Creates and returns a subgraph consisting of the breadth first 476 reachable nodes based on their outgoing edges. 477 """ 478 return self._bfs_subgraph(start_id, forward=True) 479 480 def back_bfs_subgraph(self, start_id): 481 """ 482 Creates and returns a subgraph consisting of the breadth first 483 reachable nodes based on the incoming edges. 484 """ 485 return self._bfs_subgraph(start_id, forward=False) 486 487 def iterdfs(self, start, end=None, forward=True): 488 """ 489 Collecting nodes in some depth first traversal. 490 491 The forward parameter specifies whether it is a forward or backward 492 traversal. 493 """ 494 visited, stack = set([start]), deque([start]) 495 496 if forward: 497 get_edges = self.out_edges 498 get_next = self.tail 499 else: 500 get_edges = self.inc_edges 501 get_next = self.head 502 503 while stack: 504 curr_node = stack.pop() 505 yield curr_node 506 if curr_node == end: 507 break 508 for edge in sorted(get_edges(curr_node)): 509 tail = get_next(edge) 510 if tail not in visited: 511 visited.add(tail) 512 stack.append(tail) 513 514 def iterdata(self, start, end=None, forward=True, condition=None): 515 """ 516 Perform a depth-first walk of the graph (as ``iterdfs``) 517 and yield the item data of every node where condition matches. The 518 condition callback is only called when node_data is not None. 519 """ 520 521 visited, stack = set([start]), deque([start]) 522 523 if forward: 524 get_edges = self.out_edges 525 get_next = self.tail 526 else: 527 get_edges = self.inc_edges 528 get_next = self.head 529 530 get_data = self.node_data 531 532 while stack: 533 curr_node = stack.pop() 534 curr_data = get_data(curr_node) 535 if curr_data is not None: 536 if condition is not None and not condition(curr_data): 537 continue 538 yield curr_data 539 if curr_node == end: 540 break 541 for edge in get_edges(curr_node): 542 tail = get_next(edge) 543 if tail not in visited: 544 visited.add(tail) 545 stack.append(tail) 546 547 def _iterbfs(self, start, end=None, forward=True): 548 """ 549 The forward parameter specifies whether it is a forward or backward 550 traversal. Returns a list of tuples where the first value is the hop 551 value the second value is the node id. 552 """ 553 queue, visited = deque([(start, 0)]), set([start]) 554 555 # the direction of the bfs depends on the edges that are sampled 556 if forward: 557 get_edges = self.out_edges 558 get_next = self.tail 559 else: 560 get_edges = self.inc_edges 561 get_next = self.head 562 563 while queue: 564 curr_node, curr_step = queue.popleft() 565 yield (curr_node, curr_step) 566 if curr_node == end: 567 break 568 for edge in get_edges(curr_node): 569 tail = get_next(edge) 570 if tail not in visited: 571 visited.add(tail) 572 queue.append((tail, curr_step + 1)) 573 574 575 def forw_bfs(self, start, end=None): 576 """ 577 Returns a list of nodes in some forward BFS order. 578 579 Starting from the start node the breadth first search proceeds along 580 outgoing edges. 581 """ 582 return [node for node, step in self._iterbfs(start, end, forward=True)] 583 584 def back_bfs(self, start, end=None): 585 """ 586 Returns a list of nodes in some backward BFS order. 587 588 Starting from the start node the breadth first search proceeds along 589 incoming edges. 590 """ 591 return [node for node, step in self._iterbfs(start, end, forward=False)] 592 593 def forw_dfs(self, start, end=None): 594 """ 595 Returns a list of nodes in some forward DFS order. 596 597 Starting with the start node the depth first search proceeds along 598 outgoing edges. 599 """ 600 return list(self.iterdfs(start, end, forward=True)) 601 602 def back_dfs(self, start, end=None): 603 """ 604 Returns a list of nodes in some backward DFS order. 605 606 Starting from the start node the depth first search proceeds along 607 incoming edges. 608 """ 609 return list(self.iterdfs(start, end, forward=False)) 610 611 def connected(self): 612 """ 613 Returns :py:data:`True` if the graph's every node can be reached from every 614 other node. 615 """ 616 node_list = self.node_list() 617 for node in node_list: 618 bfs_list = self.forw_bfs(node) 619 if len(bfs_list) != len(node_list): 620 return False 621 return True 622 623 def clust_coef(self, node): 624 """ 625 Computes and returns the local clustering coefficient of node. The 626 local cluster coefficient is proportion of the actual number of edges between 627 neighbours of node and the maximum number of edges between those neighbours. 628 629 See <http://en.wikipedia.org/wiki/Clustering_coefficient#Local_clustering_coefficient> 630 for a formal definition. 631 """ 632 num = 0 633 nbr_set = set(self.out_nbrs(node)) 634 635 if node in nbr_set: 636 nbr_set.remove(node) # loop defense 637 638 for nbr in nbr_set: 639 sec_set = set(self.out_nbrs(nbr)) 640 if nbr in sec_set: 641 sec_set.remove(nbr) # loop defense 642 num += len(nbr_set & sec_set) 643 644 nbr_num = len(nbr_set) 645 if nbr_num: 646 clust_coef = float(num) / (nbr_num * (nbr_num - 1)) 647 else: 648 clust_coef = 0.0 649 return clust_coef 650 651 def get_hops(self, start, end=None, forward=True): 652 """ 653 Computes the hop distance to all nodes centered around a specified node. 654 655 First order neighbours are at hop 1, their neigbours are at hop 2 etc. 656 Uses :py:meth:`forw_bfs` or :py:meth:`back_bfs` depending on the value of the forward 657 parameter. If the distance between all neighbouring nodes is 1 the hop 658 number corresponds to the shortest distance between the nodes. 659 660 :param start: the starting node 661 :param end: ending node (optional). When not specified will search the whole graph. 662 :param forward: directionality parameter (optional). If C{True} (default) it uses L{forw_bfs} otherwise L{back_bfs}. 663 :return: returns a list of tuples where each tuple contains the node and the hop. 664 665 Typical usage:: 666 667 >>> print (graph.get_hops(1, 8)) 668 >>> [(1, 0), (2, 1), (3, 1), (4, 2), (5, 3), (7, 4), (8, 5)] 669 # node 1 is at 0 hops 670 # node 2 is at 1 hop 671 # ... 672 # node 8 is at 5 hops 673 """ 674 if forward: 675 return list(self._iterbfs(start=start, end=end, forward=True)) 676 else: 677 return list(self._iterbfs(start=start, end=end, forward=False)) 678