1"""
2altgraph.Graph - Base Graph class
3=================================
4
5..
6  #--Version 2.1
7  #--Bob Ippolito October, 2004
8
9  #--Version 2.0
10  #--Istvan Albert June, 2004
11
12  #--Version 1.0
13  #--Nathan Denny, May 27, 1999
14"""
15
16from altgraph import GraphError
17from collections import deque
18
19class Graph(object):
20    """
21    The Graph class represents a directed graph with *N* nodes and *E* edges.
22
23    Naming conventions:
24
25    - the prefixes such as *out*, *inc* and *all* will refer to methods
26      that operate on the outgoing, incoming or all edges of that node.
27
28      For example: :py:meth:`inc_degree` will refer to the degree of the node
29      computed over the incoming edges (the number of neighbours linking to
30      the node).
31
32    - the prefixes such as *forw* and *back* will refer to the
33      orientation of the edges used in the method with respect to the node.
34
35      For example: :py:meth:`forw_bfs` will start at the node then use the outgoing
36      edges to traverse the graph (goes forward).
37    """
38
39    def __init__(self, edges=None):
40        """
41        Initialization
42        """
43
44        self.next_edge = 0
45        self.nodes, self.edges = {}, {}
46        self.hidden_edges, self.hidden_nodes = {}, {}
47
48        if edges is not None:
49            for item in edges:
50                if len(item) == 2:
51                    head, tail = item
52                    self.add_edge(head, tail)
53                elif len(item) == 3:
54                    head, tail, data = item
55                    self.add_edge(head, tail, data)
56                else:
57                    raise GraphError("Cannot create edge from %s"%(item,))
58
59
60    def __repr__(self):
61        return '<Graph: %d nodes, %d edges>' % (
62            self.number_of_nodes(), self.number_of_edges())
63
64    def add_node(self, node, node_data=None):
65        """
66        Adds a new node to the graph.  Arbitrary data can be attached to the
67        node via the node_data parameter.  Adding the same node twice will be
68        silently ignored.
69
70        The node must be a hashable value.
71        """
72        #
73        # the nodes will contain tuples that will store incoming edges,
74        # outgoing edges and data
75        #
76        # index 0 -> incoming edges
77        # index 1 -> outgoing edges
78
79        if node in self.hidden_nodes:
80            # Node is present, but hidden
81            return
82
83        if node not in self.nodes:
84            self.nodes[node] = ([], [], node_data)
85
86    def add_edge(self, head_id, tail_id, edge_data=1, create_nodes=True):
87        """
88        Adds a directed edge going from head_id to tail_id.
89        Arbitrary data can be attached to the edge via edge_data.
90        It may create the nodes if adding edges between nonexisting ones.
91
92        :param head_id: head node
93        :param tail_id: tail node
94        :param edge_data: (optional) data attached to the edge
95        :param create_nodes: (optional) creates the head_id or tail_id node in case they did not exist
96        """
97        # shorcut
98        edge = self.next_edge
99
100        # add nodes if on automatic node creation
101        if create_nodes:
102            self.add_node(head_id)
103            self.add_node(tail_id)
104
105        # update the corresponding incoming and outgoing lists in the nodes
106        # index 0 -> incoming edges
107        # index 1 -> outgoing edges
108
109        try:
110            self.nodes[tail_id][0].append(edge)
111            self.nodes[head_id][1].append(edge)
112        except KeyError:
113            raise GraphError('Invalid nodes %s -> %s' % (head_id, tail_id))
114
115        # store edge information
116        self.edges[edge] = (head_id, tail_id, edge_data)
117
118
119        self.next_edge += 1
120
121    def hide_edge(self, edge):
122        """
123        Hides an edge from the graph. The edge may be unhidden at some later
124        time.
125        """
126        try:
127            head_id, tail_id, edge_data = self.hidden_edges[edge] = self.edges[edge]
128            self.nodes[tail_id][0].remove(edge)
129            self.nodes[head_id][1].remove(edge)
130            del self.edges[edge]
131        except KeyError:
132            raise GraphError('Invalid edge %s' % edge)
133
134    def hide_node(self, node):
135        """
136        Hides a node from the graph.  The incoming and outgoing edges of the
137        node will also be hidden.  The node may be unhidden at some later time.
138        """
139        try:
140            all_edges = self.all_edges(node)
141            self.hidden_nodes[node] = (self.nodes[node], all_edges)
142            for edge in all_edges:
143                self.hide_edge(edge)
144            del self.nodes[node]
145        except KeyError:
146            raise GraphError('Invalid node %s' % node)
147
148    def restore_node(self, node):
149        """
150        Restores a previously hidden node back into the graph and restores
151        all of its incoming and outgoing edges.
152        """
153        try:
154            self.nodes[node], all_edges = self.hidden_nodes[node]
155            for edge in all_edges:
156                self.restore_edge(edge)
157            del self.hidden_nodes[node]
158        except KeyError:
159            raise GraphError('Invalid node %s' % node)
160
161    def restore_edge(self, edge):
162        """
163        Restores a previously hidden edge back into the graph.
164        """
165        try:
166            head_id, tail_id, data = self.hidden_edges[edge]
167            self.nodes[tail_id][0].append(edge)
168            self.nodes[head_id][1].append(edge)
169            self.edges[edge] = head_id, tail_id, data
170            del self.hidden_edges[edge]
171        except KeyError:
172            raise GraphError('Invalid edge %s' % edge)
173
174    def restore_all_edges(self):
175        """
176        Restores all hidden edges.
177        """
178        for edge in list(self.hidden_edges.keys()):
179            try:
180                self.restore_edge(edge)
181            except GraphError:
182                pass
183
184    def restore_all_nodes(self):
185        """
186        Restores all hidden nodes.
187        """
188        for node in list(self.hidden_nodes.keys()):
189            self.restore_node(node)
190
191    def __contains__(self, node):
192        """
193        Test whether a node is in the graph
194        """
195        return node in self.nodes
196
197    def edge_by_id(self, edge):
198        """
199        Returns the edge that connects the head_id and tail_id nodes
200        """
201        try:
202            head, tail, data =  self.edges[edge]
203        except KeyError:
204            head, tail = None, None
205            raise GraphError('Invalid edge %s' % edge)
206
207        return (head, tail)
208
209    def edge_by_node(self, head, tail):
210        """
211        Returns the edge that connects the head_id and tail_id nodes
212        """
213        for edge in self.out_edges(head):
214            if self.tail(edge) == tail:
215                return edge
216        return None
217
218    def number_of_nodes(self):
219        """
220        Returns the number of nodes
221        """
222        return len(self.nodes)
223
224    def number_of_edges(self):
225        """
226        Returns the number of edges
227        """
228        return len(self.edges)
229
230    def __iter__(self):
231        """
232        Iterates over all nodes in the graph
233        """
234        return iter(self.nodes)
235
236    def node_list(self):
237        """
238        Return a list of the node ids for all visible nodes in the graph.
239        """
240        return list(self.nodes.keys())
241
242    def edge_list(self):
243        """
244        Returns an iterator for all visible nodes in the graph.
245        """
246        return list(self.edges.keys())
247
248    def number_of_hidden_edges(self):
249        """
250        Returns the number of hidden edges
251        """
252        return len(self.hidden_edges)
253
254    def number_of_hidden_nodes(self):
255        """
256        Returns the number of hidden nodes
257        """
258        return len(self.hidden_nodes)
259
260    def hidden_node_list(self):
261        """
262        Returns the list with the hidden nodes
263        """
264        return list(self.hidden_nodes.keys())
265
266    def hidden_edge_list(self):
267        """
268        Returns a list with the hidden edges
269        """
270        return list(self.hidden_edges.keys())
271
272    def describe_node(self, node):
273        """
274        return node, node data, outgoing edges, incoming edges for node
275        """
276        incoming, outgoing, data = self.nodes[node]
277        return node, data, outgoing, incoming
278
279    def describe_edge(self, edge):
280        """
281        return edge, edge data, head, tail for edge
282        """
283        head, tail, data = self.edges[edge]
284        return edge, data, head, tail
285
286    def node_data(self, node):
287        """
288        Returns the data associated with a node
289        """
290        return self.nodes[node][2]
291
292    def edge_data(self, edge):
293        """
294        Returns the data associated with an edge
295        """
296        return self.edges[edge][2]
297
298    def update_edge_data(self, edge, edge_data):
299        """
300        Replace the edge data for a specific edge
301        """
302        self.edges[edge] = self.edges[edge][0:2] + (edge_data,)
303
304    def head(self, edge):
305        """
306        Returns the node of the head of the edge.
307        """
308        return self.edges[edge][0]
309
310    def tail(self, edge):
311        """
312        Returns node of the tail of the edge.
313        """
314        return self.edges[edge][1]
315
316    def out_nbrs(self, node):
317        """
318        List of nodes connected by outgoing edges
319        """
320        l = [self.tail(n) for n in self.out_edges(node)]
321        return l
322
323    def inc_nbrs(self, node):
324        """
325        List of nodes connected by incoming edges
326        """
327        l = [self.head(n) for n in self.inc_edges(node)]
328        return l
329
330    def all_nbrs(self, node):
331        """
332        List of nodes connected by incoming and outgoing edges
333        """
334        l = dict.fromkeys( self.inc_nbrs(node) + self.out_nbrs(node) )
335        return list(l)
336
337    def out_edges(self, node):
338        """
339        Returns a list of the outgoing edges
340        """
341        try:
342            return list(self.nodes[node][1])
343        except KeyError:
344            raise GraphError('Invalid node %s' % node)
345
346        return None
347
348    def inc_edges(self, node):
349        """
350        Returns a list of the incoming edges
351        """
352        try:
353            return list(self.nodes[node][0])
354        except KeyError:
355            raise GraphError('Invalid node %s' % node)
356
357        return None
358
359    def all_edges(self, node):
360        """
361        Returns a list of incoming and outging edges.
362        """
363        return set(self.inc_edges(node) + self.out_edges(node))
364
365    def out_degree(self, node):
366        """
367        Returns the number of outgoing edges
368        """
369        return len(self.out_edges(node))
370
371    def inc_degree(self, node):
372        """
373        Returns the number of incoming edges
374        """
375        return len(self.inc_edges(node))
376
377    def all_degree(self, node):
378        """
379        The total degree of a node
380        """
381        return self.inc_degree(node) + self.out_degree(node)
382
383    def _topo_sort(self, forward=True):
384        """
385        Topological sort.
386
387        Returns a list of nodes where the successors (based on outgoing and
388        incoming edges selected by the forward parameter) of any given node
389        appear in the sequence after that node.
390        """
391        topo_list = []
392        queue = deque()
393        indeg = {}
394
395        # select the operation that will be performed
396        if forward:
397            get_edges = self.out_edges
398            get_degree = self.inc_degree
399            get_next = self.tail
400        else:
401            get_edges = self.inc_edges
402            get_degree = self.out_degree
403            get_next = self.head
404
405        for node in self.node_list():
406            degree = get_degree(node)
407            if degree:
408                indeg[node] = degree
409            else:
410                queue.append(node)
411
412        while queue:
413            curr_node = queue.popleft()
414            topo_list.append(curr_node)
415            for edge in get_edges(curr_node):
416                tail_id = get_next(edge)
417                if tail_id in indeg:
418                    indeg[tail_id] -= 1
419                    if indeg[tail_id] == 0:
420                        queue.append(tail_id)
421
422        if len(topo_list) == len(self.node_list()):
423            valid = True
424        else:
425            # the graph has cycles, invalid topological sort
426            valid = False
427
428        return (valid, topo_list)
429
430    def forw_topo_sort(self):
431        """
432        Topological sort.
433
434        Returns a list of nodes where the successors (based on outgoing edges)
435        of any given node appear in the sequence after that node.
436        """
437        return self._topo_sort(forward=True)
438
439    def back_topo_sort(self):
440        """
441        Reverse topological sort.
442
443        Returns a list of nodes where the successors (based on incoming edges)
444        of any given node appear in the sequence after that node.
445        """
446        return self._topo_sort(forward=False)
447
448    def _bfs_subgraph(self, start_id, forward=True):
449        """
450        Private method creates a subgraph in a bfs order.
451
452        The forward parameter specifies whether it is a forward or backward
453        traversal.
454        """
455        if forward:
456            get_bfs  = self.forw_bfs
457            get_nbrs = self.out_nbrs
458        else:
459            get_bfs  = self.back_bfs
460            get_nbrs = self.inc_nbrs
461
462        g = Graph()
463        bfs_list = get_bfs(start_id)
464        for node in bfs_list:
465            g.add_node(node)
466
467        for node in bfs_list:
468            for nbr_id in get_nbrs(node):
469                g.add_edge(node, nbr_id)
470
471        return g
472
473    def forw_bfs_subgraph(self, start_id):
474        """
475        Creates and returns a subgraph consisting of the breadth first
476        reachable nodes based on their outgoing edges.
477        """
478        return self._bfs_subgraph(start_id, forward=True)
479
480    def back_bfs_subgraph(self, start_id):
481        """
482        Creates and returns a subgraph consisting of the breadth first
483        reachable nodes based on the incoming edges.
484        """
485        return self._bfs_subgraph(start_id, forward=False)
486
487    def iterdfs(self, start, end=None, forward=True):
488        """
489        Collecting nodes in some depth first traversal.
490
491        The forward parameter specifies whether it is a forward or backward
492        traversal.
493        """
494        visited, stack = set([start]), deque([start])
495
496        if forward:
497            get_edges = self.out_edges
498            get_next = self.tail
499        else:
500            get_edges = self.inc_edges
501            get_next = self.head
502
503        while stack:
504            curr_node = stack.pop()
505            yield curr_node
506            if curr_node == end:
507                break
508            for edge in sorted(get_edges(curr_node)):
509                tail = get_next(edge)
510                if tail not in visited:
511                    visited.add(tail)
512                    stack.append(tail)
513
514    def iterdata(self, start, end=None, forward=True, condition=None):
515        """
516        Perform a depth-first walk of the graph (as ``iterdfs``)
517        and yield the item data of every node where condition matches. The
518        condition callback is only called when node_data is not None.
519        """
520
521        visited, stack = set([start]), deque([start])
522
523        if forward:
524            get_edges = self.out_edges
525            get_next = self.tail
526        else:
527            get_edges = self.inc_edges
528            get_next = self.head
529
530        get_data = self.node_data
531
532        while stack:
533            curr_node = stack.pop()
534            curr_data = get_data(curr_node)
535            if curr_data is not None:
536                if condition is not None and not condition(curr_data):
537                    continue
538                yield curr_data
539            if curr_node == end:
540                break
541            for edge in get_edges(curr_node):
542                tail = get_next(edge)
543                if tail not in visited:
544                    visited.add(tail)
545                    stack.append(tail)
546
547    def _iterbfs(self, start, end=None, forward=True):
548        """
549        The forward parameter specifies whether it is a forward or backward
550        traversal.  Returns a list of tuples where the first value is the hop
551        value the second value is the node id.
552        """
553        queue, visited = deque([(start, 0)]), set([start])
554
555        # the direction of the bfs depends on the edges that are sampled
556        if forward:
557            get_edges = self.out_edges
558            get_next = self.tail
559        else:
560            get_edges = self.inc_edges
561            get_next = self.head
562
563        while queue:
564            curr_node, curr_step = queue.popleft()
565            yield (curr_node, curr_step)
566            if curr_node == end:
567                break
568            for edge in get_edges(curr_node):
569                tail = get_next(edge)
570                if tail not in visited:
571                    visited.add(tail)
572                    queue.append((tail, curr_step + 1))
573
574
575    def forw_bfs(self, start, end=None):
576        """
577        Returns a list of nodes in some forward BFS order.
578
579        Starting from the start node the breadth first search proceeds along
580        outgoing edges.
581        """
582        return [node for node, step in self._iterbfs(start, end, forward=True)]
583
584    def back_bfs(self, start, end=None):
585        """
586        Returns a list of nodes in some backward BFS order.
587
588        Starting from the start node the breadth first search proceeds along
589        incoming edges.
590        """
591        return [node for node, step in self._iterbfs(start, end, forward=False)]
592
593    def forw_dfs(self, start, end=None):
594        """
595        Returns a list of nodes in some forward DFS order.
596
597        Starting with the start node the depth first search proceeds along
598        outgoing edges.
599        """
600        return list(self.iterdfs(start, end, forward=True))
601
602    def back_dfs(self, start, end=None):
603        """
604        Returns a list of nodes in some backward DFS order.
605
606        Starting from the start node the depth first search proceeds along
607        incoming edges.
608        """
609        return list(self.iterdfs(start, end, forward=False))
610
611    def connected(self):
612        """
613        Returns :py:data:`True` if the graph's every node can be reached from every
614        other node.
615        """
616        node_list = self.node_list()
617        for node in node_list:
618            bfs_list = self.forw_bfs(node)
619            if len(bfs_list) != len(node_list):
620                return False
621        return True
622
623    def clust_coef(self, node):
624        """
625        Computes and returns the local clustering coefficient of node.  The
626        local cluster coefficient is proportion of the actual number of edges between
627        neighbours of node and the maximum number of edges between those neighbours.
628
629        See <http://en.wikipedia.org/wiki/Clustering_coefficient#Local_clustering_coefficient>
630        for a formal definition.
631        """
632        num = 0
633        nbr_set = set(self.out_nbrs(node))
634
635        if node in nbr_set:
636            nbr_set.remove(node) # loop defense
637
638        for nbr in nbr_set:
639            sec_set = set(self.out_nbrs(nbr))
640            if nbr in sec_set:
641                sec_set.remove(nbr) # loop defense
642            num += len(nbr_set & sec_set)
643
644        nbr_num = len(nbr_set)
645        if nbr_num:
646            clust_coef = float(num) / (nbr_num * (nbr_num - 1))
647        else:
648            clust_coef = 0.0
649        return clust_coef
650
651    def get_hops(self, start, end=None, forward=True):
652        """
653        Computes the hop distance to all nodes centered around a specified node.
654
655        First order neighbours are at hop 1, their neigbours are at hop 2 etc.
656        Uses :py:meth:`forw_bfs` or :py:meth:`back_bfs` depending on the value of the forward
657        parameter.  If the distance between all neighbouring nodes is 1 the hop
658        number corresponds to the shortest distance between the nodes.
659
660        :param start: the starting node
661        :param end: ending node (optional). When not specified will search the whole graph.
662        :param forward: directionality parameter (optional). If C{True} (default) it uses L{forw_bfs} otherwise L{back_bfs}.
663        :return: returns a list of tuples where each tuple contains the node and the hop.
664
665        Typical usage::
666
667            >>> print (graph.get_hops(1, 8))
668            >>> [(1, 0), (2, 1), (3, 1), (4, 2), (5, 3), (7, 4), (8, 5)]
669            # node 1 is at 0 hops
670            # node 2 is at 1 hop
671            # ...
672            # node 8 is at 5 hops
673        """
674        if forward:
675            return list(self._iterbfs(start=start, end=end, forward=True))
676        else:
677            return list(self._iterbfs(start=start, end=end, forward=False))
678