1#    Copyright 2015-2017 ARM Limited
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14#
15
16import pandas as pd
17import numpy as np
18
19"""Generic functions that can be used in multiple places in trappy
20"""
21
22def listify(to_select):
23    """Utitlity function to handle both single and
24    list inputs
25    """
26
27    if not isinstance(to_select, list):
28        to_select = [to_select]
29
30    return to_select
31
32def handle_duplicate_index(data,
33                           max_delta=0.000001):
34    """Handle duplicate values in index
35
36    :param data: The timeseries input
37    :type data: :mod:`pandas.Series`
38
39    :param max_delta: Maximum interval adjustment value that
40        will be added to duplicate indices
41    :type max_delta: float
42
43    Consider the following case where a series needs to be reindexed
44    to a new index (which can be required when different series need to
45    be combined and compared):
46    ::
47
48        import pandas
49        values = [0, 1, 2, 3, 4]
50        index = [0.0, 1.0, 1.0, 6.0, 7.0]
51        series = pandas.Series(values, index=index)
52        new_index = [0.0, 1.0, 2.0, 3.0, 4.0, 6.0, 7.0]
53        series.reindex(new_index)
54
55    The above code fails with:
56    ::
57
58        ValueError: cannot reindex from a duplicate axis
59
60    The function :func:`handle_duplicate_axis` changes the duplicate values
61    to
62    ::
63
64        >>> import pandas
65        >>> from trappy.utils import handle_duplicate_index
66
67        >>> values = [0, 1, 2, 3, 4]
68        index = [0.0, 1.0, 1.0, 6.0, 7.0]
69        series = pandas.Series(values, index=index)
70        series = handle_duplicate_index(series)
71        print series.index.values
72        >>> [ 0.        1.        1.000001  6.        7.      ]
73
74    """
75
76    index = data.index
77    new_index = index.values
78
79    dups = index.get_duplicates()
80
81    for dup in dups:
82        # Leave one of the values intact
83        dup_index_left = index.searchsorted(dup, side="left")
84        dup_index_right = index.searchsorted(dup, side="right") - 1
85        num_dups = dup_index_right - dup_index_left + 1
86
87        # Calculate delta that needs to be added to each duplicate
88        # index
89        try:
90            delta = (index[dup_index_right + 1] - dup) / num_dups
91        except IndexError:
92            # dup_index_right + 1 is outside of the series (i.e. the
93            # dup is at the end of the series).
94            delta = max_delta
95
96        # Clamp the maximum delta added to max_delta
97        if delta > max_delta:
98            delta = max_delta
99
100        # Add a delta to the others
101        dup_index_left += 1
102        while dup_index_left <= dup_index_right:
103            new_index[dup_index_left] += delta
104            delta += delta
105            dup_index_left += 1
106
107    return data.reindex(new_index)
108
109# Iterate fast over all rows in a data frame and apply fn
110def apply_callback(df, fn, *kwargs):
111    iters = df.itertuples()
112    event_tuple = iters.next()
113
114    # Column names beginning with underscore will not be preserved in tuples
115    # due to constraints on namedtuple field names, so store mappings from
116    # column name to column number for each trace event.
117    col_idxs = { name: idx for idx, name in enumerate(['Time'] + df.columns.tolist()) }
118
119    while True:
120        if not event_tuple:
121            break
122        event_dict = { col: event_tuple[idx] for col, idx in col_idxs.iteritems() }
123
124        if kwargs:
125            fn(event_dict, kwargs)
126        else:
127            fn(event_dict)
128
129        event_tuple = next(iters, None)
130
131
132def merge_dfs(pr_df, sec_df, pivot):
133    # Keep track of last secondary event
134    pivot_map = {}
135
136    # An array accumating dicts with merged data
137    merged_data = []
138    def df_fn(data):
139        # Store the latest secondary info
140        if data['Time'][0] == 'secondary':
141            pivot_map[data[pivot]] = data
142            # Get rid of primary/secondary labels
143            data['Time'] = data['Time'][1]
144            return
145
146        # Propogate latest secondary info
147        for key, value in data.iteritems():
148            if key == pivot:
149                continue
150            # Fast check for if value is nan (faster than np.isnan + try/except)
151            if value != value and pivot_map.has_key(data[pivot]):
152                data[key] = pivot_map[data[pivot]][key]
153
154        # Get rid of primary/secondary labels
155        data['Time'] = data['Time'][1]
156        merged_data.append(data)
157
158    df = pd.concat([pr_df, sec_df], keys=['primary', 'secondary']).sort_values(by='__line')
159    apply_callback(df, df_fn)
160    merged_df = pd.DataFrame.from_dict(merged_data)
161    merged_df.set_index('Time', inplace=True)
162
163    return merged_df
164