1c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Copyright (c) 2013 The Chromium Authors. All rights reserved.
2c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
3c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// found in the LICENSE file.
4c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
5c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "net/quic/congestion_control/cube_root.h"
6c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
7c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#include "base/logging.h"
8c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
9c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)namespace {
10c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
11c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Find last bit in a 64-bit word.
12c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)int FindMostSignificantBit(uint64 x) {
13c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (!x) {
14c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return 0;
15c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
16c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  int r = 0;
17c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (x & 0xffffffff00000000ull) {
18c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    x >>= 32;
19c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    r += 32;
20c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
21c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (x & 0xffff0000u) {
22c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    x >>= 16;
23c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    r += 16;
24c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
25c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (x & 0xff00u) {
26c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    x >>= 8;
27c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    r += 8;
28c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
29c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (x & 0xf0u) {
30c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    x >>= 4;
31c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    r += 4;
32c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
33c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (x & 0xcu) {
34c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    x >>= 2;
35c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    r += 2;
36c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
37c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (x & 0x02u) {
38c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    x >>= 1;
39c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    r++;
40c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
41c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (x & 0x01u) {
42c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    r++;
43c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
44c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return r;
45c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
46c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
47c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// 6 bits table [0..63]
48c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)const uint32 cube_root_table[] = {
49c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    0,  54,  54,  54, 118, 118, 118, 118, 123, 129, 134, 138, 143, 147, 151,
50c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  156, 157, 161, 164, 168, 170, 173, 176, 179, 181, 185, 187, 190, 192, 194,
51c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  197, 199, 200, 202, 204, 206, 209, 211, 213, 215, 217, 219, 221, 222, 224,
52c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  225, 227, 229, 231, 232, 234, 236, 237, 239, 240, 242, 244, 245, 246, 248,
53c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  250, 251, 252, 254
54c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)};
55c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}  // namespace
56c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
57c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)namespace net {
58c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
59c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// Calculate the cube root using a table lookup followed by one Newton-Raphson
60c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)// iteration.
61c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)uint32 CubeRoot::Root(uint64 a) {
62c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  uint32 msb = FindMostSignificantBit(a);
63c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  DCHECK_LE(msb, 64u);
64c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
65c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  if (msb < 7) {
66c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    // MSB in our table.
67c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    return ((cube_root_table[static_cast<uint32>(a)]) + 31) >> 6;
68c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }
69c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // MSB          7,  8,  9, 10, 11, 12, 13, 14, 15, 16, ...
70c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // cubic_shift  1,  1,  1,  2,  2,  2,  3,  3,  3,  4, ...
71c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  uint32 cubic_shift = (msb - 4);
72c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  cubic_shift = ((cubic_shift * 342) >> 10);  // Div by 3, biased high.
73c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
74c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // 4 to 6 bits accuracy depending on MSB.
75c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  uint32 down_shifted_to_6bit = (a >> (cubic_shift * 3));
76c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  uint64 root = ((cube_root_table[down_shifted_to_6bit] + 10) << cubic_shift)
77c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)      >> 6;
78c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
79c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Make one Newton-Raphson iteration.
80c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Since x has an error (inaccuracy due to the use of fix point) we get a
81c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // more accurate result by doing x * (x - 1) instead of x * x.
82c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  root = 2 * root + (a / (root * (root - 1)));
83c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  root = ((root * 341) >> 10);  // Div by 3, biased low.
84c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  return static_cast<uint32>(root);
85c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}
86c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
87c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)}  // namespace net
88