1//
2// Copyright (C) 2015 The Android Open Source Project
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8//      http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
16
17#include "shill/active_link_monitor.h"
18
19#include <string>
20#include <vector>
21
22#include <base/bind.h>
23#include <base/strings/stringprintf.h>
24#include <base/strings/string_util.h>
25
26#include "shill/arp_client.h"
27#include "shill/arp_packet.h"
28#include "shill/connection.h"
29#include "shill/device_info.h"
30#include "shill/event_dispatcher.h"
31#include "shill/logging.h"
32#include "shill/metrics.h"
33#include "shill/net/ip_address.h"
34#include "shill/net/shill_time.h"
35
36using base::Bind;
37using base::Unretained;
38using std::string;
39
40namespace shill {
41
42namespace Logging {
43static auto kModuleLogScope = ScopeLogger::kLink;
44static string ObjectID(Connection* c) { return c->interface_name(); }
45}
46
47const int ActiveLinkMonitor::kDefaultTestPeriodMilliseconds = 5000;
48const int ActiveLinkMonitor::kFailureThreshold = 5;
49const int ActiveLinkMonitor::kFastTestPeriodMilliseconds = 200;
50const int ActiveLinkMonitor::kMaxResponseSampleFilterDepth = 5;
51const int ActiveLinkMonitor::kUnicastReplyReliabilityThreshold = 10;
52
53ActiveLinkMonitor::ActiveLinkMonitor(const ConnectionRefPtr& connection,
54                                     EventDispatcher* dispatcher,
55                                     Metrics* metrics,
56                                     DeviceInfo* device_info,
57                                     const FailureCallback& failure_callback,
58                                     const SuccessCallback& success_callback)
59    : connection_(connection),
60      dispatcher_(dispatcher),
61      metrics_(metrics),
62      device_info_(device_info),
63      failure_callback_(failure_callback),
64      success_callback_(success_callback),
65      // Connection is not provided when this is used as a mock for testing
66      // purpose.
67      arp_client_(
68          new ArpClient(connection ? connection->interface_index() : 0)),
69      test_period_milliseconds_(kDefaultTestPeriodMilliseconds),
70      broadcast_failure_count_(0),
71      unicast_failure_count_(0),
72      broadcast_success_count_(0),
73      unicast_success_count_(0),
74      is_unicast_(false),
75      gateway_supports_unicast_arp_(false),
76      response_sample_count_(0),
77      response_sample_bucket_(0),
78      time_(Time::GetInstance()) {
79}
80
81ActiveLinkMonitor::~ActiveLinkMonitor() {
82  Stop();
83}
84
85bool ActiveLinkMonitor::Start(int test_period) {
86  SLOG(connection_.get(), 2) << "In " << __func__ << ".";
87  StopMonitorCycle();
88  return StartInternal(test_period);
89}
90
91void ActiveLinkMonitor::Stop() {
92  SLOG(connection_.get(), 2) << "In " << __func__ << ".";
93  // Stop current cycle.
94  StopMonitorCycle();
95
96  // Clear stats accumulated from previous monitor cycles.
97  local_mac_address_.Clear();
98  gateway_mac_address_.Clear();
99  broadcast_success_count_ = 0;
100  unicast_success_count_ = 0;
101  broadcast_failure_count_ = 0;
102  unicast_failure_count_ = 0;
103  is_unicast_ = false;
104  gateway_supports_unicast_arp_ = false;
105  response_sample_bucket_ = 0;
106  response_sample_count_ = 0;
107}
108
109int ActiveLinkMonitor::GetResponseTimeMilliseconds() const {
110  return response_sample_count_ ?
111      response_sample_bucket_ / response_sample_count_ : 0;
112}
113
114bool ActiveLinkMonitor::IsGatewayFound() const {
115  return !gateway_mac_address_.IsZero();
116}
117
118bool ActiveLinkMonitor::StartInternal(int probe_period_milliseconds) {
119  test_period_milliseconds_ = probe_period_milliseconds;
120  if (test_period_milliseconds_ > kDefaultTestPeriodMilliseconds) {
121    LOG(WARNING) << "Long test period; UMA stats will be truncated.";
122  }
123
124  if (!device_info_->GetMACAddress(
125          connection_->interface_index(), &local_mac_address_)) {
126    LOG(ERROR) << "Could not get local MAC address.";
127    metrics_->NotifyLinkMonitorFailure(
128        connection_->technology(),
129        Metrics::kLinkMonitorMacAddressNotFound,
130        0, 0, 0);
131    Stop();
132    return false;
133  }
134
135  if (!StartArpClient()) {
136    LOG(ERROR) << "Failed to start ARP client.";
137    metrics_->NotifyLinkMonitorFailure(
138        connection_->technology(),
139        Metrics::kLinkMonitorClientStartFailure,
140        0, 0, 0);
141    Stop();
142    return false;
143  }
144
145  if (gateway_mac_address_.IsEmpty()) {
146    gateway_mac_address_ = ByteString(local_mac_address_.GetLength());
147  }
148  send_request_callback_.Reset(
149      Bind(&ActiveLinkMonitor::SendRequest, Unretained(this)));
150  // Post a task to send ARP request instead of calling it synchronously, to
151  // maintain consistent expectation in the case of send failures, which will
152  // always invoke failure callback.
153  dispatcher_->PostTask(send_request_callback_.callback());
154  return true;
155}
156
157void ActiveLinkMonitor::StopMonitorCycle() {
158  StopArpClient();
159  send_request_callback_.Cancel();
160  timerclear(&sent_request_at_);
161}
162
163void ActiveLinkMonitor::AddResponseTimeSample(int response_time_milliseconds) {
164  SLOG(connection_.get(), 2) << "In " << __func__ << " with sample "
165                             << response_time_milliseconds << ".";
166  metrics_->NotifyLinkMonitorResponseTimeSampleAdded(
167      connection_->technology(), response_time_milliseconds);
168  response_sample_bucket_ += response_time_milliseconds;
169  if (response_sample_count_ < kMaxResponseSampleFilterDepth) {
170    ++response_sample_count_;
171  } else {
172    response_sample_bucket_ =
173        response_sample_bucket_ * kMaxResponseSampleFilterDepth /
174            (kMaxResponseSampleFilterDepth + 1);
175  }
176}
177
178// static
179string ActiveLinkMonitor::HardwareAddressToString(const ByteString& address) {
180  std::vector<string> address_parts;
181  for (size_t i = 0; i < address.GetLength(); ++i) {
182    address_parts.push_back(
183        base::StringPrintf("%02x", address.GetConstData()[i]));
184  }
185  return base::JoinString(address_parts, ":");
186}
187
188bool ActiveLinkMonitor::StartArpClient() {
189  if (!arp_client_->StartReplyListener()) {
190    return false;
191  }
192  SLOG(connection_.get(), 4) << "Created ARP client; listening on socket "
193                             << arp_client_->socket() << ".";
194  receive_response_handler_.reset(
195    dispatcher_->CreateReadyHandler(
196        arp_client_->socket(),
197        IOHandler::kModeInput,
198        Bind(&ActiveLinkMonitor::ReceiveResponse, Unretained(this))));
199  return true;
200}
201
202void ActiveLinkMonitor::StopArpClient() {
203  arp_client_->Stop();
204  receive_response_handler_.reset();
205}
206
207bool ActiveLinkMonitor::AddMissedResponse() {
208  SLOG(connection_.get(), 2) << "In " << __func__ << ".";
209  AddResponseTimeSample(test_period_milliseconds_);
210
211  if (is_unicast_) {
212    if (gateway_supports_unicast_arp_) {
213      ++unicast_failure_count_;
214    }
215    unicast_success_count_ = 0;
216  } else {
217    ++broadcast_failure_count_;
218    broadcast_success_count_ = 0;
219  }
220
221  if (unicast_failure_count_ + broadcast_failure_count_ >= kFailureThreshold) {
222    LOG(ERROR) << "Link monitor has reached the failure threshold with "
223               << broadcast_failure_count_
224               << " broadcast failures and "
225               << unicast_failure_count_
226               << " unicast failures.";
227    failure_callback_.Run(Metrics::kLinkMonitorFailureThresholdReached,
228                          broadcast_failure_count_,
229                          unicast_failure_count_);
230    Stop();
231    return true;
232  }
233  is_unicast_ = !is_unicast_;
234  return false;
235}
236
237void ActiveLinkMonitor::ReceiveResponse(int fd) {
238  SLOG(connection_.get(), 2) << "In " << __func__ << ".";
239  ArpPacket packet;
240  ByteString sender;
241  if (!arp_client_->ReceivePacket(&packet, &sender)) {
242    return;
243  }
244
245  if (!packet.IsReply()) {
246    SLOG(connection_.get(), 4) << "This is not a reply packet.  Ignoring.";
247    return;
248  }
249
250  if (!connection_->local().address().Equals(
251           packet.remote_ip_address().address())) {
252    SLOG(connection_.get(), 4) << "Response is not for our IP address.";
253    return;
254  }
255
256  if (!local_mac_address_.Equals(packet.remote_mac_address())) {
257    SLOG(connection_.get(), 4) << "Response is not for our MAC address.";
258    return;
259  }
260
261  if (!connection_->gateway().address().Equals(
262           packet.local_ip_address().address())) {
263    SLOG(connection_.get(), 4)
264        << "Response is not from the gateway IP address.";
265    return;
266  }
267
268  struct timeval now, elapsed_time;
269  time_->GetTimeMonotonic(&now);
270  timersub(&now, &sent_request_at_, &elapsed_time);
271
272  AddResponseTimeSample(elapsed_time.tv_sec * 1000 +
273                        elapsed_time.tv_usec / 1000);
274
275  if (is_unicast_) {
276    ++unicast_success_count_;
277    unicast_failure_count_ = 0;
278    if (unicast_success_count_ >= kUnicastReplyReliabilityThreshold) {
279      SLOG_IF(Link, 2, !gateway_supports_unicast_arp_)
280          << "Gateway is now considered a reliable unicast responder.  "
281             "Unicast failures will now count.";
282      gateway_supports_unicast_arp_ = true;
283    }
284  } else {
285    ++broadcast_success_count_;
286    broadcast_failure_count_ = 0;
287  }
288
289  if (!gateway_mac_address_.Equals(packet.local_mac_address())) {
290    const ByteString& new_mac_address = packet.local_mac_address();
291    if (!IsGatewayFound()) {
292      SLOG(connection_.get(), 2) << "Found gateway at "
293                                 << HardwareAddressToString(new_mac_address);
294    } else {
295      SLOG(connection_.get(), 2) << "Gateway MAC address changed.";
296    }
297    gateway_mac_address_ = new_mac_address;
298  }
299
300  is_unicast_ = !is_unicast_;
301
302  // Stop the current cycle, and invoke the success callback. All the
303  // accumulated stats regarding the gateway are not cleared.
304  StopMonitorCycle();
305  success_callback_.Run();
306}
307
308void ActiveLinkMonitor::SendRequest() {
309  SLOG(connection_.get(), 2) << "In " << __func__ << ".";
310
311  // Timeout waiting for ARP reply and exceed the failure threshold.
312  if (timerisset(&sent_request_at_) && AddMissedResponse()) {
313    return;
314  }
315
316  ByteString destination_mac_address(gateway_mac_address_.GetLength());
317  if (!IsGatewayFound()) {
318    // The remote MAC addess is set by convention to be all-zeroes in the
319    // ARP header if not known.  The ArpClient will translate an all-zeroes
320    // remote address into a send to the broadcast (all-ones) address in
321    // the Ethernet frame header.
322    SLOG_IF(Link, 2, is_unicast_) << "Sending broadcast since "
323                                  << "gateway MAC is unknown";
324    is_unicast_ = false;
325  } else if (is_unicast_) {
326    destination_mac_address = gateway_mac_address_;
327  }
328
329  ArpPacket request(connection_->local(), connection_->gateway(),
330                    local_mac_address_, destination_mac_address);
331  if (!arp_client_->TransmitRequest(request)) {
332    LOG(ERROR) << "Failed to send ARP request.  Stopping.";
333    failure_callback_.Run(Metrics::kLinkMonitorTransmitFailure,
334                          broadcast_failure_count_,
335                          unicast_failure_count_);
336    Stop();
337    return;
338  }
339
340  time_->GetTimeMonotonic(&sent_request_at_);
341
342  dispatcher_->PostDelayedTask(send_request_callback_.callback(),
343                               test_period_milliseconds_);
344}
345
346}  // namespace shill
347