1/*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "src/traced/probes/probes_producer.h"
18
19#include <stdio.h>
20#include <sys/stat.h>
21
22#include <algorithm>
23#include <queue>
24#include <string>
25
26#include "perfetto/base/logging.h"
27#include "perfetto/base/weak_ptr.h"
28#include "perfetto/traced/traced.h"
29#include "perfetto/tracing/core/data_source_config.h"
30#include "perfetto/tracing/core/data_source_descriptor.h"
31#include "perfetto/tracing/core/ftrace_config.h"
32#include "perfetto/tracing/core/trace_config.h"
33#include "perfetto/tracing/core/trace_packet.h"
34#include "src/traced/probes/filesystem/inode_file_data_source.h"
35
36#include "perfetto/trace/filesystem/inode_file_map.pbzero.h"
37#include "perfetto/trace/ftrace/ftrace_event_bundle.pbzero.h"
38#include "perfetto/trace/ftrace/ftrace_stats.pbzero.h"
39#include "perfetto/trace/trace_packet.pbzero.h"
40
41namespace perfetto {
namespace {

// Names under which this producer registers its data sources with the
// tracing service, and against which incoming configs are matched.
constexpr char kFtraceSourceName[] = "linux.ftrace";
constexpr char kProcessStatsSourceName[] = "linux.process_stats";
constexpr char kInodeMapSourceName[] = "linux.inode_file_map";

// Bounds (in milliseconds) of the reconnection back-off: the delay is reset
// to the initial value, doubled on every failed attempt and capped at the
// maximum.
constexpr uint32_t kInitialConnectionBackoffMs = 100;
constexpr uint32_t kMaxConnectionBackoffMs = 30000;

}  // namespace.
51
52// State transition diagram:
53//                    +----------------------------+
54//                    v                            +
55// NotStarted -> NotConnected -> Connecting -> Connected
56//                    ^              +
57//                    +--------------+
58//
59
60ProbesProducer::ProbesProducer() {}
61ProbesProducer::~ProbesProducer() = default;
62
63void ProbesProducer::OnConnect() {
64  PERFETTO_DCHECK(state_ == kConnecting);
65  state_ = kConnected;
66  ResetConnectionBackoff();
67  PERFETTO_LOG("Connected to the service");
68
69  DataSourceDescriptor ftrace_descriptor;
70  ftrace_descriptor.set_name(kFtraceSourceName);
71  endpoint_->RegisterDataSource(ftrace_descriptor);
72
73  DataSourceDescriptor process_stats_descriptor;
74  process_stats_descriptor.set_name(kProcessStatsSourceName);
75  endpoint_->RegisterDataSource(process_stats_descriptor);
76
77  DataSourceDescriptor inode_map_descriptor;
78  inode_map_descriptor.set_name(kInodeMapSourceName);
79  endpoint_->RegisterDataSource(inode_map_descriptor);
80}
81
82void ProbesProducer::OnDisconnect() {
83  PERFETTO_DCHECK(state_ == kConnected || state_ == kConnecting);
84  PERFETTO_LOG("Disconnected from tracing service");
85  if (state_ == kConnected)
86    return task_runner_->PostTask([this] { this->Restart(); });
87
88  state_ = kNotConnected;
89  IncreaseConnectionBackoff();
90  task_runner_->PostDelayedTask([this] { this->Connect(); },
91                                connection_backoff_ms_);
92}
93
94void ProbesProducer::Restart() {
95  // We lost the connection with the tracing service. At this point we need
96  // to reset all the data sources. Trying to handle that manually is going to
97  // be error prone. What we do here is simply desroying the instance and
98  // recreating it again.
99  // TODO(hjd): Add e2e test for this.
100
101  base::TaskRunner* task_runner = task_runner_;
102  const char* socket_name = socket_name_;
103
104  // Invoke destructor and then the constructor again.
105  this->~ProbesProducer();
106  new (this) ProbesProducer();
107
108  ConnectWithRetries(socket_name, task_runner);
109}
110
111void ProbesProducer::CreateDataSourceInstance(DataSourceInstanceID instance_id,
112                                              const DataSourceConfig& config) {
113  // TODO(hjd): This a hack since we don't actually know the session id. For
114  // now we'll assume anything wit hthe same target buffer is in the same
115  // session.
116  TracingSessionID session_id = config.target_buffer();
117
118  if (config.name() == kFtraceSourceName) {
119    if (!CreateFtraceDataSourceInstance(session_id, instance_id, config))
120      failed_sources_.insert(instance_id);
121  } else if (config.name() == kInodeMapSourceName) {
122    CreateInodeFileDataSourceInstance(session_id, instance_id, config);
123  } else if (config.name() == kProcessStatsSourceName) {
124    CreateProcessStatsDataSourceInstance(session_id, instance_id, config);
125  } else {
126    PERFETTO_ELOG("Data source name: %s not recognised.",
127                  config.name().c_str());
128    return;
129  }
130
131  std::map<TracingSessionID, InodeFileDataSource*> file_sources;
132  std::map<TracingSessionID, ProcessStatsDataSource*> ps_sources;
133  for (const auto& pair : file_map_sources_)
134    file_sources[pair.second->session_id()] = pair.second.get();
135  for (const auto& pair : process_stats_sources_)
136    ps_sources[pair.second->session_id()] = pair.second.get();
137
138  for (const auto& id_to_source : delegates_) {
139    const std::unique_ptr<SinkDelegate>& source = id_to_source.second;
140    if (session_id != source->session_id())
141      continue;
142    if (!source->ps_source() && ps_sources.count(session_id))
143      source->set_ps_source(ps_sources[session_id]->GetWeakPtr());
144    if (!source->file_source() && file_sources.count(session_id))
145      source->set_file_source(file_sources[session_id]->GetWeakPtr());
146  }
147}
148
149void ProbesProducer::AddWatchdogsTimer(DataSourceInstanceID id,
150                                       const DataSourceConfig& config) {
151  if (config.trace_duration_ms() != 0)
152    watchdogs_.emplace(id, base::Watchdog::GetInstance()->CreateFatalTimer(
153                               5000 + 2 * config.trace_duration_ms()));
154}
155
156bool ProbesProducer::CreateFtraceDataSourceInstance(
157    TracingSessionID session_id,
158    DataSourceInstanceID id,
159    const DataSourceConfig& config) {
160  // Don't retry if FtraceController::Create() failed once.
161  // This can legitimately happen on user builds where we cannot access the
162  // debug paths, e.g., because of SELinux rules.
163  if (ftrace_creation_failed_)
164    return false;
165
166  // Lazily create on the first instance.
167  if (!ftrace_) {
168    ftrace_ = FtraceController::Create(task_runner_);
169
170    if (!ftrace_) {
171      PERFETTO_ELOG("Failed to create FtraceController");
172      ftrace_creation_failed_ = true;
173      return false;
174    }
175
176    ftrace_->DisableAllEvents();
177    ftrace_->ClearTrace();
178  }
179
180  PERFETTO_LOG("Ftrace start (id=%" PRIu64 ", target_buf=%" PRIu32 ")", id,
181               config.target_buffer());
182
183  FtraceConfig proto_config = config.ftrace_config();
184
185  // TODO(hjd): Static cast is bad, target_buffer() should return a BufferID.
186  auto trace_writer = endpoint_->CreateTraceWriter(
187      static_cast<BufferID>(config.target_buffer()));
188  auto delegate = std::unique_ptr<SinkDelegate>(
189      new SinkDelegate(session_id, task_runner_, std::move(trace_writer)));
190  auto sink = ftrace_->CreateSink(std::move(proto_config), delegate.get());
191  if (!sink) {
192    PERFETTO_ELOG("Failed to start tracing (maybe someone else is using it?)");
193    return false;
194  }
195  delegate->set_sink(std::move(sink));
196  delegates_.emplace(id, std::move(delegate));
197  AddWatchdogsTimer(id, config);
198  return true;
199}
200
201void ProbesProducer::CreateInodeFileDataSourceInstance(
202    TracingSessionID session_id,
203    DataSourceInstanceID id,
204    DataSourceConfig source_config) {
205  PERFETTO_LOG("Inode file map start (id=%" PRIu64 ", target_buf=%" PRIu32 ")",
206               id, source_config.target_buffer());
207  auto trace_writer = endpoint_->CreateTraceWriter(
208      static_cast<BufferID>(source_config.target_buffer()));
209  if (system_inodes_.empty())
210    CreateStaticDeviceToInodeMap("/system", &system_inodes_);
211  auto file_map_source =
212      std::unique_ptr<InodeFileDataSource>(new InodeFileDataSource(
213          std::move(source_config), task_runner_, session_id, &system_inodes_,
214          &cache_, std::move(trace_writer)));
215  file_map_sources_.emplace(id, std::move(file_map_source));
216  AddWatchdogsTimer(id, source_config);
217}
218
219void ProbesProducer::CreateProcessStatsDataSourceInstance(
220    TracingSessionID session_id,
221    DataSourceInstanceID id,
222    const DataSourceConfig& config) {
223  PERFETTO_DCHECK(process_stats_sources_.count(id) == 0);
224  auto trace_writer = endpoint_->CreateTraceWriter(
225      static_cast<BufferID>(config.target_buffer()));
226  auto source = std::unique_ptr<ProcessStatsDataSource>(
227      new ProcessStatsDataSource(session_id, std::move(trace_writer), config));
228  auto it_and_inserted = process_stats_sources_.emplace(id, std::move(source));
229  if (!it_and_inserted.second) {
230    PERFETTO_DCHECK(false);
231    return;
232  }
233  ProcessStatsDataSource* ps_data_source = it_and_inserted.first->second.get();
234  if (config.process_stats_config().scan_all_processes_on_start()) {
235    ps_data_source->WriteAllProcesses();
236  }
237}
238
239void ProbesProducer::TearDownDataSourceInstance(DataSourceInstanceID id) {
240  PERFETTO_LOG("Producer stop (id=%" PRIu64 ")", id);
241  // |id| could be the id of any of the datasources we handle:
242  PERFETTO_DCHECK((failed_sources_.count(id) + delegates_.count(id) +
243                   process_stats_sources_.count(id) +
244                   file_map_sources_.count(id)) == 1);
245  failed_sources_.erase(id);
246  delegates_.erase(id);
247  process_stats_sources_.erase(id);
248  file_map_sources_.erase(id);
249  watchdogs_.erase(id);
250}
251
252void ProbesProducer::OnTracingSetup() {}
253
254void ProbesProducer::Flush(FlushRequestID flush_request_id,
255                           const DataSourceInstanceID* data_source_ids,
256                           size_t num_data_sources) {
257  for (size_t i = 0; i < num_data_sources; i++) {
258    DataSourceInstanceID ds_id = data_source_ids[i];
259    {
260      auto it = process_stats_sources_.find(ds_id);
261      if (it != process_stats_sources_.end())
262        it->second->Flush();
263    }
264    {
265      auto it = file_map_sources_.find(ds_id);
266      if (it != file_map_sources_.end())
267        it->second->Flush();
268    }
269    {
270      auto it = delegates_.find(ds_id);
271      if (it != delegates_.end())
272        it->second->Flush();
273    }
274  }
275  endpoint_->NotifyFlushComplete(flush_request_id);
276}
277
278void ProbesProducer::ConnectWithRetries(const char* socket_name,
279                                        base::TaskRunner* task_runner) {
280  PERFETTO_DCHECK(state_ == kNotStarted);
281  state_ = kNotConnected;
282
283  ResetConnectionBackoff();
284  socket_name_ = socket_name;
285  task_runner_ = task_runner;
286  Connect();
287}
288
289void ProbesProducer::Connect() {
290  PERFETTO_DCHECK(state_ == kNotConnected);
291  state_ = kConnecting;
292  endpoint_ = ProducerIPCClient::Connect(
293      socket_name_, this, "perfetto.traced_probes", task_runner_);
294}
295
296void ProbesProducer::IncreaseConnectionBackoff() {
297  connection_backoff_ms_ *= 2;
298  if (connection_backoff_ms_ > kMaxConnectionBackoffMs)
299    connection_backoff_ms_ = kMaxConnectionBackoffMs;
300}
301
302void ProbesProducer::ResetConnectionBackoff() {
303  connection_backoff_ms_ = kInitialConnectionBackoffMs;
304}
305
306ProbesProducer::SinkDelegate::SinkDelegate(TracingSessionID id,
307                                           base::TaskRunner* task_runner,
308                                           std::unique_ptr<TraceWriter> writer)
309    : session_id_(id),
310      task_runner_(task_runner),
311      writer_(std::move(writer)),
312      weak_factory_(this) {}
313
314ProbesProducer::SinkDelegate::~SinkDelegate() = default;
315
316void ProbesProducer::SinkDelegate::OnCreate(FtraceSink* sink) {
317  sink->DumpFtraceStats(&stats_before_);
318}
319
320void ProbesProducer::SinkDelegate::Flush() {
321  // TODO(primiano): this still doesn't flush data from the kernel ftrace
322  // buffers (see b/73886018). We should do that and delay the
323  // NotifyFlushComplete() until the ftrace data has been drained from the
324  // kernel ftrace buffer and written in the SMB.
325  if (writer_ && (!trace_packet_ || trace_packet_->is_finalized())) {
326    WriteStats();
327    writer_->Flush();
328  }
329}
330
331void ProbesProducer::SinkDelegate::WriteStats() {
332  {
333    auto before_packet = writer_->NewTracePacket();
334    auto out = before_packet->set_ftrace_stats();
335    out->set_phase(protos::pbzero::FtraceStats_Phase_START_OF_TRACE);
336    stats_before_.Write(out);
337  }
338  {
339    FtraceStats stats_after{};
340    sink_->DumpFtraceStats(&stats_after);
341    auto after_packet = writer_->NewTracePacket();
342    auto out = after_packet->set_ftrace_stats();
343    out->set_phase(protos::pbzero::FtraceStats_Phase_END_OF_TRACE);
344    stats_after.Write(out);
345  }
346}
347
348ProbesProducer::FtraceBundleHandle
349ProbesProducer::SinkDelegate::GetBundleForCpu(size_t) {
350  trace_packet_ = writer_->NewTracePacket();
351  return FtraceBundleHandle(trace_packet_->set_ftrace_events());
352}
353
354void ProbesProducer::SinkDelegate::OnBundleComplete(
355    size_t,
356    FtraceBundleHandle,
357    const FtraceMetadata& metadata) {
358  trace_packet_->Finalize();
359
360  if (file_source_ && !metadata.inode_and_device.empty()) {
361    auto inodes = metadata.inode_and_device;
362    auto weak_file_source = file_source_;
363    task_runner_->PostTask([weak_file_source, inodes] {
364      if (weak_file_source)
365        weak_file_source->OnInodes(inodes);
366    });
367  }
368  if (ps_source_ && !metadata.pids.empty()) {
369    const auto& quirks = ps_source_->config().process_stats_config().quirks();
370    if (std::find(quirks.begin(), quirks.end(),
371                  ProcessStatsConfig::DISABLE_ON_DEMAND) != quirks.end()) {
372      return;
373    }
374    const auto& pids = metadata.pids;
375    auto weak_ps_source = ps_source_;
376    task_runner_->PostTask([weak_ps_source, pids] {
377      if (weak_ps_source)
378        weak_ps_source->OnPids(pids);
379    });
380  }
381}
382
383}  // namespace perfetto
384