RBD 卷性能统计

性能查询请求类似如下 SQL 查询语句:

select ops, bytes, latency ... from osds where pool_id=<> and osd_id=<> and object_name=<> ... order by <> limit <max>

内部实现

OSD 负责遍历自身承载的 PG(只处理自己作为主副本的 PG),收集 PG 记录的统计信息,然后定时通过 MgrClient 上报至 Mgr 进行汇总。

MgrClient:

// Abbreviated excerpt of MgrClient: only the perf-metric callback
// registration is shown; the members it touches (lock, set_perf_queries_cb,
// get_perf_report_cb) are declared in the elided part of the class.
class MgrClient {
public:
  /// Registers the two callbacks used for OSD perf metric queries.
  ///
  /// @param cb_set  invoked with the query -> limits map to install.
  /// @param cb_get  invoked with a pointer to the query -> report map that
  ///                the callee is expected to fill in.
  ///
  /// Both callbacks are stored while `lock` is held.
  void set_perf_metric_query_cb(
    std::function<void(const std::map<OSDPerfMetricQuery,
                                      OSDPerfMetricLimits> &)> cb_set,
    std::function<void(std::map<OSDPerfMetricQuery,
                                      OSDPerfMetricReport> *)> cb_get)
  {
      std::lock_guard l(lock);
      set_perf_queries_cb = cb_set;
      get_perf_report_cb = cb_get;
  }
};

// Abbreviated excerpt (signature and surrounding logic elided).
// Hands the queries carried in m->osd_perf_metric_queries to
// set_perf_queries_cb, i.e. the setter callback stored by
// set_perf_metric_query_cb().
// NOTE(review): `m` is presumably the MMgrConfigure message received from
// the mgr -- confirm against the full source.
MgrClient::handle_mgr_configure {
  set_perf_queries_cb(m->osd_perf_metric_queries);
}

// Abbreviated excerpt (signature and surrounding logic elided).
// Asks get_perf_report_cb, i.e. the getter callback stored by
// set_perf_metric_query_cb(), to fill report->osd_perf_metric_reports.
// NOTE(review): `report` is presumably the MMgrReport message being built
// for the mgr -- confirm against the full source.
MgrClient::_send_report {
  get_perf_report_cb(&report->osd_perf_metric_reports);
}

OSD:

// Abbreviated excerpt of class OSD: only the members involved in dynamic
// perf metric queries are shown.
class OSD {
private:
  // Installs a new set of perf metric queries: remembers them (and their
  // limits) on the OSD and pushes the supported ones to every PG this OSD
  // is primary for.
  void set_perf_queries(
      const std::map<OSDPerfMetricQuery, OSDPerfMetricLimits> &queries);
  // Collects the dynamic perf stats of every PG this OSD is primary for,
  // merges them and adds the result to *reports.
  void get_perf_reports(
      std::map<OSDPerfMetricQuery, OSDPerfMetricReport> *reports);

  // Held by set_perf_queries() while the two members below are replaced.
  Mutex m_perf_queries_lock = {"OSD::m_perf_queries_lock"};
  // Queries accepted by set_perf_queries() (those with a non-empty key
  // descriptor).
  std::list<OSDPerfMetricQuery> m_perf_queries;
  // Full query -> limits map exactly as passed to set_perf_queries().
  std::map<OSDPerfMetricQuery, OSDPerfMetricLimits> m_perf_limits;
};

// Abbreviated excerpt of class PrimaryLogPG: only the dynamic perf stats
// hooks are shown. Both methods override virtuals declared in a base class
// that is outside this excerpt.
class PrimaryLogPG {
public:
  // Receives the list of queries this PG should track; OSD::set_perf_queries()
  // calls it between pg->lock() and pg->unlock().
  void set_dynamic_perf_stats_queries(
      const std::list<OSDPerfMetricQuery> &queries)  override;
  // Writes this PG's stats into *stats; OSD::get_perf_reports() calls it
  // between pg->lock() and pg->unlock().
  void get_dynamic_perf_stats(DynamicPerfStats *stats)  override;

private:
  // NOTE(review): presumably the per-PG stats accumulated for the installed
  // queries -- its use is not visible in this excerpt.
  DynamicPerfStats m_dynamic_perf_stats;
};


// Abbreviated excerpt of OSD::init (signature and unrelated setup elided).
// Registers two lambdas with the mgr client `mgrc`:
//   - the first forwards an incoming query map to OSD::set_perf_queries();
//   - the second fills the given report map via OSD::get_perf_reports().
// Both lambdas capture `this`.
// NOTE(review): this relies on the OSD outliving any invocation of the
// callbacks by mgrc -- confirm against the shutdown path.
OSD::init {
  mgrc.set_perf_metric_query_cb(
    [this](const std::map<OSDPerfMetricQuery, OSDPerfMetricLimits> &queries) {
      set_perf_queries(queries);
    },
    [this](std::map<OSDPerfMetricQuery, OSDPerfMetricReport> *reports) {
      get_perf_reports(reports);
    }
  );
}

void OSD::set_perf_queries(
    const std::map<OSDPerfMetricQuery, OSDPerfMetricLimits> &queries) {
  dout(10) << "setting " << queries.size() << " queries" << dendl;

  std::list<OSDPerfMetricQuery> supported_queries;
  for (auto &it : queries) {
    auto &query = it.first;
    if (!query.key_descriptor.empty()) {
      supported_queries.push_back(query);
    }
  }
  if (supported_queries.size() < queries.size()) {
    dout(1) << queries.size() - supported_queries.size()
            << " unsupported queries" << dendl;
  }

  {
    Mutex::Locker locker(m_perf_queries_lock);
    m_perf_queries = supported_queries;
    m_perf_limits = queries;
  }

  std::vector<PGRef> pgs;
  _get_pgs(&pgs);
  for (auto& pg : pgs) {
    if (pg->is_primary()) {
      pg->lock();
      pg->set_dynamic_perf_stats_queries(supported_queries);
      pg->unlock();
    }
  }
}

void OSD::get_perf_reports(
    std::map<OSDPerfMetricQuery, OSDPerfMetricReport> *reports) {
  std::vector<PGRef> pgs;
  _get_pgs(&pgs);
  DynamicPerfStats dps;
  for (auto& pg : pgs) {
    if (pg->is_primary()) {
      // m_perf_queries can be modified only in set_perf_queries by mgr client
      // request, and it is protected by by mgr client's lock, which is held
      // when set_perf_queries/get_perf_reports are called, so we may not hold
      // m_perf_queries_lock here.
      DynamicPerfStats pg_dps(m_perf_queries);
      pg->lock();
      pg->get_dynamic_perf_stats(&pg_dps);
      pg->unlock();
      dps.merge(pg_dps);
    }
  }
  dps.add_to_reports(m_perf_limits, reports);
  dout(20) << "reports for " << reports->size() << " queries" << dendl;
}

参考资料

New in Nautilus: RBD Performance Monitoring

https://ceph.com/rbd/new-in-nautilus-rbd-performance-monitoring/

mgr: create shell OSD performance query class

https://github.com/ceph/ceph/pull/24117

mgr: update MMgrConfigure message to include optional OSD perf queries

https://github.com/ceph/ceph/pull/24180

[DNM] mgr: update MMgrReport message to include OSD dynamic key/value perf counters

https://github.com/ceph/ceph/pull/24215

osd: collect client perf stats when query is enabled

https://github.com/ceph/ceph/pull/24265

rbd: implement new ‘rbd perf image iostat/iotop’ commands

https://github.com/ceph/ceph/pull/26133


最后修改于 2019-07-09