Apache Mesos
metrics.hpp
Go to the documentation of this file.
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 
17 #ifndef __MASTER_METRICS_HPP__
18 #define __MASTER_METRICS_HPP__
19 
#include <string>
#include <vector>

#include <process/owned.hpp>

#include <process/metrics/counter.hpp>
#include <process/metrics/metrics.hpp>
#include <process/metrics/pull_gauge.hpp>
#include <process/metrics/push_gauge.hpp>

#include <stout/hashmap.hpp>

#include "mesos/mesos.hpp"
#include "mesos/type_utils.hpp"
34 
35 namespace mesos {
36 namespace internal {
37 namespace master {
38 
39 class Master;
40 
41 struct Metrics
42 {
43  explicit Metrics(const Master& master);
44 
45  ~Metrics();
46 
49 
55 
60 
62 
63  // Task state metrics.
77 
80 
81  // NOTE: We only track metrics sources and reasons for terminal states.
83 
84  // Message counters.
86 
87  // Metrics specific to frameworks of a common principal.
88  // These metrics have names prefixed by "frameworks/<principal>/".
89  struct Frameworks
90  {
91  // Counters for messages from all frameworks of this principal.
92  // Note: We only count messages from active scheduler
93  // *instances* while they are *registered*. i.e., messages
94  // prior to the completion of (re)registration
95  // (AuthenticateMessage and (Re)RegisterFrameworkMessage) and
96  // messages from an inactive scheduler instance (after the
97  // framework has failed over) are not counted.
98 
99  // Framework messages received (before processing).
101 
102  // Framework messages processed.
103  // NOTE: This doesn't include dropped messages. Processing of
104  // a message may be throttled by a RateLimiter if one is
105  // configured for this principal. Also due to Master's
106  // asynchronous nature, this doesn't necessarily mean the work
107  // requested by this message has finished.
109 
110  explicit Frameworks(const std::string& principal)
111  : messages_received("frameworks/" + principal + "/messages_received"),
112  messages_processed("frameworks/" + principal + "/messages_processed")
113  {
114  process::metrics::add(messages_received);
115  process::metrics::add(messages_processed);
116  }
117 
119  {
120  process::metrics::remove(messages_received);
121  process::metrics::remove(messages_processed);
122  }
123  };
124 
125  // Per-framework-principal metrics keyed by the framework
126  // principal.
128 
129  // Messages from schedulers.
145 
146  // Messages from executors.
148 
149  // Messages from slaves.
156 
157  // Messages from both schedulers and slaves.
159 
164 
167 
170 
173 
174  // Recovery counters.
176 
177  // Process metrics.
181 
182  // Successful registry operations.
189 
190  // Slave observer metrics.
191  //
192  // TODO(neilc): The `slave_shutdowns_xxx` metrics are deprecated and
193  // will always be zero. Remove in Mesos 2.0.
197 
201 
202  // Non-revocable resources.
203  std::vector<process::metrics::PullGauge> resources_total;
204  std::vector<process::metrics::PullGauge> resources_used;
205  std::vector<process::metrics::PullGauge> resources_percent;
206 
207  // Revocable resources.
208  std::vector<process::metrics::PullGauge> resources_revocable_total;
209  std::vector<process::metrics::PullGauge> resources_revocable_used;
210  std::vector<process::metrics::PullGauge> resources_revocable_percent;
211 
213 
215  const TaskState& state,
216  const TaskStatus::Source& source,
217  const TaskStatus::Reason& reason);
218 };
219 
220 
222 {
223  explicit FrameworkMetrics(const FrameworkInfo& _frameworkInfo);
224 
225  ~FrameworkMetrics();
226 
227  void incrementCall(const mesos::scheduler::Call::Type& callType);
228 
229  void incrementEvent(const mesos::scheduler::Event& event);
230 
231  // Overloads to convert unversioned messages into events.
232  void incrementEvent(const FrameworkErrorMessage& message);
233  void incrementEvent(const ExitedExecutorMessage& message);
234  void incrementEvent(const LostSlaveMessage& message);
235  void incrementEvent(const InverseOffersMessage& message);
236  void incrementEvent(const ExecutorToFrameworkMessage& message);
237  void incrementEvent(const ResourceOffersMessage& message);
238  void incrementEvent(const RescindResourceOfferMessage& message);
239  void incrementEvent(const RescindInverseOfferMessage& message);
240  void incrementEvent(const FrameworkRegisteredMessage& message);
241  void incrementEvent(const FrameworkReregisteredMessage& message);
242  void incrementEvent(const StatusUpdateMessage& message);
243  void incrementEvent(const UpdateOperationStatusMessage& message);
244 
245  void incrementTaskState(const TaskState& state);
246  void decrementActiveTaskState(const TaskState& state);
247 
248  void incrementOperation(const Offer::Operation& operation);
249 
250  const FrameworkInfo frameworkInfo;
251 
253 
256 
259 
264 
266 
268 
271 };
272 
273 
274 std::string getFrameworkMetricPrefix(const FrameworkInfo& frameworkInfo);
275 
276 } // namespace master {
277 } // namespace internal {
278 } // namespace mesos {
279 
280 #endif // __MASTER_METRICS_HPP__
process::metrics::Counter messages_executor_to_framework
Definition: metrics.hpp:147
const FrameworkInfo frameworkInfo
Definition: metrics.hpp:250
process::metrics::Counter messages_reregister_slave
Definition: metrics.hpp:151
~Frameworks()
Definition: metrics.hpp:118
process::metrics::PullGauge outstanding_offers
Definition: metrics.hpp:61
process::metrics::PullGauge frameworks_inactive
Definition: metrics.hpp:59
process::metrics::Counter messages_reregister_framework
Definition: metrics.hpp:131
process::metrics::Counter recovery_slave_removals
Definition: metrics.hpp:175
Definition: master.hpp:27
process::metrics::PullGauge tasks_killing
Definition: metrics.hpp:68
Future< Nothing > remove(const Metric &metric)
Definition: metrics.hpp:109
process::metrics::Counter tasks_finished
Definition: metrics.hpp:69
hashmap< TaskStatus::Source, Reasons > SourcesReasons
Definition: metrics.hpp:79
process::metrics::Counter messages_reconcile_tasks
Definition: metrics.hpp:142
process::metrics::PullGauge slaves_connected
Definition: metrics.hpp:50
std::vector< process::metrics::PullGauge > resources_total
Definition: metrics.hpp:203
hashmap< TaskState, process::metrics::PushGauge > active_task_states
Definition: metrics.hpp:267
process::metrics::Counter messages_deactivate_framework
Definition: metrics.hpp:133
process::metrics::Counter slave_unreachable_completed
Definition: metrics.hpp:199
process::metrics::Counter valid_status_updates
Definition: metrics.hpp:165
process::metrics::PullGauge event_queue_dispatches
Definition: metrics.hpp:179
process::metrics::Counter messages_update_slave
Definition: metrics.hpp:155
mesos::v1::scheduler::Call Call
Definition: mesos.hpp:2616
process::metrics::Counter calls
Definition: metrics.hpp:254
process::metrics::PullGauge tasks_unreachable
Definition: metrics.hpp:67
process::metrics::PullGauge tasks_staging
Definition: metrics.hpp:64
process::metrics::Counter slave_shutdowns_scheduled
Definition: metrics.hpp:194
void incrementTasksStates(const TaskState &state, const TaskStatus::Source &source, const TaskStatus::Reason &reason)
process::metrics::Counter messages_kill_task
Definition: metrics.hpp:134
process::metrics::Counter tasks_failed
Definition: metrics.hpp:70
void incrementInvalidSchedulerCalls(const mesos::scheduler::Call &call)
process::metrics::Counter valid_status_update_acknowledgements
Definition: metrics.hpp:168
process::metrics::Counter messages_processed
Definition: metrics.hpp:108
Operation
Definition: cgroups.hpp:458
process::metrics::Counter tasks_lost
Definition: metrics.hpp:72
process::metrics::Counter slave_reregistrations
Definition: metrics.hpp:184
Future< Nothing > add(const T &metric)
Definition: metrics.hpp:95
hashmap< std::string, process::Owned< Frameworks > > frameworks
Definition: metrics.hpp:127
process::metrics::PushGauge subscribed
Definition: metrics.hpp:252
process::metrics::Counter events
Definition: metrics.hpp:257
Definition: counter.hpp:26
std::vector< process::metrics::PullGauge > resources_revocable_total
Definition: metrics.hpp:208
mesos::v1::scheduler::Event Event
Definition: mesos.hpp:2617
Definition: hashmap.hpp:38
process::metrics::Counter messages_status_update_acknowledgement
Definition: metrics.hpp:135
process::metrics::Counter valid_framework_to_executor_messages
Definition: metrics.hpp:160
process::metrics::PullGauge slaves_disconnected
Definition: metrics.hpp:51
process::metrics::Counter messages_authenticate
Definition: metrics.hpp:158
process::metrics::Counter invalid_executor_to_framework_messages
Definition: metrics.hpp:163
process::metrics::PullGauge frameworks_active
Definition: metrics.hpp:58
std::vector< process::metrics::PullGauge > resources_revocable_used
Definition: metrics.hpp:209
process::metrics::Counter slave_unreachable_canceled
Definition: metrics.hpp:200
process::metrics::PullGauge event_queue_http_requests
Definition: metrics.hpp:180
process::metrics::Counter tasks_error
Definition: metrics.hpp:73
process::metrics::Counter valid_executor_to_framework_messages
Definition: metrics.hpp:162
Definition: pull_gauge.hpp:46
process::metrics::PullGauge tasks_running
Definition: metrics.hpp:66
std::vector< process::metrics::PullGauge > resources_percent
Definition: metrics.hpp:205
process::metrics::Counter tasks_gone
Definition: metrics.hpp:75
process::metrics::Counter messages_launch_tasks
Definition: metrics.hpp:137
process::metrics::Counter slave_unreachable_scheduled
Definition: metrics.hpp:198
process::metrics::Counter dropped_messages
Definition: metrics.hpp:85
process::metrics::Counter invalid_framework_to_executor_messages
Definition: metrics.hpp:161
Definition: spec.hpp:26
process::metrics::Counter messages_suppress_offers
Definition: metrics.hpp:140
process::metrics::PullGauge uptime_secs
Definition: metrics.hpp:47
process::metrics::PullGauge frameworks_disconnected
Definition: metrics.hpp:57
process::metrics::Counter slave_shutdowns_canceled
Definition: metrics.hpp:196
process::metrics::Counter offers_rescinded
Definition: metrics.hpp:263
process::metrics::PullGauge slaves_inactive
Definition: metrics.hpp:53
Metrics(const Master &master)
process::metrics::Counter messages_status_update
Definition: metrics.hpp:153
process::metrics::Counter messages_revive_offers
Definition: metrics.hpp:139
process::metrics::Counter slave_removals
Definition: metrics.hpp:185
process::metrics::Counter tasks_killed
Definition: metrics.hpp:71
process::metrics::Counter messages_register_framework
Definition: metrics.hpp:130
process::metrics::Counter operations
Definition: metrics.hpp:269
process::metrics::Counter slave_shutdowns_completed
Definition: metrics.hpp:195
process::metrics::Counter messages_register_slave
Definition: metrics.hpp:150
process::metrics::PullGauge slaves_active
Definition: metrics.hpp:52
process::metrics::Counter slave_removals_reason_unregistered
Definition: metrics.hpp:187
process::metrics::Counter messages_operation_status_update_acknowledgement
Definition: metrics.hpp:144
hashmap< mesos::scheduler::Call::Type, process::metrics::Counter > call_types
Definition: metrics.hpp:255
Definition: attributes.hpp:24
process::metrics::Counter messages_exited_executor
Definition: metrics.hpp:154
process::metrics::Counter messages_resource_request
Definition: metrics.hpp:136
process::metrics::Counter messages_framework_to_executor
Definition: metrics.hpp:143
hashmap< TaskState, SourcesReasons > tasks_states
Definition: metrics.hpp:82
process::metrics::Counter slave_removals_reason_registered
Definition: metrics.hpp:188
Type
Definition: capabilities.hpp:79
process::metrics::Counter invalid_operation_status_update_acknowledgements
Definition: metrics.hpp:172
std::string getFrameworkMetricPrefix(const FrameworkInfo &frameworkInfo)
std::vector< process::metrics::PullGauge > resources_used
Definition: metrics.hpp:204
process::metrics::Counter messages_unregister_slave
Definition: metrics.hpp:152
hashmap< mesos::scheduler::Event::Type, process::metrics::Counter > event_types
Definition: metrics.hpp:258
process::metrics::Counter tasks_gone_by_operator
Definition: metrics.hpp:76
process::metrics::Counter messages_decline_offers
Definition: metrics.hpp:138
hashmap< TaskState, process::metrics::Counter > terminal_task_states
Definition: metrics.hpp:265
process::metrics::Counter messages_reconcile_operations
Definition: metrics.hpp:141
Definition: metrics.hpp:41
Frameworks(const std::string &principal)
Definition: metrics.hpp:110
process::metrics::Counter messages_unregister_framework
Definition: metrics.hpp:132
process::metrics::PullGauge tasks_starting
Definition: metrics.hpp:65
process::metrics::Counter messages_received
Definition: metrics.hpp:100
process::metrics::Counter invalid_status_updates
Definition: metrics.hpp:166
process::metrics::Counter invalid_status_update_acknowledgements
Definition: metrics.hpp:169
process::metrics::PullGauge frameworks_connected
Definition: metrics.hpp:56
hashmap< Offer::Operation::Type, process::metrics::Counter > operation_types
Definition: metrics.hpp:270
process::metrics::Counter offers_declined
Definition: metrics.hpp:262
Definition: master.hpp:433
Definition: push_gauge.hpp:41
std::vector< process::metrics::PullGauge > resources_revocable_percent
Definition: metrics.hpp:210
process::metrics::PullGauge event_queue_messages
Definition: metrics.hpp:178
process::metrics::Counter slave_removals_reason_unhealthy
Definition: metrics.hpp:186
hashmap< TaskStatus::Reason, process::metrics::Counter > Reasons
Definition: metrics.hpp:78
process::metrics::PullGauge slaves_unreachable
Definition: metrics.hpp:54
process::metrics::Counter valid_operation_status_update_acknowledgements
Definition: metrics.hpp:171
process::metrics::Counter slave_registrations
Definition: metrics.hpp:183
process::metrics::Counter tasks_dropped
Definition: metrics.hpp:74
process::metrics::Counter offers_sent
Definition: metrics.hpp:260
process::metrics::PullGauge elected
Definition: metrics.hpp:48
process::metrics::Counter offers_accepted
Definition: metrics.hpp:261