Apache Mesos
statistics.hpp
Go to the documentation of this file.
1 // Licensed under the Apache License, Version 2.0 (the "License");
2 // you may not use this file except in compliance with the License.
3 // You may obtain a copy of the License at
4 //
5 // http://www.apache.org/licenses/LICENSE-2.0
6 //
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License
12 
13 #ifndef __PROCESS_STATISTICS_HPP__
14 #define __PROCESS_STATISTICS_HPP__
15 
16 #include <glog/logging.h>
17 
18 #include <algorithm>
19 #include <iterator>
20 #include <type_traits>
21 #include <vector>
22 
23 #include <process/timeseries.hpp>
24 
25 #include <stout/foreach.hpp>
26 #include <stout/option.hpp>
27 
28 namespace process {
29 
30 // Represents statistics for a `TimeSeries` of data or a standard container.
31 template <typename T>
32 struct Statistics
33 {
34  // Returns `Statistics` for the given `TimeSeries`, or `None` if the
35  // `TimeSeries` has less then 2 datapoints.
36  //
37  // TODO(dhamon): Consider adding a histogram abstraction for better
38  // performance.
39  //
40  // Remove this specification once we can construct directly from
41  // `TimeSeries<T>::Value`, e.g., by using an iterator adaptor, see
42  // https://www.boost.org/doc/libs/1_51_0/libs/range/doc/html/range/reference/adaptors/reference/map_values.html // NOLINT(whitespace/line_length)
43  static Option<Statistics<T>> from(const TimeSeries<T>& timeseries)
44  {
45  std::vector<typename TimeSeries<T>::Value> values_ = timeseries.get();
46 
47  std::vector<T> values;
48  values.reserve(values_.size());
49 
50  foreach (const typename TimeSeries<T>::Value& value, values_) {
51  values.push_back(value.data);
52  }
53 
54  return from(std::move(values));
55  }
56 
57  // Returns `Statistics` for the given container, or `None` if the container
58  // has less then 2 datapoints. The container is represented as a pair of
59  // [first, last) iterators.
60  //
61  // TODO(alexr): Consider relaxing the collection type requirement to
62  // `std::is_convertible<std::iterator_traits<It>::value_type, T>`.
63  template <
64  typename It,
65  typename = typename std::enable_if<
66  std::is_same<
67  typename std::iterator_traits<It>::value_type,
68  T>::value &&
69  std::is_convertible<
70  typename std::iterator_traits<It>::iterator_category,
71  std::forward_iterator_tag>::value>::type>
72  static Option<Statistics<T>> from(It first, It last)
73  {
74  // Copy values into a vector.
75  std::vector<T> values;
76  values.reserve(std::distance(first, last));
77 
78  std::copy(first, last, std::back_inserter(values));
79 
80  return from(std::move(values));
81  }
82 
83  size_t count;
84 
85  T min;
86  T max;
87 
88  // TODO(dhamon): Consider making the percentiles we store dynamic.
89  T p25;
90  T p50;
91  T p75;
92  T p90;
93  T p95;
94  T p99;
95  T p999;
96  T p9999;
97 
98 private:
99  // Calculates `Statistics` from the provided vector; note pass by reference.
100  static Option<Statistics<T>> from(std::vector<T>&& values)
101  {
102  // We need at least 2 values to compute aggregates.
103  if (values.size() < 2) {
104  return None();
105  }
106 
107  std::sort(values.begin(), values.end());
108 
110 
111  statistics.count = values.size();
112 
113  statistics.min = values.front();
114  statistics.max = values.back();
115 
116  statistics.p25 = percentile(values, 0.25);
117  statistics.p50 = percentile(values, 0.5);
118  statistics.p75 = percentile(values, 0.75);
119  statistics.p90 = percentile(values, 0.90);
120  statistics.p95 = percentile(values, 0.95);
121  statistics.p99 = percentile(values, 0.99);
122  statistics.p999 = percentile(values, 0.999);
123  statistics.p9999 = percentile(values, 0.9999);
124 
125  return statistics;
126  }
127 
128  // Returns the requested percentile from the sorted values.
129  // Note that we need at least two values to compute percentiles!
130  //
131  // TODO(dhamon): Use a 'Percentage' abstraction.
132  static T percentile(const std::vector<T>& values, double percentile)
133  {
134  CHECK_GE(values.size(), 2u);
135 
136  if (percentile <= 0.0) {
137  return values.front();
138  }
139 
140  if (percentile >= 1.0) {
141  return values.back();
142  }
143 
144  // Use linear interpolation.
145  const double position = percentile * (values.size() - 1);
146  const size_t index = static_cast<size_t>(floor(position));
147  const double delta = position - index;
148 
149  CHECK_GE(index, 0u);
150  CHECK_LT(index, values.size() - 1);
151 
152  return values[index] + (values[index + 1] - values[index]) * delta;
153  }
154 };
155 
156 } // namespace process {
157 
158 #endif // __PROCESS_STATISTICS_HPP__
size_t count
Definition: statistics.hpp:83
T max
Definition: statistics.hpp:86
Definition: option.hpp:29
T p75
Definition: statistics.hpp:91
T p95
Definition: statistics.hpp:93
std::vector< Value > get(const Option< Time > &start=None(), const Option< Time > &stop=None()) const
Definition: timeseries.hpp:86
T p9999
Definition: statistics.hpp:96
static Option< Statistics< T > > from(const TimeSeries< T > &timeseries)
Definition: statistics.hpp:43
T min
Definition: statistics.hpp:85
T p50
Definition: statistics.hpp:90
Definition: none.hpp:27
Definition: executor.hpp:48
Definition: timeseries.hpp:60
T data
Definition: timeseries.hpp:66
T p25
Definition: statistics.hpp:89
Try< uint32_t > type(const std::string &path)
T p99
Definition: statistics.hpp:94
T p90
Definition: statistics.hpp:92
T p999
Definition: statistics.hpp:95
T copy(const T &t)
Definition: utils.hpp:21
Definition: statistics.hpp:32
Definition: timeseries.hpp:52
static Option< Statistics< T > > from(It first, It last)
Definition: statistics.hpp:72