Skip to content

Commit

Permalink
feat: change DownloadPeerDuration metric type to summary (#2794)
Browse files Browse the repository at this point in the history
feat: change DownloadPeerDuration metric to Summary

Signed-off-by: Gaius <gaius.qi@gmail.com>
  • Loading branch information
gaius-qi authored Oct 11, 2023
1 parent 82a5b7b commit 6addfdf
Show file tree
Hide file tree
Showing 3 changed files with 176 additions and 15 deletions.
179 changes: 172 additions & 7 deletions scheduler/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -252,13 +252,13 @@ var (
Help: "Counter of the number of per host traffic.",
}, []string{"type", "task_type", "task_tag", "task_app", "host_type", "host_id", "host_ip", "host_name"})

DownloadPeerDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
Namespace: types.MetricsNamespace,
Subsystem: types.SchedulerMetricsName,
Name: "download_peer_duration_milliseconds",
Help: "Histogram of the time each peer downloading.",
Buckets: []float64{100, 200, 500, 1000, 1500, 2 * 1000, 3 * 1000, 5 * 1000, 10 * 1000, 20 * 1000, 60 * 1000, 120 * 1000, 300 * 1000},
}, []string{"priority", "task_type", "task_tag", "task_app", "task_content_length", "host_type"})
DownloadPeerDuration = promauto.NewSummaryVec(prometheus.SummaryOpts{
Namespace: types.MetricsNamespace,
Subsystem: types.SchedulerMetricsName,
Name: "download_peer_duration_milliseconds",
Help: "Summary of the time each peer downloading.",
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.005, 0.99: 0.001},
}, []string{"task_size_level"})

ConcurrentScheduleGauge = promauto.NewGauge(prometheus.GaugeOpts{
Namespace: types.MetricsNamespace,
Expand Down Expand Up @@ -287,3 +287,168 @@ func New(cfg *config.MetricsConfig, svr *grpc.Server) *http.Server {
Handler: mux,
}
}

// TaskSizeLevel is a coarse bucket describing how large a task's content is.
// Its String form is used as the value of the "task_size_level" metric label,
// which keeps the label cardinality bounded to the declared levels.
type TaskSizeLevel int

// taskSizeLevelNames maps every declared TaskSizeLevel to its label value.
// The index is the numeric value of the level.
var taskSizeLevelNames = [...]string{
	"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10",
	"11", "12", "13", "14", "15", "16", "17", "18", "19", "20",
}

// String returns the decimal number of the level ("0" through "20").
// Values outside the declared levels are reported as "0", the same label
// as the unknown-size level.
func (t TaskSizeLevel) String() string {
	if t < TaskSizeLevel0 || t > TaskSizeLevel20 {
		return taskSizeLevelNames[TaskSizeLevel0]
	}

	return taskSizeLevelNames[t]
}

// Size ranges below are half-open, [lower, upper), and use binary units
// (1 MiB = 1024*1024 bytes), matching the comparisons in CalculateSizeLevel.
const (
	// TaskSizeLevel0 represents an unknown size (size is zero or negative).
	TaskSizeLevel0 TaskSizeLevel = iota

	// TaskSizeLevel1 represents a size greater than 0 and less than 1 MiB.
	TaskSizeLevel1

	// TaskSizeLevel2 represents a size from 1 MiB to 4 MiB.
	TaskSizeLevel2

	// TaskSizeLevel3 represents a size from 4 MiB to 8 MiB.
	TaskSizeLevel3

	// TaskSizeLevel4 represents a size from 8 MiB to 16 MiB.
	TaskSizeLevel4

	// TaskSizeLevel5 represents a size from 16 MiB to 32 MiB.
	TaskSizeLevel5

	// TaskSizeLevel6 represents a size from 32 MiB to 64 MiB.
	TaskSizeLevel6

	// TaskSizeLevel7 represents a size from 64 MiB to 128 MiB.
	TaskSizeLevel7

	// TaskSizeLevel8 represents a size from 128 MiB to 256 MiB.
	TaskSizeLevel8

	// TaskSizeLevel9 represents a size from 256 MiB to 512 MiB.
	TaskSizeLevel9

	// TaskSizeLevel10 represents a size from 512 MiB to 1 GiB.
	TaskSizeLevel10

	// TaskSizeLevel11 represents a size from 1 GiB to 4 GiB.
	TaskSizeLevel11

	// TaskSizeLevel12 represents a size from 4 GiB to 8 GiB.
	TaskSizeLevel12

	// TaskSizeLevel13 represents a size from 8 GiB to 16 GiB.
	TaskSizeLevel13

	// TaskSizeLevel14 represents a size from 16 GiB to 32 GiB.
	TaskSizeLevel14

	// TaskSizeLevel15 represents a size from 32 GiB to 64 GiB.
	TaskSizeLevel15

	// TaskSizeLevel16 represents a size from 64 GiB to 128 GiB.
	TaskSizeLevel16

	// TaskSizeLevel17 represents a size from 128 GiB to 256 GiB.
	TaskSizeLevel17

	// TaskSizeLevel18 represents a size from 256 GiB to 512 GiB.
	TaskSizeLevel18

	// TaskSizeLevel19 represents a size from 512 GiB to 1 TiB.
	TaskSizeLevel19

	// TaskSizeLevel20 represents a size of 1 TiB or more.
	TaskSizeLevel20
)

// CalculateSizeLevel returns the TaskSizeLevel for a content length given in
// bytes. A size of zero or less yields TaskSizeLevel0 (unknown); every other
// size falls into the first level whose exclusive upper bound is greater than
// size, and anything of 1 TiB or more yields TaskSizeLevel20.
func CalculateSizeLevel(size int64) TaskSizeLevel {
	// Typed as int64 so the bounds above 2 GiB never depend on the width of int.
	const (
		mib int64 = 1 << 20
		gib int64 = 1 << 30
		tib int64 = 1 << 40
	)

	// Cases are evaluated top to bottom, so each one only needs its upper bound.
	switch {
	case size <= 0:
		return TaskSizeLevel0
	case size < mib:
		return TaskSizeLevel1
	case size < 4*mib:
		return TaskSizeLevel2
	case size < 8*mib:
		return TaskSizeLevel3
	case size < 16*mib:
		return TaskSizeLevel4
	case size < 32*mib:
		return TaskSizeLevel5
	case size < 64*mib:
		return TaskSizeLevel6
	case size < 128*mib:
		return TaskSizeLevel7
	case size < 256*mib:
		return TaskSizeLevel8
	case size < 512*mib:
		return TaskSizeLevel9
	case size < gib:
		return TaskSizeLevel10
	case size < 4*gib:
		return TaskSizeLevel11
	case size < 8*gib:
		return TaskSizeLevel12
	case size < 16*gib:
		return TaskSizeLevel13
	case size < 32*gib:
		return TaskSizeLevel14
	case size < 64*gib:
		return TaskSizeLevel15
	case size < 128*gib:
		return TaskSizeLevel16
	case size < 256*gib:
		return TaskSizeLevel17
	case size < 512*gib:
		return TaskSizeLevel18
	case size < tib:
		return TaskSizeLevel19
	default:
		return TaskSizeLevel20
	}
}
6 changes: 2 additions & 4 deletions scheduler/service/service_v1.go
Original file line number Diff line number Diff line change
Expand Up @@ -323,15 +323,13 @@ func (v *V1) ReportPeerResult(ctx context.Context, req *schedulerv1.PeerResult)
go v.createDownloadRecord(peer, parents, req)
v.handleTaskSuccess(ctx, peer.Task, req)
v.handlePeerSuccess(ctx, peer)
metrics.DownloadPeerDuration.WithLabelValues(priority.String(), peer.Task.Type.String(),
peer.Task.Tag, peer.Task.Application, peer.Task.ContentLength.String(), peer.Host.Type.Name()).Observe(float64(req.GetCost()))
metrics.DownloadPeerDuration.WithLabelValues(metrics.CalculateSizeLevel(peer.Task.ContentLength.Load()).String()).Observe(float64(req.GetCost()))
return nil
}

go v.createDownloadRecord(peer, parents, req)
v.handlePeerSuccess(ctx, peer)
metrics.DownloadPeerDuration.WithLabelValues(priority.String(), peer.Task.Type.String(),
peer.Task.Tag, peer.Task.Application, peer.Task.ContentLength.String(), peer.Host.Type.Name()).Observe(float64(req.GetCost()))
metrics.DownloadPeerDuration.WithLabelValues(metrics.CalculateSizeLevel(peer.Task.ContentLength.Load()).String()).Observe(float64(req.GetCost()))
return nil
}

Expand Down
6 changes: 2 additions & 4 deletions scheduler/service/service_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -967,8 +967,7 @@ func (v *V2) handleDownloadPeerFinishedRequest(ctx context.Context, peerID strin
metrics.DownloadPeerCount.WithLabelValues(priority.String(), peer.Task.Type.String(),
peer.Task.Tag, peer.Task.Application, peer.Host.Type.Name()).Inc()
// TODO to be determined which traffic type to use, temporarily use TrafficType_REMOTE_PEER instead
metrics.DownloadPeerDuration.WithLabelValues(priority.String(), peer.Task.Type.String(),
peer.Task.Tag, peer.Task.Application, peer.Task.ContentLength.String(), peer.Host.Type.Name()).Observe(float64(peer.Cost.Load()))
metrics.DownloadPeerDuration.WithLabelValues(metrics.CalculateSizeLevel(peer.Task.ContentLength.Load()).String()).Observe(float64(peer.Cost.Load()))

return nil
}
Expand Down Expand Up @@ -1018,8 +1017,7 @@ func (v *V2) handleDownloadPeerBackToSourceFinishedRequest(ctx context.Context,
metrics.DownloadPeerCount.WithLabelValues(priority.String(), peer.Task.Type.String(),
peer.Task.Tag, peer.Task.Application, peer.Host.Type.Name()).Inc()
// TODO to be determined which traffic type to use, temporarily use TrafficType_REMOTE_PEER instead
metrics.DownloadPeerDuration.WithLabelValues(priority.String(), peer.Task.Type.String(),
peer.Task.Tag, peer.Task.Application, peer.Task.ContentLength.String(), peer.Host.Type.Name()).Observe(float64(peer.Cost.Load()))
metrics.DownloadPeerDuration.WithLabelValues(metrics.CalculateSizeLevel(peer.Task.ContentLength.Load()).String()).Observe(float64(peer.Cost.Load()))

return nil
}
Expand Down

0 comments on commit 6addfdf

Please sign in to comment.