Skip to content

Commit

Permalink
fix: make histogramQuantile handle case of zero samples (#5419)
Browse files Browse the repository at this point in the history
* fix: make histogramQuantile handle case of zero samples

* chore: fix comment typo in stdlib/universe/histogram_quantile.go

Co-authored-by: Gavin Cabbage <gavincabbage@users.noreply.github.com>

---------

Co-authored-by: Gavin Cabbage <gavincabbage@users.noreply.github.com>
  • Loading branch information
Christopher M. Wolff and gavincabbage authored May 26, 2023
1 parent 8ad3663 commit d8995bb
Show file tree
Hide file tree
Showing 4 changed files with 186 additions and 29 deletions.
4 changes: 2 additions & 2 deletions libflux/go/libflux/buildinfo.gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ var sourceHashes = map[string]string{
"stdlib/experimental/polyline/polyline_test.flux": "16473dce4f71dcdbe1e3f90b350ab1e56a513679cb5c3f872e0f2d7678d42d1f",
"stdlib/experimental/preview_test.flux": "cca570d25b17ed201a0ecc7ebf9e547ccff2aa0814a3ac49f12faa938cbdaf73",
"stdlib/experimental/prometheus/prometheus.flux": "dc01322d3c0655661a8c2279c69acf50d02791a670fb0d681947af6726bc2f4d",
"stdlib/experimental/prometheus/prometheus_histogramQuantile_test.flux": "15da11b9c9d43cbc1c579de7064d7f3870b1c77b610ce3c567e8eb14f40ee090",
"stdlib/experimental/prometheus/prometheus_histogramQuantile_test.flux": "95a708f68ebd42b7d88f1ec62e8f8aa2f08995f64ed280e5bf41f910a0502057",
"stdlib/experimental/quantile_test.flux": "e3cf2ee9716c1179139d0a388c24b24f1c25ca329b27bebaeb53b7e1b608ead2",
"stdlib/experimental/query/from.flux": "713b7feb6904d64cf4cbd235396ff6ff20e1eb96578190c0ac3be4f37df7c362",
"stdlib/experimental/record/record.flux": "273eebb2ee5cb9b153940ca0f42ed5b97b3d86de07d91c96a75508682d84feae",
Expand Down Expand Up @@ -493,7 +493,7 @@ var sourceHashes = map[string]string{
"stdlib/universe/highestAverage_test.flux": "65744d16b7c5d8ac2f4e3c47621195de1840ad72b12bfbb692d4f645e71a00a8",
"stdlib/universe/highestCurrent_test.flux": "c285ff40a7d8789d2c20bcf90d8063b710499ddc24621d627f6693c30351ebe5",
"stdlib/universe/highestMax_test.flux": "fff9f21422d2607afb9ff2786d67d173c7cd797418eb7b3521f8cf7e49443b88",
"stdlib/universe/histogram_quantile_test.flux": "3ce3d5e6ce27fd8bb2e84da031faca2dafef2566737842e534fbd667ffa8a4f6",
"stdlib/universe/histogram_quantile_test.flux": "f7f6799d6787e3e291ccdbbbd1787ca3ca505f0a901b99f4381b0f3ee1b62c77",
"stdlib/universe/histogram_test.flux": "e9e9775f80ac7c2a76044e6e0e8a89045d736c6ab618e8de6d7d1ebe9848938e",
"stdlib/universe/holt_winters_panic_test.flux": "204eb8044d634e5350a364eac466eb51e7f549e4ac7f454de7b244ba272b248f",
"stdlib/universe/holt_winters_test.flux": "9bc8441527867b6c075d003034a3def1748766df478ba8b434e2a2297ead0ec0",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -375,3 +375,34 @@ testcase prometheus_histogramQuantile_onNonmonotonicDrop {

testing.diff(got: got, want: want)
}

testcase prometheus_histogramQuantile_zeroSamples {
inData =
"#group,false,false,true,true,false,false,true,true
#datatype,string,long,string,string,dateTime:RFC3339,double,string,string
#default,_result,,,,,,,
,result,table,_field,_measurement,_time,_value,le,org
,,0,qc_all_duration_seconds,prometheus,2021-10-08T00:00:00.412729Z,0,0.001,0001
,,2,qc_all_duration_seconds,prometheus,2021-10-08T00:00:00.412729Z,0,0.005,0001
,,3,qc_all_duration_seconds,prometheus,2021-10-08T00:00:00.412729Z,0,0.025,0001
,,4,qc_all_duration_seconds,prometheus,2021-10-08T00:00:00.412729Z,0,0.125,0001
,,4,qc_all_duration_seconds,prometheus,2021-10-08T00:00:00.412729Z,0,0.625,0001
,,5,qc_all_duration_seconds,prometheus,2021-10-08T00:00:00.412729Z,0,3.125,0001
,,6,qc_all_duration_seconds,prometheus,2021-10-08T00:00:00.412729Z,0,15.625,0001
,,7,qc_all_duration_seconds,prometheus,2021-10-08T00:00:00.412729Z,0,+Inf,0001
"
outData =
"#group,false,false,true,true,true,true,false,true,false,true
#datatype,string,long,string,string,dateTime:RFC3339,dateTime:RFC3339,dateTime:RFC3339,string,double,string
#default,_result,,,,,,,,,
,result,table,_field,_measurement,_start,_stop,_time,org,_value,quantile
,,0,qc_all_duration_seconds,prometheus,2021-10-08T00:00:00Z,2021-10-08T00:01:00Z,2021-10-08T00:00:00.412729Z,0001,,0.99
"
want = csv.from(csv: outData)
got =
csv.from(csv: inData)
|> range(start: 2021-10-08T00:00:00Z, stop: 2021-10-08T00:01:00Z)
|> prometheus.histogramQuantile(quantile: 0.99, metricVersion: 2)

testing.diff(got: got, want: want)
}
100 changes: 73 additions & 27 deletions stdlib/universe/histogram_quantile.go
Original file line number Diff line number Diff line change
Expand Up @@ -251,51 +251,97 @@ func (t histogramQuantileTransformation) Process(id execute.DatasetID, tbl flux.
})
}

q, ok, err := t.computeQuantile(cdf)
result, err := t.computeQuantile(cdf)
if err != nil {
return err
}
if !ok {
if result.action == drop {
return nil
}
if err := execute.AppendKeyValues(tbl.Key(), builder); err != nil {
return err
}
if err := builder.AppendFloat(valueIdx, q); err != nil {
return err
if result.action == appendValue {
if err := builder.AppendFloat(valueIdx, result.v); err != nil {
return err
}
} else {
// action is appendNil
if err := builder.AppendNil(valueIdx); err != nil {
return err
}

}
return nil
}

func (t *histogramQuantileTransformation) computeQuantile(cdf []bucket) (float64, bool, error) {
type quantileAction int

const (
appendValue quantileAction = iota
appendNil
drop
)

type quantileResult struct {
action quantileAction
v float64
}

// isMonotonic will check if the buckets are monotonic and
// return true if so.
//
// If force is set, it will force them to be monotonic
// by assuming no increase from the previous bucket.
// When force is set, this function will always return true.
func isMonotonic(force bool, cdf []bucket) bool {
prevCount := 0.0
for i := range cdf {
if cdf[i].count < prevCount {
if force {
cdf[i].count = prevCount
} else {
return false
}
} else {
prevCount = cdf[i].count
}
}
return true
}

func (t *histogramQuantileTransformation) computeQuantile(cdf []bucket) (quantileResult, error) {
if len(cdf) == 0 {
return 0, false, errors.New(codes.FailedPrecondition, "histogram is empty")
return quantileResult{}, errors.New(codes.FailedPrecondition, "histogram is empty")
}

if !isMonotonic(t.spec.OnNonmonotonic == onNonmonotonicForce, cdf) {
switch t.spec.OnNonmonotonic {
case onNonmonotonicError:
return quantileResult{}, errors.New(codes.FailedPrecondition, "histogram records counts are not monotonic")
case onNonmonotonicDrop:
return quantileResult{action: drop}, nil
default:
// "force" is not possible because isMonotonic will fix the buckets
return quantileResult{}, errors.Newf(codes.Internal, "unknown or unexpected value for onNonmonotonic: %q", t.spec.OnNonmonotonic)
}
}

// Find rank index and check counts are monotonic
prevCount := 0.0
totalCount := cdf[len(cdf)-1].count
if totalCount == 0 {
// Produce a null value if there were no samples
return quantileResult{action: appendNil}, nil
}

rank := t.spec.Quantile * totalCount
rankIdx := -1
for i, b := range cdf {
if b.count < prevCount {
switch t.spec.OnNonmonotonic {
case onNonmonotonicError:
return 0, false, errors.New(codes.FailedPrecondition, "histogram records counts are not monotonic")
case onNonmonotonicForce:
b.count = prevCount
case onNonmonotonicDrop:
return 0, false, nil
default:
return 0, false, errors.Newf(codes.Internal, "unknown value for onNonmonotonic: %q", t.spec.OnNonmonotonic)
}
} else {
prevCount = b.count
}

if rank >= b.count {
rankIdx = i
}
}

var (
lowerCount,
lowerBound,
Expand All @@ -311,7 +357,7 @@ func (t *histogramQuantileTransformation) computeQuantile(cdf []bucket) (float64
upperBound = cdf[0].upperBound
case len(cdf) - 1:
// Quantile is above the highest upper bound, simply return it as it must be finite
return cdf[len(cdf)-1].upperBound, true, nil
return quantileResult{action: appendValue, v: cdf[len(cdf)-1].upperBound}, nil
default:
lowerCount = cdf[rankIdx].count
lowerBound = cdf[rankIdx].upperBound
Expand All @@ -320,19 +366,19 @@ func (t *histogramQuantileTransformation) computeQuantile(cdf []bucket) (float64
}
if rank == lowerCount {
// No need to interpolate
return lowerBound, true, nil
return quantileResult{action: appendValue, v: lowerBound}, nil
}
if math.IsInf(lowerBound, -1) {
// We cannot interpolate with infinity
return upperBound, true, nil
return quantileResult{action: appendValue, v: upperBound}, nil
}
if math.IsInf(upperBound, 1) {
// We cannot interpolate with infinity
return lowerBound, true, nil
return quantileResult{action: appendValue, v: lowerBound}, nil
}
// Compute quantile using linear interpolation
scale := (rank - lowerCount) / (upperCount - lowerCount)
return lowerBound + (upperBound-lowerBound)*scale, true, nil
return quantileResult{action: appendValue, v: lowerBound + (upperBound-lowerBound)*scale}, nil
}

func (t histogramQuantileTransformation) UpdateWatermark(id execute.DatasetID, mark execute.Time) error {
Expand Down
80 changes: 80 additions & 0 deletions stdlib/universe/histogram_quantile_test.flux
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,42 @@ testcase histogramQuantileOnNonmonotonicForce {
testing.diff(got, want)
}

testcase histogramQuantileOnNonmonotonicForceLastBucket {
inData =
"
#datatype,string,long,dateTime:RFC3339,string,double,double,string
#group,false,false,true,true,false,false,true
#default,_result,,,,,,
,result,table,_time,_field,_value,le,_measurement
,,0,2018-05-22T19:53:00Z,x_duration_seconds,1,0.1,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,2,0.2,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,2,0.3,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,2,0.4,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,2,0.5,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,2,0.6,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,2,0.7,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,8,0.8,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,10,0.9,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,+Inf,l
"
outData =
"
#datatype,string,long,dateTime:RFC3339,dateTime:RFC3339,dateTime:RFC3339,string,double,string
#group,false,false,true,true,true,true,false,true
#default,_result,,,,,,,
,result,table,_start,_stop,_time,_field,_value,_measurement
,,0,2018-05-22T19:53:00Z,2030-01-01T00:00:00Z,2018-05-22T19:53:00Z,x_duration_seconds,0.8500000000000001,l
"

got =
csv.from(csv: inData)
|> range(start: 2018-05-22T19:53:00Z)
|> histogramQuantile(quantile: 0.9, onNonmonotonic: "force")
want = csv.from(csv: outData)

testing.diff(got, want)
}

testcase histogramQuantileOnNonmonotonicDrop {
inData =
"
Expand Down Expand Up @@ -214,3 +250,47 @@ testcase histogramQuantileOnNonmonotonicDrop {

testing.diff(got, want)
}

testcase histogramQuantileNoSamples {
inData =
"
#datatype,string,long,dateTime:RFC3339,string,double,double,string
#group,false,false,true,true,false,false,true
#default,_result,,,,,,
,result,table,_time,_field,_value,le,_measurement
,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,0.1,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,0.2,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,0.3,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,0.4,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,0.5,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,0.6,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,0.7,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,0.8,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,0.9,l
,,0,2018-05-22T19:53:00Z,x_duration_seconds,0,+Inf,l
,,1,2018-05-22T19:53:00Z,y_duration_seconds,0,-Inf,l
,,1,2018-05-22T19:53:00Z,y_duration_seconds,10,0.2,l
,,1,2018-05-22T19:53:00Z,y_duration_seconds,15,0.4,l
,,1,2018-05-22T19:53:00Z,y_duration_seconds,25,0.6,l
,,1,2018-05-22T19:53:00Z,y_duration_seconds,35,0.8,l
,,1,2018-05-22T19:53:00Z,y_duration_seconds,45,1,l
,,1,2018-05-22T19:53:00Z,y_duration_seconds,45,+Inf,l
"
outData =
"
#datatype,string,long,dateTime:RFC3339,dateTime:RFC3339,dateTime:RFC3339,string,double,string
#group,false,false,true,true,true,true,false,true
#default,_result,,,,,,,
,result,table,_start,_stop,_time,_field,_value,_measurement
,,0,2018-05-22T19:53:00Z,2030-01-01T00:00:00Z,2018-05-22T19:53:00Z,x_duration_seconds,,l
,,1,2018-05-22T19:53:00Z,2030-01-01T00:00:00Z,2018-05-22T19:53:00Z,y_duration_seconds,0.91,l
"

got =
csv.from(csv: inData)
|> range(start: 2018-05-22T19:53:00Z)
|> histogramQuantile(quantile: 0.9, onNonmonotonic: "drop")
want = csv.from(csv: outData)

testing.diff(got, want)
}

0 comments on commit d8995bb

Please sign in to comment.