Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HDDS-11511. Introduce metrics in deletion services of OM #7377

Merged
merged 23 commits into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,316 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.om;

import org.apache.hadoop.metrics2.annotation.Metric;
import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.lib.MetricsRegistry;
import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
import org.apache.hadoop.ozone.OzoneConsts;

/**
* Class contains metrics related to the OM Deletion services.
*/
@Metrics(about = "Deletion Service Metrics", context = OzoneConsts.OZONE)
public final class DeletingServiceMetrics {

/** Name under which this metrics source is registered and unregistered. */
public static final String METRICS_SOURCE_NAME =
    DeletingServiceMetrics.class.getSimpleName();

/** Registry created for this source; assigned once in the constructor. */
private final MetricsRegistry registry;

/**
 * Private constructor; instances are obtained through {@link #create()}.
 */
private DeletingServiceMetrics() {
  this.registry = new MetricsRegistry(METRICS_SOURCE_NAME);
}

/**
 * Builds a new DeletingServiceMetrics and registers it with the default
 * metrics system under {@link #METRICS_SOURCE_NAME}.
 *
 * @return the instance handed back by the metrics system's register call
 */
public static DeletingServiceMetrics create() {
  final DeletingServiceMetrics source = new DeletingServiceMetrics();
  return DefaultMetricsSystem.instance().register(
      METRICS_SOURCE_NAME,
      "Metrics tracking the progress of deletion of directories and keys in the OM",
      source);
}
/**
 * Removes the source named {@link #METRICS_SOURCE_NAME} from the default
 * metrics system.
 */
public static void unRegister() {
  DefaultMetricsSystem
      .instance()
      .unregisterSource(METRICS_SOURCE_NAME);
}


/*
* Total directory deletion metrics across all iterations of DirectoryDeletingService since last restart.
*/
// Gauge that incrNumDirDeleted adds to.
@Metric("Total no. of directories deleted")
Tejaskriya marked this conversation as resolved.
Show resolved Hide resolved
private MutableGaugeLong numDirDeleted;
// Gauge that incrNumDirsMoved adds to.
@Metric("Total no. of directories moved to deletedDirectoryTable")
Tejaskriya marked this conversation as resolved.
Show resolved Hide resolved
private MutableGaugeLong numDirsMoved;
// Gauge that incrNumFilesMoved adds to.
@Metric("Total no. of files moved to deletedTable")
Tejaskriya marked this conversation as resolved.
Show resolved Hide resolved
private MutableGaugeLong numFilesMoved;

/**
 * Adds the given amount to the numDirDeleted gauge.
 *
 * @param dirDel amount to add
 */
public void incrNumDirDeleted(long dirDel) {
Tejaskriya marked this conversation as resolved.
Show resolved Hide resolved
numDirDeleted.incr(dirDel);
}

/**
 * Adds the given amount to the numDirsMoved gauge.
 *
 * @param dirMove amount to add
 */
public void incrNumDirsMoved(long dirMove) {
  this.numDirsMoved.incr(dirMove);
}

/**
 * Adds the given amount to the numFilesMoved gauge.
 *
 * @param filesMove amount to add
 */
public void incrNumFilesMoved(long filesMove) {
  this.numFilesMoved.incr(filesMove);
}

/**
 * Adds to all three running directory-deletion totals in one call.
 *
 * @param dirDel amount to add to numDirDeleted
 * @param dirMove amount to add to numDirsMoved
 * @param filesMove amount to add to numFilesMoved
 */
public void incrementDirectoryDeletionTotalMetrics(long dirDel, long dirMove, long filesMove) {
  // Same gauge updates the single-value incr* methods perform, applied in the same order.
  numDirDeleted.incr(dirDel);
  numDirsMoved.incr(dirMove);
  numFilesMoved.incr(filesMove);
}

/*
* Directory deletion metrics in the latest iteration of DirectoryDeletingService.
*/
// The gauges in this group are overwritten through set(...) calls in the
// setIteration* methods of this class, rather than accumulated with incr(...).
@Metric("Iteration run count of DirectoryDeletingService")
private MutableGaugeLong iterationDirRunCount;
// NOTE(review): the unit and epoch of the start time are not visible in this class - confirm with the caller.
@Metric("Iteration start time of DirectoryDeletingService")
private MutableGaugeLong iterationDirStartTime;
// NOTE(review): the time unit of the duration is not visible in this class - confirm with the caller.
@Metric("Total time taken by the last iteration of DirectoryDeletingService")
private MutableGaugeLong iterationDirDuration;
@Metric("No. of directories deleted in last iteration")
private MutableGaugeLong iterationDirDeleted;
@Metric("No. of sub-directories deleted in last iteration")
private MutableGaugeLong iterationSubDirDeleted;
@Metric("No. of sub-directories moved to deletedDirectoryTable in last iteration")
Tejaskriya marked this conversation as resolved.
Show resolved Hide resolved
private MutableGaugeLong iterationSubDirsMoved;
@Metric("No. of files moved to deletedTable in last iteration")
Tejaskriya marked this conversation as resolved.
Show resolved Hide resolved
private MutableGaugeLong iterationFilesMoved;

/**
 * Overwrites the iterationDirRunCount gauge.
 *
 * @param runcount value to store
 */
public void setIterationDirRunCount(long runcount) {
  this.iterationDirRunCount.set(runcount);
}

/**
 * Overwrites the iterationDirStartTime gauge.
 *
 * @param startTime value to store
 */
public void setIterationDirStartTime(long startTime) {
  this.iterationDirStartTime.set(startTime);
}

/**
 * Overwrites the iterationDirDuration gauge.
 *
 * @param duration value to store
 */
public void setIterationDirDuration(long duration) {
  this.iterationDirDuration.set(duration);
}

/**
 * Overwrites the iterationDirDeleted gauge.
 *
 * @param dirDel value to store
 */
public void setIterationDirDeleted(long dirDel) {
  this.iterationDirDeleted.set(dirDel);
}

/**
 * Overwrites the iterationSubDirDeleted gauge.
 *
 * @param subdirDel value to store
 */
public void setIterationSubDirDeleted(long subdirDel) {
  this.iterationSubDirDeleted.set(subdirDel);
}

/**
 * Overwrites the iterationSubDirsMoved gauge.
 *
 * Previously this method wrote to iterationFilesMoved, so the sub-directory
 * count was published under the files metric.
 *
 * @param subdirMove number of sub-directories moved in the last iteration
 */
public void setIterationSubDirsMoved(long subdirMove) {
  iterationSubDirsMoved.set(subdirMove);
}

/**
 * Overwrites the iterationFilesMoved gauge.
 *
 * Previously this method wrote to iterationSubDirsMoved, so the files count
 * was published under the sub-directories metric.
 *
 * @param filesMove number of files moved in the last iteration
 */
public void setIterationFilesMoved(long filesMove) {
  iterationFilesMoved.set(filesMove);
}

/**
 * Overwrites all per-iteration DirectoryDeletingService gauges in one call by
 * delegating to the individual setIteration* methods.
 *
 * Argument order: filesMove comes before subdirMove here, whereas
 * incrementDirectoryDeletionTotalMetrics takes the directory-moved count
 * before the files-moved count.
 *
 * @param runcount value passed to setIterationDirRunCount
 * @param startTime value passed to setIterationDirStartTime
 * @param duration value passed to setIterationDirDuration
 * @param dirDel value passed to setIterationDirDeleted
 * @param subdirDel value passed to setIterationSubDirDeleted
 * @param filesMove value passed to setIterationFilesMoved
 * @param subdirMove value passed to setIterationSubDirsMoved
 */
public void setDirectoryDeletionIterationMetrics(long runcount, long startTime, long duration,
long dirDel, long subdirDel,
long filesMove, long subdirMove) {
setIterationDirRunCount(runcount);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we do not need capture runCount and startTime

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see anything wrong with capturing run count. It has been useful in the past to detect when the service has completed a run after a restart, and can be divided over past time intervals to get average time taken per run.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@errose28 I am not getting what we are going to achieve with runCount as metrics. If there are 10 threads running, it will be increase by 10 times as default.
We can use duration as metrics for performance, but not the startTime. This will give better metrics. So startTime may not be useful, this can be easily identified as Timer task.

For task running, based on delete count increase, and other data can show the progress. So Above metric is not useful.

setIterationDirStartTime(startTime);
setIterationDirDuration(duration);
setIterationDirDeleted(dirDel);
setIterationSubDirDeleted(subdirDel);
setIterationFilesMoved(filesMove);
setIterationSubDirsMoved(subdirMove);
}

/*
* Total key deletion metrics across all iterations of KeyDeletingService since last restart.
*/
// Gauge that incrNumKeysProcessed adds to.
@Metric("Total no. of keys processed")
private MutableGaugeLong numKeysProcessed;
// Gauge that incrNumKeysDeletionRequest adds to.
@Metric("Total no. of keys sent to scm for deletion")
private MutableGaugeLong numKeysDeletionRequest;
// Gauge that incrNumKeysDeleteSuccess adds to.
@Metric("Total no. of keys deleted successfully")
private MutableGaugeLong numKeysDeleteSuccess;

/**
 * Adds the given amount to the numKeysProcessed gauge.
 *
 * @param keysProcessed amount to add
 */
public void incrNumKeysProcessed(long keysProcessed) {
  numKeysProcessed.incr(keysProcessed);
}

/**
 * Adds the given amount to the numKeysDeletionRequest gauge.
 *
 * @param keysDeletionRequest amount to add
 */
public void incrNumKeysDeletionRequest(long keysDeletionRequest) {
  numKeysDeletionRequest.incr(keysDeletionRequest);
}

/**
 * Adds the given amount to the numKeysDeleteSuccess gauge.
 *
 * @param keysDeleteSuccess amount to add
 */
public void incrNumKeysDeleteSuccess(long keysDeleteSuccess) {
  numKeysDeleteSuccess.incr(keysDeleteSuccess);
}

Tejaskriya marked this conversation as resolved.
Show resolved Hide resolved
/*
* Key deletion metrics in the latest iteration of KeyDeletingService.
*/
// Gauge overwritten by setIterationKeyRunCount.
@Metric("Iteration run count of KeyDeletingService")
private MutableGaugeLong iterationKeyRunCount;
// Gauge overwritten by setIterationKeyStartTime.
// NOTE(review): the unit and epoch are not visible in this class - confirm with the caller.
@Metric("Iteration start time of KeyDeletingService")
private MutableGaugeLong iterationKeyStartTime;
Tejaskriya marked this conversation as resolved.
Show resolved Hide resolved
// Gauge overwritten by setIterationKeyDuration.
@Metric("Total time taken by the last iteration of KeyDeletingService")
private MutableGaugeLong iterationKeyDuration;
// Gauge overwritten by setIterationKeysProcessed.
@Metric("No. of keys processed in last iteration")
private MutableGaugeLong iterationKeysProcessed;
// Per-iteration gauges, overwritten by setIterationKeysDeletionRequest and
// setIterationKeysDeleteSuccess. The descriptions say "in last iteration" so
// they are not confused with the running totals numKeysDeletionRequest and
// numKeysDeleteSuccess.
@Metric("No. of keys sent to scm for deletion in last iteration")
private MutableGaugeLong iterationKeysDeletionRequest;
@Metric("No. of keys deleted successfully in last iteration")
private MutableGaugeLong iterationKeysDeleteSuccess;

/**
 * Overwrites the iterationKeyRunCount gauge.
 *
 * @param runCount value to store
 */
public void setIterationKeyRunCount(long runCount) {
  iterationKeyRunCount.set(runCount);
}

/**
 * Overwrites the iterationKeyStartTime gauge.
 *
 * @param startTime value to store
 */
public void setIterationKeyStartTime(long startTime) {
  iterationKeyStartTime.set(startTime);
}

/**
 * Overwrites the iterationKeyDuration gauge.
 *
 * @param duration value to store
 */
public void setIterationKeyDuration(long duration) {
  iterationKeyDuration.set(duration);
}

/**
 * Overwrites the iterationKeysProcessed gauge.
 *
 * @param keysProcessed value to store
 */
public void setIterationKeysProcessed(long keysProcessed) {
  iterationKeysProcessed.set(keysProcessed);
}

/**
 * Overwrites the iterationKeysDeletionRequest gauge.
 *
 * @param keysDeletionRequest value to store
 */
public void setIterationKeysDeletionRequest(long keysDeletionRequest) {
  iterationKeysDeletionRequest.set(keysDeletionRequest);
}

/**
 * Overwrites the iterationKeysDeleteSuccess gauge.
 *
 * @param keysDeleteSuccess value to store
 */
public void setIterationKeysDeleteSuccess(long keysDeleteSuccess) {
  iterationKeysDeleteSuccess.set(keysDeleteSuccess);
}

/*
* Directory purge request metrics.
*/
// Gauge that incrNumDirPurged adds to.
@Metric("Total no. of directories purged")
private MutableGaugeLong numDirPurged;
// Gauge that incrNumSubKeysPurged adds to.
@Metric("Total no. of subFiles purged")
private MutableGaugeLong numSubKeysPurged;
// Gauge overwritten by setNumDirPurgedInLatestRequest.
@Metric("No. of directories purged in latest request")
private MutableGaugeLong numDirPurgedInLatestRequest;
// Gauge overwritten by setNumSubKeysPurgedInLatestRequest.
@Metric("No. of subFiles purged in latest request")
private MutableGaugeLong numSubKeysPurgedInLatestRequest;
Tejaskriya marked this conversation as resolved.
Show resolved Hide resolved

Tejaskriya marked this conversation as resolved.
Show resolved Hide resolved
/**
 * Adds the given amount to the numDirPurged gauge.
 *
 * @param dirPurged amount to add
 */
public void incrNumDirPurged(long dirPurged) {
  numDirPurged.incr(dirPurged);
}

/**
 * Adds the given amount to the numSubKeysPurged gauge.
 *
 * @param subKeysPurged amount to add
 */
public void incrNumSubKeysPurged(long subKeysPurged) {
  numSubKeysPurged.incr(subKeysPurged);
}

/**
 * Overwrites the numDirPurgedInLatestRequest gauge.
 *
 * @param numDirPurgedInLastRequest value to store
 */
public void setNumDirPurgedInLatestRequest(long numDirPurgedInLastRequest) {
  numDirPurgedInLatestRequest.set(numDirPurgedInLastRequest);
}

/**
 * Overwrites the numSubKeysPurgedInLatestRequest gauge.
 *
 * @param numSubKeysPurgedInLastRequest value to store
 */
public void setNumSubKeysPurgedInLatestRequest(long numSubKeysPurgedInLastRequest) {
  numSubKeysPurgedInLatestRequest.set(numSubKeysPurgedInLastRequest);
}

/*
* Key purge request metrics.
*/

// Gauge that incrNumKeysPurged adds to.
@Metric("Total no. of keys purged")
private MutableGaugeLong numKeysPurged;
// Gauge overwritten by setNumKeysPurgedInLatestRequest.
@Metric("No. of keys purged in latest request")
private MutableGaugeLong numKeysPurgedInLatestRequest;

/**
 * Adds the given amount to the numKeysPurged gauge.
 *
 * @param keysPurged amount to add
 */
public void incrNumKeysPurged(long keysPurged) {
  numKeysPurged.incr(keysPurged);
}

/**
 * Overwrites the numKeysPurgedInLatestRequest gauge.
 *
 * @param keysPurgedInLatestRequest value to store
 */
public void setNumKeysPurgedInLatestRequest(long keysPurgedInLatestRequest) {
  numKeysPurgedInLatestRequest.set(keysPurgedInLatestRequest);
}

/*
* OpenKeyCleanupService related metrics.
*/

// Gauge overwritten by setIterationRunCountOpenKeyCleanup.
@Metric("Last iteration run count of OpenKeyCleanupService")
private MutableGaugeLong iterationRunCountOpenKeyCleanup;
// Gauge overwritten by setIterationStartTimeOpenKeyCleanup.
// NOTE(review): the unit and epoch are not visible in this class - confirm with the caller.
@Metric("Last iteration start time of OpenKeyCleanupService")
private MutableGaugeLong iterationStartTimeOpenKeyCleanup;
// Gauge overwritten by setIterationDurationOpenKeyCleanup.
@Metric("Time taken by the last iteration of OpenKeyCleanupService")
private MutableGaugeLong iterationDurationOpenKeyCleanup;
// Gauge overwritten by setIterationOpenKeysDeleted.
@Metric("No. of keys deleted by OpenKeyCleanupService in last iteration")
private MutableGaugeLong iterationOpenKeysDeleted;
// Describes the committed-keys gauge; the text must differ from the deleted-keys gauge's description.
@Metric("No. of keys committed by OpenKeyCleanupService in last iteration")
Tejaskriya marked this conversation as resolved.
Show resolved Hide resolved
private MutableGaugeLong iterationOpenKeysCommitted;

/**
 * Overwrites the iterationRunCountOpenKeyCleanup gauge.
 *
 * @param runCount value to store
 */
public void setIterationRunCountOpenKeyCleanup(long runCount) {
  iterationRunCountOpenKeyCleanup.set(runCount);
}

/**
 * Overwrites the iterationStartTimeOpenKeyCleanup gauge.
 *
 * @param startTime value to store
 */
public void setIterationStartTimeOpenKeyCleanup(long startTime) {
  iterationStartTimeOpenKeyCleanup.set(startTime);
}

/**
 * Overwrites the iterationDurationOpenKeyCleanup gauge.
 *
 * @param duration value to store
 */
public void setIterationDurationOpenKeyCleanup(long duration) {
  iterationDurationOpenKeyCleanup.set(duration);
}

/**
 * Overwrites the iterationOpenKeysDeleted gauge.
 *
 * @param keysDeleted value to store
 */
public void setIterationOpenKeysDeleted(long keysDeleted) {
  iterationOpenKeysDeleted.set(keysDeleted);
}

/**
 * Overwrites the iterationOpenKeysCommitted gauge.
 *
 * @param keysCommitted value to store
 */
public void setIterationOpenKeysCommitted(long keysCommitted) {
  iterationOpenKeysCommitted.set(keysCommitted);
}

/**
 * Overwrites all per-iteration OpenKeyCleanupService gauges in one call.
 *
 * @param runcount value stored in iterationRunCountOpenKeyCleanup
 * @param startTime value stored in iterationStartTimeOpenKeyCleanup
 * @param duration value stored in iterationDurationOpenKeyCleanup
 * @param keysDeleted value stored in iterationOpenKeysDeleted
 * @param keysCommitted value stored in iterationOpenKeysCommitted
 */
public void setOpenKeyCleanupIterationMetrics(long runcount, long startTime, long duration,
    long keysDeleted, long keysCommitted) {
  // Same gauge writes the individual setters perform, in the same order.
  iterationRunCountOpenKeyCleanup.set(runcount);
  iterationStartTimeOpenKeyCleanup.set(startTime);
  iterationDurationOpenKeyCleanup.set(duration);
  iterationOpenKeysDeleted.set(keysDeleted);
  iterationOpenKeysCommitted.set(keysCommitted);
}

// Accumulated with incr(...) by setTotalOpenKeysDeleted, despite that method's set-prefixed name.
@Metric("Total no. of keys deleted by OpenKeyCleanupService since last restart")
private MutableGaugeLong totalOpenKeysDeleted;
// Accumulated with incr(...) by setTotalOpenKeysCommitted, despite that method's set-prefixed name.
@Metric("Total no. of keys committed by OpenKeyCleanupService since last restart")
private MutableGaugeLong totalOpenKeysCommitted;

/**
 * Adds the given amount to the totalOpenKeysDeleted gauge. Despite the
 * set prefix, the gauge is incremented, not overwritten.
 *
 * @param openKeysDeleted amount to add
 */
public void setTotalOpenKeysDeleted(long openKeysDeleted) {
  totalOpenKeysDeleted.incr(openKeysDeleted);
}

/**
 * Adds the given amount to the totalOpenKeysCommitted gauge. Despite the
 * set prefix, the gauge is incremented, not overwritten.
 *
 * @param openKeysCommitted amount to add
 */
public void setTotalOpenKeysCommitted(long openKeysCommitted) {
  totalOpenKeysCommitted.incr(openKeysCommitted);
}

/**
 * Adds to both OpenKeyCleanupService running totals in one call. Despite the
 * set prefix, the gauges are incremented, not overwritten.
 *
 * @param keysDeleted amount to add to totalOpenKeysDeleted
 * @param keysCommitted amount to add to totalOpenKeysCommitted
 */
public void setOpenKeyCleanupTotalMetrics(long keysDeleted, long keysCommitted) {
  totalOpenKeysDeleted.incr(keysDeleted);
  totalOpenKeysCommitted.incr(keysCommitted);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,7 @@ public final class OzoneManager extends ServiceRuntimeInfoImpl
private OMHAMetrics omhaMetrics;
private final ProtocolMessageMetrics<ProtocolMessageEnum>
omClientProtocolMetrics;
private final DeletingServiceMetrics omDeletionMetrics;
private OzoneManagerHttpServer httpServer;
private final OMStorage omStorage;
private ObjectName omInfoBeanName;
Expand Down Expand Up @@ -687,6 +688,7 @@ private OzoneManager(OzoneConfiguration conf, StartupOption startupOption)

metrics = OMMetrics.create();
perfMetrics = OMPerformanceMetrics.register();
omDeletionMetrics = DeletingServiceMetrics.create();
// Get admin list
omStarterUser = UserGroupInformation.getCurrentUser().getShortUserName();
omAdmins = OzoneAdmins.getOzoneAdmins(omStarterUser, conf);
Expand Down Expand Up @@ -1649,6 +1651,9 @@ public OMMetrics getMetrics() {
public OMPerformanceMetrics getPerfMetrics() {
return perfMetrics;
}
/**
 * Returns the deletion-service metrics object held in omDeletionMetrics.
 *
 * @return the DeletingServiceMetrics instance
 */
public DeletingServiceMetrics getDeletionMetrics() {
  return this.omDeletionMetrics;
}

/**
* Start service.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import org.apache.hadoop.ozone.OzoneConsts;
import org.apache.hadoop.ozone.om.OMMetrics;
import org.apache.hadoop.ozone.om.OmMetadataManagerImpl;
import org.apache.hadoop.ozone.om.DeletingServiceMetrics;
import org.apache.hadoop.ozone.om.snapshot.SnapshotUtils;
import org.apache.ratis.server.protocol.TermIndex;
import org.apache.hadoop.ozone.om.OzoneManager;
Expand Down Expand Up @@ -75,6 +76,7 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn
OmMetadataManagerImpl omMetadataManager = (OmMetadataManagerImpl) ozoneManager.getMetadataManager();
Map<String, OmKeyInfo> openKeyInfoMap = new HashMap<>();
OMMetrics omMetrics = ozoneManager.getMetrics();
DeletingServiceMetrics deletingServiceMetrics = ozoneManager.getDeletionMetrics();
OMResponse.Builder omResponse = OmResponseUtil.getOMResponseBuilder(
getOmRequest());
final SnapshotInfo fromSnapshotInfo;
Expand All @@ -98,6 +100,7 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn
return new OMDirectoriesPurgeResponseWithFSO(createErrorOMResponse(omResponse, e));
}
try {
int numDirDeleted = 0, numSubFilesDeleted = 0;
for (OzoneManagerProtocolProtos.PurgePathRequest path : purgeRequests) {
for (OzoneManagerProtocolProtos.KeyInfo key :
path.getMarkDeletedSubDirsList()) {
Expand All @@ -111,6 +114,8 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn
lockSet.add(volBucketPair);
}
omMetrics.decNumKeys();
deletingServiceMetrics.incrNumDirPurged(1);
numDirDeleted++;
OmBucketInfo omBucketInfo = getBucketInfo(omMetadataManager,
volumeName, bucketName);
// bucketInfo can be null in case of delete volume or bucket
Expand Down Expand Up @@ -153,6 +158,8 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn
}

omMetrics.decNumKeys();
deletingServiceMetrics.incrNumSubKeysPurged(1);
numSubFilesDeleted++;
OmBucketInfo omBucketInfo = getBucketInfo(omMetadataManager,
volumeName, bucketName);
// bucketInfo can be null in case of delete volume or bucket
Expand All @@ -169,6 +176,9 @@ public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager, TermIn
}
}
}
deletingServiceMetrics.setNumDirPurgedInLatestRequest(numDirDeleted);
deletingServiceMetrics.setNumSubKeysPurgedInLatestRequest(numSubFilesDeleted);

if (fromSnapshotInfo != null) {
fromSnapshotInfo.setLastTransactionInfo(TransactionInfo.valueOf(termIndex).toByteString());
omMetadataManager.getSnapshotInfoTable().addCacheEntry(new CacheKey<>(fromSnapshotInfo.getTableKey()),
Expand Down
Loading