Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HDDS-11699. Remove unnecessary information about parts when downloading multipart files. #7558

Merged
merged 9 commits into from
Jan 9, 2025
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ public enum OzoneManagerVersion implements ComponentVersion {
S3_OBJECT_TAGGING_API(9, "OzoneManager version that supports S3 object tagging APIs, such as " +
"PutObjectTagging, GetObjectTagging, and DeleteObjectTagging"),

S3_PART_AWARE_GET(10, "OzoneManager version that supports S3 get for a specific multipart " +
"upload part number"),

FUTURE_VERSION(-1, "Used internally in the client when the server side is "
+ " newer and an unknown server version has arrived to the client.");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1765,16 +1765,21 @@ public OzoneKeyDetails getS3KeyDetails(String bucketName, String keyName)
@Override
public OzoneKeyDetails getS3KeyDetails(String bucketName, String keyName,
int partNumber) throws IOException {
OmKeyInfo keyInfo = getS3KeyInfo(bucketName, keyName, false);
List<OmKeyLocationInfo> filteredKeyLocationInfo = keyInfo
.getLatestVersionLocations().getBlocksLatestVersionOnly().stream()
.filter(omKeyLocationInfo -> omKeyLocationInfo.getPartNumber() ==
partNumber)
.collect(Collectors.toList());
keyInfo.updateLocationInfoList(filteredKeyLocationInfo, false);
keyInfo.setDataSize(filteredKeyLocationInfo.stream()
.mapToLong(OmKeyLocationInfo::getLength)
.sum());
OmKeyInfo keyInfo;
if (omVersion.compareTo(OzoneManagerVersion.S3_PART_AWARE_GET) >= 0) {
keyInfo = getS3PartKeyInfo(bucketName, keyName, partNumber);
} else {
keyInfo = getS3KeyInfo(bucketName, keyName, false);
List<OmKeyLocationInfo> filteredKeyLocationInfo = keyInfo
.getLatestVersionLocations().getBlocksLatestVersionOnly().stream()
.filter(omKeyLocationInfo -> omKeyLocationInfo.getPartNumber() ==
partNumber)
.collect(Collectors.toList());
keyInfo.updateLocationInfoList(filteredKeyLocationInfo, true, true);
keyInfo.setDataSize(filteredKeyLocationInfo.stream()
.mapToLong(OmKeyLocationInfo::getLength)
.sum());
}
return getOzoneKeyDetails(keyInfo);
}

Expand All @@ -1801,6 +1806,29 @@ private OmKeyInfo getS3KeyInfo(
return keyInfoWithS3Context.getKeyInfo();
}

/**
 * Looks up the key info of an S3 key for one multipart upload part.
 *
 * <p>The requested part number is carried in the {@link OmKeyArgs} sent to
 * OzoneManager through {@code getKeyInfo}. If the response carries a user
 * principal, it is handed to {@code updateS3Principal}.
 *
 * @param bucketName name of the S3 bucket; checked with
 *                   {@code verifyBucketName}
 * @param keyName    name of the key; must not be null
 * @param partNumber multipart upload part number to request
 * @return the key info returned by OzoneManager
 * @throws IOException if the OzoneManager call fails
 */
@Nonnull
private OmKeyInfo getS3PartKeyInfo(
    String bucketName, String keyName, int partNumber) throws IOException {
  verifyBucketName(bucketName);
  Preconditions.checkNotNull(keyName);

  // The volume name below is only a placeholder: GetKeyInfo is invoked with
  // assumeS3Context = true, so OM infers the correct s3 volume itself.
  final OmKeyArgs partKeyArgs = new OmKeyArgs.Builder()
      .setVolumeName(OzoneConfigKeys.OZONE_S3_VOLUME_NAME_DEFAULT)
      .setBucketName(bucketName)
      .setKeyName(keyName)
      .setSortDatanodesInPipeline(topologyAwareReadEnabled)
      .setLatestVersionLocation(getLatestVersionLocation)
      .setForceUpdateContainerCacheFromSCM(false)
      .setMultipartUploadPartNumber(partNumber)
      .build();

  final boolean assumeS3Context = true;
  final KeyInfoWithVolumeContext response =
      ozoneManagerClient.getKeyInfo(partKeyArgs, assumeS3Context);

  // Record the principal reported by OM, when one is present.
  response.getUserPrincipal()
      .ifPresent(principal -> updateS3Principal(principal));

  return response.getKeyInfo();
}

private OmKeyInfo getKeyInfo(
String volumeName, String bucketName, String keyName,
boolean forceUpdateContainerCache) throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ public OmKeyArgs.Builder toBuilder() {
if (expectedDataGeneration != null) {
builder.setExpectedDataGeneration(expectedDataGeneration);
}

return builder;
}

Expand All @@ -227,7 +228,11 @@ public KeyArgs toProtobuf() {
.setLatestVersionLocation(getLatestVersionLocation())
.setHeadOp(isHeadOp())
.setForceUpdateContainerCacheFromSCM(
isForceUpdateContainerCacheFromSCM());
isForceUpdateContainerCacheFromSCM()
);
if (multipartUploadPartNumber != 0) {
builder.setMultipartNumber(multipartUploadPartNumber);
}
if (expectedDataGeneration != null) {
builder.setExpectedDataGeneration(expectedDataGeneration);
}
Expand Down Expand Up @@ -308,8 +313,8 @@ public Builder setMultipartUploadID(String uploadID) {
return this;
}

public Builder setMultipartUploadPartNumber(int partNumber) {
this.multipartUploadPartNumber = partNumber;
public Builder setMultipartUploadPartNumber(int multipartUploadPartNumber) {
this.multipartUploadPartNumber = multipartUploadPartNumber;
return this;
}

Expand Down
33 changes: 33 additions & 0 deletions hadoop-ozone/dist/src/main/smoketest/s3/objectputget.robot
Original file line number Diff line number Diff line change
Expand Up @@ -270,3 +270,36 @@ Create key twice with different content and expect different ETags
Execute AWSS3Cli rm s3://${BUCKET}/test_key_to_check_etag_differences
Execute rm -rf /tmp/file1
Execute rm -rf /tmp/file2

Create&Download big file by multipart upload and get file via part numbers
# Purpose: upload a 10000000-byte random file, then fetch it part by part with
# get-object --part-number and check the reported sizes and part count.
# Create the local source file from /dev/urandom.
Execute head -c 10000000 </dev/urandom > /tmp/big_file
# Upload the file to the bucket; the assertions below expect it to be stored as 2 parts.
${result} Execute AWSS3CliDebug cp /tmp/big_file s3://${BUCKET}/
# Fetch part 1 and extract its ContentLength from the JSON response with jq.
${get_part_1_response} Execute AWSS3APICli get-object --bucket ${BUCKET} --key big_file /tmp/big_file_1 --part-number 1
${part_1_size} = Execute and checkrc echo '${get_part_1_response}' | jq -r '.ContentLength' 0
Should contain ${get_part_1_response} \"PartsCount\": 2
# Same check for part 2.
${get_part_2_response} Execute AWSS3APICli get-object --bucket ${BUCKET} --key big_file /tmp/big_file_2 --part-number 2
${part_2_size} = Execute and checkrc echo '${get_part_2_response}' | jq -r '.ContentLength' 0
Should contain ${get_part_2_response} \"PartsCount\": 2

# The two part sizes must add up to the size of the uploaded file.
Should Be Equal As Integers 10000000 ${${part_1_size} + ${part_2_size}}

# Part 3 is beyond the 2 reported parts: the response is expected to have
# ContentLength 0 while still reporting PartsCount 2.
${get_part_3_response} Execute AWSS3APICli get-object --bucket ${BUCKET} --key big_file /tmp/big_file_3 --part-number 3
Should contain ${get_part_3_response} \"ContentLength\": 0
Should contain ${get_part_3_response} \"PartsCount\": 2
# clean up
Execute AWSS3Cli rm s3://${BUCKET}/big_file
Execute rm -rf /tmp/big_file
Execute rm -rf /tmp/big_file_1
Execute rm -rf /tmp/big_file_2
Execute rm -rf /tmp/big_file_3

Create&Download big file by multipart upload and get file not existed part number
# Purpose: request a part number that the uploaded object does not have and
# check that the response is empty while the real part count is still reported.
# Create the local source file from /dev/urandom and upload it.
Execute head -c 10000000 </dev/urandom > /tmp/big_file
${result} Execute AWSS3CliDebug cp /tmp/big_file s3://${BUCKET}/
# Part 99 does not exist: expect ContentLength 0 and PartsCount 2.
# The download target reuses the /tmp/big_file_1 path, which is removed in clean up.
${get_part_99_response} Execute AWSS3APICli get-object --bucket ${BUCKET} --key big_file /tmp/big_file_1 --part-number 99
Should contain ${get_part_99_response} \"ContentLength\": 0
Should contain ${get_part_99_response} \"PartsCount\": 2
# clean up
Execute AWSS3Cli rm s3://${BUCKET}/big_file
Execute rm -rf /tmp/big_file
Execute rm -rf /tmp/big_file_1
Loading
Loading