Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
176 changes: 78 additions & 98 deletions lib/api/apiUtils/object/getReplicationInfo.js
Original file line number Diff line number Diff line change
@@ -1,73 +1,61 @@
const { isServiceAccount, getServiceAccountProperties } =
require('../authorization/permissionChecks');
const { replicationBackends } = require('arsenal').constants;
const { isServiceAccount, getServiceAccountProperties } = require('../authorization/permissionChecks');
const { constants, models } = require('arsenal');

/**
 * Build the replication backend entry for a given site.
 *
 * If the object metadata already tracks a backend for this site, a copy
 * of that entry is returned with its status reset to 'PENDING' (other
 * fields, such as dataStoreVersionId, are preserved). Otherwise a fresh
 * entry is returned. The object metadata is never mutated.
 *
 * @param {object} [objectMD] - The object metadata, if any
 * @param {object} [objectMD.replicationInfo] - Current replication info
 * @param {object[]} [objectMD.replicationInfo.backends] - Known backends
 * @param {string} site - The site (storage class name) of the backend
 * @return {object} backend entry with `site`, `status` and
 * `dataStoreVersionId`
 */
function _getBackend(objectMD, site) {
    // Metadata may be missing entirely, or may not carry replicationInfo
    // yet: treat both cases as "no known backends" instead of throwing.
    const backends = objectMD?.replicationInfo?.backends ?? [];
    const backend = backends.find(o => o.site === site);
    // If the backend already exists, just update the status.
    if (backend) {
        return { ...backend, status: 'PENDING' };
    }
    return {
        site,
        status: 'PENDING',
        dataStoreVersionId: '',
    };
}
const { replicationBackends } = constants;
const { ReplicationConfiguration } = models;

function _getStorageClasses(s3config, rule) {
if (rule.storageClass) {
return rule.storageClass.split(',');
}
const { replicationEndpoints } = s3config;
// If no storage class, use the given default endpoint or the sole endpoint
if (replicationEndpoints.length > 0) {
const endPoint =
replicationEndpoints.find(endpoint => endpoint.default) || replicationEndpoints[0];
return [endPoint.site];
}
return undefined;
/**
 * Fill in a fallback storageClass on replication rules that lack one.
 *
 * The fallback is the site of the endpoint flagged as `default` in
 * `s3config.replicationEndpoints`, or else the site of the first
 * configured endpoint. Rules that already carry a storageClass are
 * passed through untouched; rules without one are copied with the
 * fallback applied, or left out of the result when no fallback can be
 * determined. The input array and its rules are not modified.
 *
 * @param {object[]} rules - Replication rules of the bucket configuration
 * @param {object} s3config - Cloudserver configuration object
 * @param {object[]} [s3config.replicationEndpoints] - Configured
 * replication endpoints
 * @return {object[]} new array of rules, each with a storageClass
 */
function _withDefaultStorageClass(rules, s3config) {
    const { replicationEndpoints = [] } = s3config;
    const preferredEndpoint = replicationEndpoints.find(e => e.default);
    const fallback = preferredEndpoint?.site ?? replicationEndpoints[0]?.site;

    return rules.reduce((kept, rule) => {
        if (rule.storageClass) {
            // Explicit storageClass: keep the rule as-is.
            kept.push(rule);
        } else if (fallback) {
            kept.push({ ...rule, storageClass: fallback });
        }
        // Otherwise there is nothing to resolve the rule to: drop it.
        return kept;
    }, []);
}

function _getReplicationInfo(s3config, rule, replicationConfig, content, operationType,
objectMD, bucketMD) {
const storageTypes = [];
const backends = [];
const storageClasses = _getStorageClasses(s3config, rule);
if (!storageClasses) {
return undefined;
/**
* Check whether the authenticated user is allowed to trigger replication.
* Internal service accounts (e.g. Lifecycle) are not allowed unless their
* account properties explicitly permit it (e.g. MD ingestion).
* @param {AuthInfo} [authInfo] - authentication info of the request issuer
* @return {boolean} true if the user can trigger replication
*/
function _canUserReplicate(authInfo) {
if (!authInfo) {
return true;
}
storageClasses.forEach(storageClass => {
const storageClassName =
storageClass.endsWith(':preferred_read') ?
storageClass.split(':')[0] : storageClass;
// TODO CLDSRV-646: for consistency, should we look at replicationEndpoints instead, like
// `_getStorageClasses()` ?
const location = s3config.locationConstraints[storageClassName];
if (location && replicationBackends[location.type]) {
storageTypes.push(location.type);
}
backends.push(_getBackend(objectMD, storageClassName));
});
if (storageTypes.length > 0 && operationType) {
content.push(operationType);
const canonicalId = authInfo.getCanonicalID();
if (!isServiceAccount(canonicalId)) {
return true;
}
return {
status: 'PENDING',
backends,
content,
destination: replicationConfig.destination,
storageClass: storageClasses.join(','),
role: replicationConfig.role,
storageType: storageTypes.join(','),
isNFS: bucketMD.isNFS(),
};
const props = getServiceAccountProperties(canonicalId);
return !!props?.canReplicate;
}

/**
* Get the object replicationInfo to replicate data and metadata, or only
* metadata if the operation only changes metadata or the object is 0 bytes
* metadata if the operation only changes metadata or the object is 0 bytes.
*
* The rule-matching / dedup / per-backend stamping logic lives in
* arsenal's `ReplicationConfiguration.resolveBackends`. This function
* is the cloudserver-specific shell: it enforces the service-account
* gate, supplies a default storageClass from `replicationEndpoints`,
* decides the `content` array based on the operation kind, and
* stitches the result into a `replicationInfo` envelope.
*
* @param {object} s3config - Cloudserver configuration object
* @param {object} s3config.locationConstraints - Configured map of location constraints
* @param {object[]} s3config.replicationEndpoints - Configured replication endpoints
Expand All @@ -78,49 +66,41 @@ function _getReplicationInfo(s3config, rule, replicationConfig, content, operati
* @param {string} operationType - The type of operation to replicate
* @param {object} objectMD - The object metadata
* @param {AuthInfo} [authInfo] - authentication info of object owner
* @return {undefined}
* @return {object|undefined}
*/
function getReplicationInfo(
s3config, objKey, bucketMD, isMD, objSize, operationType, objectMD, authInfo) {
const content = isMD || objSize === 0 ? ['METADATA'] : ['DATA', 'METADATA'];
function getReplicationInfo(s3config, objKey, bucketMD, isMD, objSize, operationType, objectMD, authInfo) {
const config = bucketMD.getReplicationConfiguration();
if (!config || !_canUserReplicate(authInfo)) {
return undefined;
}

// Do not replicate object in the following cases:
//
// - bucket does not have a replication configuration
//
// - replication configuration does not apply to the object
// (i.e. no rule matches object prefix)
//
// - replication configuration applies to the object (i.e. a rule matches
// object prefix) but the status is disabled
//
// - object owner is an internal service account like Lifecycle,
// unless the account properties explicitly allow it to
// replicate like MD ingestion (because we do not want to
// replicate objects created from actions triggered by internal
// services, by design)
const isCloud = site => !!replicationBackends[s3config.locationConstraints[site]?.type];
const rules = _withDefaultStorageClass(config.rules || [], s3config);
const backends = ReplicationConfiguration.resolveBackends(
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ReplicationConfiguration.resolveBackends is not a function in arsenal 8.4.2. CI shows 110 test failures with TypeError: ReplicationConfiguration.resolveBackends is not a function. The arsenal dependency in package.json needs to be bumped to a version that exports resolveBackends and resolveSourceRole as static methods on ReplicationConfiguration.

— Claude Code

{ ...config, rules },
objKey,
isCloud,
objectMD?.replicationInfo?.backends,
);

if (config) {
let doReplicate = false;
if (!authInfo || !isServiceAccount(authInfo.getCanonicalID())) {
doReplicate = true;
} else {
const serviceAccountProps = getServiceAccountProperties(
authInfo.getCanonicalID());
doReplicate = serviceAccountProps.canReplicate;
}
if (doReplicate) {
const rule = config.rules.find(
rule => (objKey.startsWith(rule.prefix) && rule.enabled));
if (rule) {
// TODO CLDSRV-646 : should "merge" the replicationInfo for different rules
return _getReplicationInfo(
s3config, rule, config, content, operationType, objectMD, bucketMD);
}
}
if (backends.length === 0) {
return undefined;
}

const hasCloudBackend = backends.some(b => isCloud(b.site));

const content = (isMD || objSize === 0) ? ['METADATA'] : ['DATA', 'METADATA'];
if (hasCloudBackend && operationType) {
content.push(operationType);
}
return undefined;

return {
status: 'PENDING',
backends,
content,
role: ReplicationConfiguration.resolveSourceRole(config.role),
isNFS: bucketMD.isNFS(),
};
}

module.exports = getReplicationInfo;
28 changes: 21 additions & 7 deletions lib/metadata/acl.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,31 @@ const acl = {
objectMD.acl = addACLParams;
objectMD.originOp = 's3:ObjectAcl:Put';

// Use storageType to determine if replication update is needed, as it is set only for
// "cloud" locations. This ensures that we reset replication when CRR is used, but not
// when multi-backend replication (i.e. Zenko) is used.
// TODO: this should be refactored to properly update the replication info, accounting
// for multiple rules and resetting the status only if needed CLDSRV-646
// Rebuild replication info from the current bucket config to
// pick up any new destinations. CRR backends carry a
// resolved destination role on the entry (backbeat needs it
// to authenticate on the destination side); cloud backends
// don't (credentials live in the location configuration).
// For cloud backends, ACL replication is not supported, so
// preserve their existing status instead of resetting to
// PENDING.
const isCRR = b => !!b.role;

const replicationInfo = getReplicationInfo(config, objectKey, bucket, true);
if (replicationInfo && !replicationInfo.storageType) {
if (replicationInfo && replicationInfo.backends.some(isCRR)) {
const backends = replicationInfo.backends.map(b => {
if (isCRR(b)) {
return b;
}

const existing = objectMD.replicationInfo.backends.find(e => e.site === b.site);
return existing || b;
});

objectMD.replicationInfo = {
...objectMD.replicationInfo,
...replicationInfo,
backends,
};
}

Expand Down Expand Up @@ -171,4 +186,3 @@ const acl = {
};

module.exports = acl;

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
"@azure/storage-blob": "^12.28.0",
"@hapi/joi": "^17.1.1",
"@smithy/node-http-handler": "^3.0.0",
"arsenal": "git+https://github.com/scality/Arsenal#8.4.1",
"arsenal": "git+https://github.com/scality/Arsenal#21b9bb33ad77d21609a690a5709a645eab1a95d7",
"async": "2.6.4",
"bucketclient": "scality/bucketclient#8.2.7",
"bufferutil": "^4.0.8",
Expand Down
Loading
Loading