/* * Copyright DataStax, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ 'use strict'; const types = require('./types'); const token = require('./token'); const utils = require('./utils'); const MutableLong = require('./types/mutable-long'); const { Integer } = types; // Murmur3 constants //-0x783C846EEEBDAC2B const mconst1 = new MutableLong(0x53d5, 0x1142, 0x7b91, 0x87c3); //0x4cf5ad432745937f const mconst2 = new MutableLong(0x937f, 0x2745, 0xad43, 0x4cf5); const mlongFive = MutableLong.fromNumber(5); //0xff51afd7ed558ccd const mconst3 = new MutableLong(0x8ccd, 0xed55, 0xafd7, 0xff51); //0xc4ceb9fe1a85ec53 const mconst4 = new MutableLong(0xec53, 0x1a85, 0xb9fe, 0xc4ce); const mconst5 = MutableLong.fromNumber(0x52dce729); const mconst6 = MutableLong.fromNumber(0x38495ab5); /** * Represents a set of methods that are able to generate and parse tokens for the C* partitioner. * @abstract */ class Tokenizer { constructor() { } /** * Creates a token based on the Buffer value provided * @abstract * @param {Buffer|Array} value * @returns {Token} Computed token */ hash(value) { throw new Error('You must implement a hash function for the tokenizer'); } /** * Parses a token string and returns a representation of the token * @abstract * @param {String} value */ parse(value) { throw new Error('You must implement a parse function for the tokenizer'); } minToken() { throw new Error('You must implement a minToken function for the tokenizer'); } /** * Splits the range specified by start and end into numberOfSplits equal parts. * @param {Token} start Starting token * @param {Token} end End token * @param {Number} numberOfSplits Number of splits to make. */ split(start, end, numberOfSplits) { throw new Error('You must implement a split function for the tokenizer'); } /** * Common implementation for splitting token ranges when start is in * a shared Integer format. * * @param {Integer} start Starting token * @param {Integer} range How large the range of the split is * @param {Integer} ringEnd The end point of the ring so we know where to wrap * @param {Integer} ringLength The total size of the ring * @param {Number} numberOfSplits The number of splits to make * @returns {Array} The evenly-split points on the range */ splitBase(start, range, ringEnd, ringLength, numberOfSplits) { const numberOfSplitsInt = Integer.fromInt(numberOfSplits); const divider = range.divide(numberOfSplitsInt); let remainder = range.modulo(numberOfSplitsInt); const results = []; let current = start; const dividerPlusOne = divider.add(Integer.ONE); for(let i = 1; i < numberOfSplits; i++) { if (remainder.greaterThan(Integer.ZERO)) { current = current.add(dividerPlusOne); } else { current = current.add(divider); } if (ringLength && current.greaterThan(ringEnd)) { current = current.subtract(ringLength); } results.push(current); remainder = remainder.subtract(Integer.ONE); } return results; } /** * Return internal string based representation of a Token. * @param {Token} token */ stringify(token) { return token.getValue().toString(); } } /** * Uniformly distributes data across the cluster based on Cassandra flavored Murmur3 hashed values. */ class Murmur3Tokenizer extends Tokenizer { constructor() { super(); } /** * @param {Buffer} value * @return {Murmur3Token} */ hash(value) { // This is an adapted version of the MurmurHash.hash3_x64_128 from Cassandra used // for M3P. Compared to that methods, there's a few inlining of arguments and we // only return the first 64-bits of the result since that's all M3 partitioner uses. const data = value; let offset = 0; const length = data.length; const nblocks = length >> 4; // Process as 128-bit blocks. const h1 = new MutableLong(); const h2 = new MutableLong(); let k1 = new MutableLong(); let k2 = new MutableLong(); for (let i = 0; i < nblocks; i++) { k1 = this.getBlock(data, offset, i * 2); k2 = this.getBlock(data, offset, i * 2 + 1); k1.multiply(mconst1); this.rotl64(k1, 31); k1.multiply(mconst2); h1.xor(k1); this.rotl64(h1, 27); h1.add(h2); h1.multiply(mlongFive).add(mconst5); k2.multiply(mconst2); this.rotl64(k2, 33); k2.multiply(mconst1); h2.xor(k2); this.rotl64(h2, 31); h2.add(h1); h2.multiply(mlongFive).add(mconst6); } //---------- // tail // Advance offset to the unprocessed tail of the data. offset += nblocks * 16; k1 = new MutableLong(); k2 = new MutableLong(); /* eslint-disable no-fallthrough */ switch(length & 15) { case 15: k2.xor(fromSignedByte(data[offset+14]).shiftLeft(48)); case 14: k2.xor(fromSignedByte(data[offset+13]).shiftLeft(40)); case 13: k2.xor(fromSignedByte(data[offset+12]).shiftLeft(32)); case 12: k2.xor(fromSignedByte(data[offset+11]).shiftLeft(24)); case 11: k2.xor(fromSignedByte(data[offset+10]).shiftLeft(16)); case 10: k2.xor(fromSignedByte(data[offset+9]).shiftLeft(8)); case 9: k2.xor(fromSignedByte(data[offset+8])); k2.multiply(mconst2); this.rotl64(k2, 33); k2.multiply(mconst1); h2.xor(k2); case 8: k1.xor(fromSignedByte(data[offset+7]).shiftLeft(56)); case 7: k1.xor(fromSignedByte(data[offset+6]).shiftLeft(48)); case 6: k1.xor(fromSignedByte(data[offset+5]).shiftLeft(40)); case 5: k1.xor(fromSignedByte(data[offset+4]).shiftLeft(32)); case 4: k1.xor(fromSignedByte(data[offset+3]).shiftLeft(24)); case 3: k1.xor(fromSignedByte(data[offset+2]).shiftLeft(16)); case 2: k1.xor(fromSignedByte(data[offset+1]).shiftLeft(8)); case 1: k1.xor(fromSignedByte(data[offset])); k1.multiply(mconst1); this.rotl64(k1,31); k1.multiply(mconst2); h1.xor(k1); } /* eslint-enable no-fallthrough */ h1.xor(MutableLong.fromNumber(length)); h2.xor(MutableLong.fromNumber(length)); h1.add(h2); h2.add(h1); this.fmix(h1); this.fmix(h2); h1.add(h2); return new token.Murmur3Token(h1); } /** * * @param {Array} key * @param {Number} offset * @param {Number} index * @return {MutableLong} */ getBlock(key, offset, index) { const i8 = index << 3; const blockOffset = offset + i8; return new MutableLong( (key[blockOffset]) | (key[blockOffset + 1] << 8), (key[blockOffset + 2]) | (key[blockOffset + 3] << 8), (key[blockOffset + 4]) | (key[blockOffset + 5] << 8), (key[blockOffset + 6]) | (key[blockOffset + 7] << 8) ); } /** * @param {MutableLong} v * @param {Number} n */ rotl64(v, n) { const left = v.clone().shiftLeft(n); v.shiftRightUnsigned(64 - n).or(left); } /** @param {MutableLong} k */ fmix(k) { k.xor(new MutableLong(k.getUint16(2) >>> 1 | ((k.getUint16(3) << 15) & 0xffff), k.getUint16(3) >>> 1, 0, 0)); k.multiply(mconst3); const other = new MutableLong( (k.getUint16(2) >>> 1) | ((k.getUint16(3) << 15) & 0xffff), k.getUint16(3) >>> 1, 0, 0 ); k.xor(other); k.multiply(mconst4); k.xor(new MutableLong(k.getUint16(2) >>> 1 | (k.getUint16(3) << 15 & 0xffff), k.getUint16(3) >>> 1, 0, 0)); } /** * Parses a int64 decimal string representation into a MutableLong. * @param {String} value * @returns {Murmur3Token} */ parse(value) { return new token.Murmur3Token(MutableLong.fromString(value)); } minToken() { if (!this._minToken) { // minimum long value. this._minToken = this.parse('-9223372036854775808'); } return this._minToken; } maxToken() { if (!this._maxToken) { this._maxToken = this.parse('9223372036854775807'); } return this._maxToken; } maxValue() { if (!this._maxValue) { this._maxValue = Integer.fromString('9223372036854775807'); } return this._maxValue; } minValue() { if (!this._minValue) { this._minValue = Integer.fromString('-9223372036854775808'); } return this._minValue; } ringLength() { if (!this._ringLength) { this._ringLength = this.maxValue().subtract(this.minValue()); } return this._ringLength; } split(start, end, numberOfSplits) { // ]min, min] means the whole ring. if (start.equals(end) && start.equals(this.minToken())) { end = this.maxToken(); } const startVal = Integer.fromString(start.getValue().toString()); const endVal = Integer.fromString(end.getValue().toString()); let range = endVal.subtract(startVal); if (range.isNegative()) { range = range.add(this.ringLength()); } const values = this.splitBase(startVal, range, this.maxValue(), this.ringLength(), numberOfSplits); return values.map(v => this.parse(v.toString())); } stringify(token) { // Get the underlying MutableLong const value = token.getValue(); // We need a way to uniquely represent a token, it doesn't have to be the decimal string representation // Using the uint16 avoids divisions and other expensive operations on the longs return value.getUint16(0) + ',' + value.getUint16(1) + ',' + value.getUint16(2) + ',' + value.getUint16(3); } } /** * Uniformly distributes data across the cluster based on MD5 hash values. */ class RandomTokenizer extends Tokenizer { constructor() { super(); // eslint-disable-next-line this._crypto = require('crypto'); } /** * @param {Buffer|Array} value * @returns {RandomToken} */ hash(value) { if (Array.isArray(value)) { value = utils.allocBufferFromArray(value); } const hashedValue = this._crypto.createHash('md5').update(value).digest(); return new token.RandomToken(Integer.fromBuffer(hashedValue).abs()); } /** * @returns {Token} */ parse(value) { return new token.RandomToken(Integer.fromString(value)); } minToken() { if (!this._minToken) { this._minToken = this.parse('-1'); } return this._minToken; } maxValue() { if (!this._maxValue) { this._maxValue = Integer.fromNumber(Math.pow(2, 127)); } return this._maxValue; } maxToken() { if (!this._maxToken) { this._maxToken = new token.RandomToken(this.maxValue()); } return this._maxToken; } ringLength() { if (!this._ringLength) { this._ringLength = this.maxValue().add(Integer.ONE); } return this._ringLength; } split(start, end, numberOfSplits) { // ]min, min] means the whole ring. if (start.equals(end) && start.equals(this.minToken())) { end = this.maxToken(); } const startVal = start.getValue(); const endVal = end.getValue(); let range = endVal.subtract(startVal); if (range.lessThan(Integer.ZERO)) { range = range.add(this.ringLength()); } const values = this.splitBase(startVal, range, this.maxValue(), this.ringLength(), numberOfSplits); return values.map(v => new token.RandomToken(v)); } } class ByteOrderedTokenizer extends Tokenizer { constructor() { super(); } /** * @param {Buffer} value * @returns {ByteOrderedToken} */ hash(value) { // strip any trailing zeros as tokens with trailing zeros are equivalent // to those who don't have them. if (Array.isArray(value)) { value = utils.allocBufferFromArray(value); } let zeroIndex = value.length; for(let i = value.length - 1; i > 0; i--) { if(value[i] === 0) { zeroIndex = i; } else { break; } } return new token.ByteOrderedToken(value.slice(0, zeroIndex)); } stringify(token) { return token.getValue().toString('hex'); } parse(value) { return this.hash(utils.allocBufferFromString(value, 'hex')); } minToken() { if (!this._minToken) { this._minToken = this.hash([]); } return this._minToken; } _toNumber(buffer, significantBytes) { // Convert a token's byte array to a number in order to perform computations. // This depends on the number of significant bytes that is used to normalize all tokens // to the same size. For example if the token is 0x01 but significant bytes is 2, the // result is 0x0100. let target = buffer; if(buffer.length !== significantBytes) { target = Buffer.alloc(significantBytes); buffer.copy(target); } // similar to Integer.fromBuffer except we force the sign to 0. const bits = new Array(Math.ceil(target.length / 4)); for (let i = 0; i < bits.length; i++) { let offset = target.length - ((i + 1) * 4); let value; if (offset < 0) { //The buffer length is not multiple of 4 offset = offset + 4; value = 0; for (let j = 0; j < offset; j++) { const byte = target[j]; value = value | (byte << (offset - j - 1) * 8); } } else { value = target.readInt32BE(offset); } bits[i] = value; } return new Integer(bits, 0); } _toBuffer(number, significantBytes) { // Convert numeric representation back to a buffer. const buffer = Integer.toBuffer(number); if (buffer.length === significantBytes) { return buffer; } // if first byte is a sign byte, skip it. let start, length; if (buffer[0] === 0) { start = 1; length = buffer.length - 1; } else { start = 0; length = buffer.length; } const target = Buffer.alloc(significantBytes); buffer.copy(target, significantBytes - length, start, length + start); return target; } split(start, end, numberOfSplits) { const tokenOrder = start.compare(end); if (tokenOrder === 0 && start.equals(this.minToken())) { throw new Error("Cannot split whole ring with ordered partitioner"); } let startVal, endVal, range, ringLength, ringEnd; const intNumberOfSplits = Integer.fromNumber(numberOfSplits); // Since tokens are compared lexicographically, convert to numbers using the // largest length (i.e. given 0x0A and 0x0BCD, switch to 0x0A00 and 0x0BCD) let significantBytes = Math.max(start.getValue().length, end.getValue().length); if (tokenOrder < 0) { let addedBytes = 0; while (true) { startVal = this._toNumber(start.getValue(), significantBytes); endVal = this._toNumber(end.getValue(), significantBytes); range = endVal.subtract(startVal); if (addedBytes === 4 || range.compare(intNumberOfSplits) >= 0) { break; } significantBytes += 1; addedBytes += 1; } } else { let addedBytes = 0; while (true) { startVal = this._toNumber(start.getValue(), significantBytes); endVal = this._toNumber(end.getValue(), significantBytes); ringLength = Integer.fromNumber(Math.pow(2, significantBytes * 8)); ringEnd = ringLength.subtract(Integer.ONE); range = endVal.subtract(startVal).add(ringLength); if (addedBytes === 4 || range.compare(intNumberOfSplits) >= 0) { break; } significantBytes += 1; addedBytes += 1; } } const values = this.splitBase(startVal, range, ringEnd, ringLength, numberOfSplits); return values.map(v => new token.ByteOrderedToken(this._toBuffer(v, significantBytes))); } } /** * @param {Number} value * @return {MutableLong} */ function fromSignedByte(value) { if (value < 128) { return new MutableLong(value, 0, 0, 0); } return new MutableLong((value - 256) & 0xffff, 0xffff, 0xffff, 0xffff); } exports.Murmur3Tokenizer = Murmur3Tokenizer; exports.RandomTokenizer = RandomTokenizer; exports.ByteOrderedTokenizer = ByteOrderedTokenizer;