Merge remote-tracking branch 'origin/main' into zhongzc/repartition-procedure-scaffold

Zhenchi
2025-10-22 08:50:16 +00:00
255 changed files with 12209 additions and 2428 deletions

507
.github/scripts/package-lock.json generated vendored Normal file

@@ -0,0 +1,507 @@
{
"name": "greptimedb-github-scripts",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "greptimedb-github-scripts",
"version": "1.0.0",
"dependencies": {
"@octokit/rest": "^21.0.0",
"axios": "^1.7.0"
}
},
"node_modules/@octokit/auth-token": {
"version": "5.1.2",
"resolved": "https://registry.npmjs.org/@octokit/auth-token/-/auth-token-5.1.2.tgz",
"integrity": "sha512-JcQDsBdg49Yky2w2ld20IHAlwr8d/d8N6NiOXbtuoPCqzbsiJgF633mVUw3x4mo0H5ypataQIX7SFu3yy44Mpw==",
"license": "MIT",
"engines": {
"node": ">= 18"
}
},
"node_modules/@octokit/core": {
"version": "6.1.6",
"resolved": "https://registry.npmjs.org/@octokit/core/-/core-6.1.6.tgz",
"integrity": "sha512-kIU8SLQkYWGp3pVKiYzA5OSaNF5EE03P/R8zEmmrG6XwOg5oBjXyQVVIauQ0dgau4zYhpZEhJrvIYt6oM+zZZA==",
"license": "MIT",
"dependencies": {
"@octokit/auth-token": "^5.0.0",
"@octokit/graphql": "^8.2.2",
"@octokit/request": "^9.2.3",
"@octokit/request-error": "^6.1.8",
"@octokit/types": "^14.0.0",
"before-after-hook": "^3.0.2",
"universal-user-agent": "^7.0.0"
},
"engines": {
"node": ">= 18"
}
},
"node_modules/@octokit/endpoint": {
"version": "10.1.4",
"resolved": "https://registry.npmjs.org/@octokit/endpoint/-/endpoint-10.1.4.tgz",
"integrity": "sha512-OlYOlZIsfEVZm5HCSR8aSg02T2lbUWOsCQoPKfTXJwDzcHQBrVBGdGXb89dv2Kw2ToZaRtudp8O3ZIYoaOjKlA==",
"license": "MIT",
"dependencies": {
"@octokit/types": "^14.0.0",
"universal-user-agent": "^7.0.2"
},
"engines": {
"node": ">= 18"
}
},
"node_modules/@octokit/graphql": {
"version": "8.2.2",
"resolved": "https://registry.npmjs.org/@octokit/graphql/-/graphql-8.2.2.tgz",
"integrity": "sha512-Yi8hcoqsrXGdt0yObxbebHXFOiUA+2v3n53epuOg1QUgOB6c4XzvisBNVXJSl8RYA5KrDuSL2yq9Qmqe5N0ryA==",
"license": "MIT",
"dependencies": {
"@octokit/request": "^9.2.3",
"@octokit/types": "^14.0.0",
"universal-user-agent": "^7.0.0"
},
"engines": {
"node": ">= 18"
}
},
"node_modules/@octokit/openapi-types": {
"version": "25.1.0",
"resolved": "https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-25.1.0.tgz",
"integrity": "sha512-idsIggNXUKkk0+BExUn1dQ92sfysJrje03Q0bv0e+KPLrvyqZF8MnBpFz8UNfYDwB3Ie7Z0TByjWfzxt7vseaA==",
"license": "MIT"
},
"node_modules/@octokit/plugin-paginate-rest": {
"version": "11.6.0",
"resolved": "https://registry.npmjs.org/@octokit/plugin-paginate-rest/-/plugin-paginate-rest-11.6.0.tgz",
"integrity": "sha512-n5KPteiF7pWKgBIBJSk8qzoZWcUkza2O6A0za97pMGVrGfPdltxrfmfF5GucHYvHGZD8BdaZmmHGz5cX/3gdpw==",
"license": "MIT",
"dependencies": {
"@octokit/types": "^13.10.0"
},
"engines": {
"node": ">= 18"
},
"peerDependencies": {
"@octokit/core": ">=6"
}
},
"node_modules/@octokit/plugin-paginate-rest/node_modules/@octokit/openapi-types": {
"version": "24.2.0",
"resolved": "https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-24.2.0.tgz",
"integrity": "sha512-9sIH3nSUttelJSXUrmGzl7QUBFul0/mB8HRYl3fOlgHbIWG+WnYDXU3v/2zMtAvuzZ/ed00Ei6on975FhBfzrg==",
"license": "MIT"
},
"node_modules/@octokit/plugin-paginate-rest/node_modules/@octokit/types": {
"version": "13.10.0",
"resolved": "https://registry.npmjs.org/@octokit/types/-/types-13.10.0.tgz",
"integrity": "sha512-ifLaO34EbbPj0Xgro4G5lP5asESjwHracYJvVaPIyXMuiuXLlhic3S47cBdTb+jfODkTE5YtGCLt3Ay3+J97sA==",
"license": "MIT",
"dependencies": {
"@octokit/openapi-types": "^24.2.0"
}
},
"node_modules/@octokit/plugin-request-log": {
"version": "5.3.1",
"resolved": "https://registry.npmjs.org/@octokit/plugin-request-log/-/plugin-request-log-5.3.1.tgz",
"integrity": "sha512-n/lNeCtq+9ofhC15xzmJCNKP2BWTv8Ih2TTy+jatNCCq/gQP/V7rK3fjIfuz0pDWDALO/o/4QY4hyOF6TQQFUw==",
"license": "MIT",
"engines": {
"node": ">= 18"
},
"peerDependencies": {
"@octokit/core": ">=6"
}
},
"node_modules/@octokit/plugin-rest-endpoint-methods": {
"version": "13.5.0",
"resolved": "https://registry.npmjs.org/@octokit/plugin-rest-endpoint-methods/-/plugin-rest-endpoint-methods-13.5.0.tgz",
"integrity": "sha512-9Pas60Iv9ejO3WlAX3maE1+38c5nqbJXV5GrncEfkndIpZrJ/WPMRd2xYDcPPEt5yzpxcjw9fWNoPhsSGzqKqw==",
"license": "MIT",
"dependencies": {
"@octokit/types": "^13.10.0"
},
"engines": {
"node": ">= 18"
},
"peerDependencies": {
"@octokit/core": ">=6"
}
},
"node_modules/@octokit/plugin-rest-endpoint-methods/node_modules/@octokit/openapi-types": {
"version": "24.2.0",
"resolved": "https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-24.2.0.tgz",
"integrity": "sha512-9sIH3nSUttelJSXUrmGzl7QUBFul0/mB8HRYl3fOlgHbIWG+WnYDXU3v/2zMtAvuzZ/ed00Ei6on975FhBfzrg==",
"license": "MIT"
},
"node_modules/@octokit/plugin-rest-endpoint-methods/node_modules/@octokit/types": {
"version": "13.10.0",
"resolved": "https://registry.npmjs.org/@octokit/types/-/types-13.10.0.tgz",
"integrity": "sha512-ifLaO34EbbPj0Xgro4G5lP5asESjwHracYJvVaPIyXMuiuXLlhic3S47cBdTb+jfODkTE5YtGCLt3Ay3+J97sA==",
"license": "MIT",
"dependencies": {
"@octokit/openapi-types": "^24.2.0"
}
},
"node_modules/@octokit/request": {
"version": "9.2.4",
"resolved": "https://registry.npmjs.org/@octokit/request/-/request-9.2.4.tgz",
"integrity": "sha512-q8ybdytBmxa6KogWlNa818r0k1wlqzNC+yNkcQDECHvQo8Vmstrg18JwqJHdJdUiHD2sjlwBgSm9kHkOKe2iyA==",
"license": "MIT",
"dependencies": {
"@octokit/endpoint": "^10.1.4",
"@octokit/request-error": "^6.1.8",
"@octokit/types": "^14.0.0",
"fast-content-type-parse": "^2.0.0",
"universal-user-agent": "^7.0.2"
},
"engines": {
"node": ">= 18"
}
},
"node_modules/@octokit/request-error": {
"version": "6.1.8",
"resolved": "https://registry.npmjs.org/@octokit/request-error/-/request-error-6.1.8.tgz",
"integrity": "sha512-WEi/R0Jmq+IJKydWlKDmryPcmdYSVjL3ekaiEL1L9eo1sUnqMJ+grqmC9cjk7CA7+b2/T397tO5d8YLOH3qYpQ==",
"license": "MIT",
"dependencies": {
"@octokit/types": "^14.0.0"
},
"engines": {
"node": ">= 18"
}
},
"node_modules/@octokit/rest": {
"version": "21.1.1",
"resolved": "https://registry.npmjs.org/@octokit/rest/-/rest-21.1.1.tgz",
"integrity": "sha512-sTQV7va0IUVZcntzy1q3QqPm/r8rWtDCqpRAmb8eXXnKkjoQEtFe3Nt5GTVsHft+R6jJoHeSiVLcgcvhtue/rg==",
"license": "MIT",
"dependencies": {
"@octokit/core": "^6.1.4",
"@octokit/plugin-paginate-rest": "^11.4.2",
"@octokit/plugin-request-log": "^5.3.1",
"@octokit/plugin-rest-endpoint-methods": "^13.3.0"
},
"engines": {
"node": ">= 18"
}
},
"node_modules/@octokit/types": {
"version": "14.1.0",
"resolved": "https://registry.npmjs.org/@octokit/types/-/types-14.1.0.tgz",
"integrity": "sha512-1y6DgTy8Jomcpu33N+p5w58l6xyt55Ar2I91RPiIA0xCJBXyUAhXCcmZaDWSANiha7R9a6qJJ2CRomGPZ6f46g==",
"license": "MIT",
"dependencies": {
"@octokit/openapi-types": "^25.1.0"
}
},
"node_modules/asynckit": {
"version": "0.4.0",
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==",
"license": "MIT"
},
"node_modules/axios": {
"version": "1.12.2",
"resolved": "https://registry.npmjs.org/axios/-/axios-1.12.2.tgz",
"integrity": "sha512-vMJzPewAlRyOgxV2dU0Cuz2O8zzzx9VYtbJOaBgXFeLc4IV/Eg50n4LowmehOOR61S8ZMpc2K5Sa7g6A4jfkUw==",
"license": "MIT",
"dependencies": {
"follow-redirects": "^1.15.6",
"form-data": "^4.0.4",
"proxy-from-env": "^1.1.0"
}
},
"node_modules/before-after-hook": {
"version": "3.0.2",
"resolved": "https://registry.npmjs.org/before-after-hook/-/before-after-hook-3.0.2.tgz",
"integrity": "sha512-Nik3Sc0ncrMK4UUdXQmAnRtzmNQTAAXmXIopizwZ1W1t8QmfJj+zL4OA2I7XPTPW5z5TDqv4hRo/JzouDJnX3A==",
"license": "Apache-2.0"
},
"node_modules/call-bind-apply-helpers": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz",
"integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==",
"license": "MIT",
"dependencies": {
"es-errors": "^1.3.0",
"function-bind": "^1.1.2"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/combined-stream": {
"version": "1.0.8",
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
"license": "MIT",
"dependencies": {
"delayed-stream": "~1.0.0"
},
"engines": {
"node": ">= 0.8"
}
},
"node_modules/delayed-stream": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
"license": "MIT",
"engines": {
"node": ">=0.4.0"
}
},
"node_modules/dunder-proto": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
"integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==",
"license": "MIT",
"dependencies": {
"call-bind-apply-helpers": "^1.0.1",
"es-errors": "^1.3.0",
"gopd": "^1.2.0"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/es-define-property": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz",
"integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
}
},
"node_modules/es-errors": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz",
"integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
}
},
"node_modules/es-object-atoms": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz",
"integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==",
"license": "MIT",
"dependencies": {
"es-errors": "^1.3.0"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/es-set-tostringtag": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz",
"integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==",
"license": "MIT",
"dependencies": {
"es-errors": "^1.3.0",
"get-intrinsic": "^1.2.6",
"has-tostringtag": "^1.0.2",
"hasown": "^2.0.2"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/fast-content-type-parse": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/fast-content-type-parse/-/fast-content-type-parse-2.0.1.tgz",
"integrity": "sha512-nGqtvLrj5w0naR6tDPfB4cUmYCqouzyQiz6C5y/LtcDllJdrcc6WaWW6iXyIIOErTa/XRybj28aasdn4LkVk6Q==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/fastify"
},
{
"type": "opencollective",
"url": "https://opencollective.com/fastify"
}
],
"license": "MIT"
},
"node_modules/follow-redirects": {
"version": "1.15.11",
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz",
"integrity": "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==",
"funding": [
{
"type": "individual",
"url": "https://github.com/sponsors/RubenVerborgh"
}
],
"license": "MIT",
"engines": {
"node": ">=4.0"
},
"peerDependenciesMeta": {
"debug": {
"optional": true
}
}
},
"node_modules/form-data": {
"version": "4.0.4",
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.4.tgz",
"integrity": "sha512-KrGhL9Q4zjj0kiUt5OO4Mr/A/jlI2jDYs5eHBpYHPcBEVSiipAvn2Ko2HnPe20rmcuuvMHNdZFp+4IlGTMF0Ow==",
"license": "MIT",
"dependencies": {
"asynckit": "^0.4.0",
"combined-stream": "^1.0.8",
"es-set-tostringtag": "^2.1.0",
"hasown": "^2.0.2",
"mime-types": "^2.1.12"
},
"engines": {
"node": ">= 6"
}
},
"node_modules/function-bind": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz",
"integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==",
"license": "MIT",
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/get-intrinsic": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz",
"integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==",
"license": "MIT",
"dependencies": {
"call-bind-apply-helpers": "^1.0.2",
"es-define-property": "^1.0.1",
"es-errors": "^1.3.0",
"es-object-atoms": "^1.1.1",
"function-bind": "^1.1.2",
"get-proto": "^1.0.1",
"gopd": "^1.2.0",
"has-symbols": "^1.1.0",
"hasown": "^2.0.2",
"math-intrinsics": "^1.1.0"
},
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/get-proto": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz",
"integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==",
"license": "MIT",
"dependencies": {
"dunder-proto": "^1.0.1",
"es-object-atoms": "^1.0.0"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/gopd": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz",
"integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/has-symbols": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz",
"integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/has-tostringtag": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz",
"integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==",
"license": "MIT",
"dependencies": {
"has-symbols": "^1.0.3"
},
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/hasown": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz",
"integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==",
"license": "MIT",
"dependencies": {
"function-bind": "^1.1.2"
},
"engines": {
"node": ">= 0.4"
}
},
"node_modules/math-intrinsics": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz",
"integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
}
},
"node_modules/mime-db": {
"version": "1.52.0",
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
"license": "MIT",
"engines": {
"node": ">= 0.6"
}
},
"node_modules/mime-types": {
"version": "2.1.35",
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
"license": "MIT",
"dependencies": {
"mime-db": "1.52.0"
},
"engines": {
"node": ">= 0.6"
}
},
"node_modules/proxy-from-env": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
"integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==",
"license": "MIT"
},
"node_modules/universal-user-agent": {
"version": "7.0.3",
"resolved": "https://registry.npmjs.org/universal-user-agent/-/universal-user-agent-7.0.3.tgz",
"integrity": "sha512-TmnEAEAsBJVZM/AADELsK76llnwcf9vMKuPz8JflO1frO8Lchitr0fNaN9d+Ap0BjKtqWqd/J17qeDnXh8CL2A==",
"license": "ISC"
}
}
}

10
.github/scripts/package.json vendored Normal file

@@ -0,0 +1,10 @@
{
"name": "greptimedb-github-scripts",
"version": "1.0.0",
"type": "module",
"description": "GitHub automation scripts for GreptimeDB",
"dependencies": {
"@octokit/rest": "^21.0.0",
"axios": "^1.7.0"
}
}

160
.github/scripts/pr-review-reminder.js vendored Normal file

@@ -0,0 +1,160 @@
// Daily PR Review Reminder Script
// Fetches open PRs from the GreptimeDB repository and sends Slack notifications
// to PR owners and assigned reviewers to keep the review process moving.
(async () => {
const { Octokit } = await import("@octokit/rest");
const { default: axios } = await import('axios');
// Configuration
const GITHUB_TOKEN = process.env.GITHUB_TOKEN;
const SLACK_WEBHOOK_URL = process.env.SLACK_PR_REVIEW_WEBHOOK_URL;
const REPO_OWNER = "GreptimeTeam";
const REPO_NAME = "greptimedb";
const GITHUB_TO_SLACK = JSON.parse(process.env.GITHUBID_SLACKID_MAPPING || '{}');
// Debug: Print environment variable status
console.log("=== Environment Variables Debug ===");
console.log(`GITHUB_TOKEN: ${GITHUB_TOKEN ? 'Set ✓' : 'NOT SET ✗'}`);
console.log(`SLACK_PR_REVIEW_WEBHOOK_URL: ${SLACK_WEBHOOK_URL ? 'Set ✓' : 'NOT SET ✗'}`);
console.log(`GITHUBID_SLACKID_MAPPING: ${process.env.GITHUBID_SLACKID_MAPPING ? `Set ✓ (${Object.keys(GITHUB_TO_SLACK).length} mappings)` : 'NOT SET ✗'}`);
console.log("===================================\n");
const octokit = new Octokit({
auth: GITHUB_TOKEN
});
// Fetch all open PRs from the repository
async function fetchOpenPRs() {
try {
const prs = await octokit.pulls.list({
owner: REPO_OWNER,
repo: REPO_NAME,
state: "open",
per_page: 100,
sort: "created",
direction: "asc"
});
return prs.data.filter((pr) => !pr.draft);
} catch (error) {
console.error("Error fetching PRs:", error);
return [];
}
}
// Convert GitHub username to Slack mention, or fall back to the GitHub username
function toSlackMention(githubUser) {
const slackUserId = GITHUB_TO_SLACK[githubUser];
return slackUserId ? `<@${slackUserId}>` : `@${githubUser}`;
}
// Calculate days since PR was opened
function getDaysOpen(createdAt) {
const created = new Date(createdAt);
const now = new Date();
const diffMs = now - created;
const days = Math.floor(diffMs / (1000 * 60 * 60 * 24));
return days;
}
// Get urgency emoji based on PR age
function getAgeEmoji(days) {
if (days >= 14) return "🔴"; // 14+ days - critical
if (days >= 7) return "🟠"; // 7+ days - urgent
if (days >= 3) return "🟡"; // 3+ days - needs attention
return "🟢"; // < 3 days - fresh
}
// Build Slack notification message from PR list
function buildSlackMessage(prs) {
if (prs.length === 0) {
return "*🎉 Great job! No pending PRs for review.*";
}
// Separate PRs by age threshold (14 days)
const criticalPRs = [];
const recentPRs = [];
prs.forEach(pr => {
const daysOpen = getDaysOpen(pr.created_at);
if (daysOpen >= 14) {
criticalPRs.push(pr);
} else {
recentPRs.push(pr);
}
});
const lines = [
`*🔍 Daily PR Review Reminder 🔍*`,
`Found *${criticalPRs.length}* critical PR(s) (14+ days old)\n`
];
// Show critical PRs (14+ days) in detail
if (criticalPRs.length > 0) {
criticalPRs.forEach((pr, index) => {
const owner = toSlackMention(pr.user.login);
const reviewers = pr.requested_reviewers || [];
const reviewerMentions = reviewers.map(r => toSlackMention(r.login)).join(", ");
const daysOpen = getDaysOpen(pr.created_at);
const prInfo = `${index + 1}. <${pr.html_url}|#${pr.number}: ${pr.title}>`;
const ageInfo = ` 🔴 Opened *${daysOpen}* day(s) ago`;
const ownerInfo = ` 👤 Owner: ${owner}`;
const reviewerInfo = reviewers.length > 0
? ` 👁️ Reviewers: ${reviewerMentions}`
: ` 👁️ Reviewers: _Not assigned yet_`;
lines.push(prInfo);
lines.push(ageInfo);
lines.push(ownerInfo);
lines.push(reviewerInfo);
lines.push(""); // Empty line between PRs
});
}
lines.push("_Let's keep the code review process moving! 🚀_");
return lines.join("\n");
}
// Send notification to Slack webhook
async function sendSlackNotification(message) {
if (!SLACK_WEBHOOK_URL) {
console.log("⚠️ SLACK_PR_REVIEW_WEBHOOK_URL not configured. Message preview:");
console.log("=".repeat(60));
console.log(message);
console.log("=".repeat(60));
return;
}
try {
const response = await axios.post(SLACK_WEBHOOK_URL, {
text: message
});
if (response.status !== 200) {
throw new Error(`Slack API returned status ${response.status}`);
}
console.log("Slack notification sent successfully.");
} catch (error) {
console.error("Error sending Slack notification:", error);
throw error;
}
}
// Main execution flow
async function run() {
console.log(`Fetching open PRs from ${REPO_OWNER}/${REPO_NAME}...`);
const prs = await fetchOpenPRs();
console.log(`Found ${prs.length} open PR(s).`);
const message = buildSlackMessage(prs);
console.log("Sending Slack notification...");
await sendSlackNotification(message);
}
run().catch(error => {
console.error("Script execution failed:", error);
process.exit(1);
});
})();
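
For context on the Slack mentions built above: GITHUBID_SLACKID_MAPPING is parsed as a plain JSON object keyed by GitHub login, and toSlackMention falls back to a literal @login when no mapping exists. A minimal sketch of the expected shape, with placeholder Slack member IDs:

// Hypothetical mapping value for GITHUBID_SLACKID_MAPPING (IDs are placeholders).
const GITHUB_TO_SLACK = JSON.parse('{"alice": "U0123ABCDE", "bob": "U0456FGHIJ"}');
// toSlackMention("alice") -> "<@U0123ABCDE>"  (rendered as a Slack mention)
// toSlackMention("carol") -> "@carol"         (unmapped login, plain-text fallback)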


@@ -0,0 +1,36 @@
name: PR Review Reminder
on:
schedule:
# Run at 9:00 AM UTC+8 (01:00 AM UTC) every day
- cron: '0 1 * * *'
workflow_dispatch:
jobs:
pr-review-reminder:
name: Send PR Review Reminders
runs-on: ubuntu-latest
permissions:
contents: read
pull-requests: read
if: ${{ github.repository == 'GreptimeTeam/greptimedb' }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: '20'
- name: Install dependencies
working-directory: .github/scripts
run: npm ci
- name: Run PR review reminder
working-directory: .github/scripts
run: node pr-review-reminder.js
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
SLACK_PR_REVIEW_WEBHOOK_URL: ${{ vars.SLACK_PR_REVIEW_WEBHOOK_URL }}
GITHUBID_SLACKID_MAPPING: ${{ vars.GITHUBID_SLACKID_MAPPING }}
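
The workflow above passes GITHUB_TOKEN, SLACK_PR_REVIEW_WEBHOOK_URL, and GITHUBID_SLACKID_MAPPING through to the script. Since sendSlackNotification only prints a preview when the webhook URL is unset, a local dry run needs little more than a token. A rough sketch, assuming it runs as an ES module from .github/scripts and using a placeholder token:

// Hypothetical local dry run; SLACK_PR_REVIEW_WEBHOOK_URL is left unset on purpose,
// so the script logs the composed message instead of posting it to Slack.
process.env.GITHUB_TOKEN = "ghp_placeholder";
await import("./pr-review-reminder.js");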

31
Cargo.lock generated

@@ -1811,7 +1811,6 @@ dependencies = [
"session",
"snafu 0.8.6",
"store-api",
"substrait 0.18.0",
"table",
"tempfile",
"tokio",
@@ -1956,7 +1955,6 @@ dependencies = [
"snafu 0.8.6",
"standalone",
"store-api",
"substrait 0.18.0",
"table",
"temp-env",
"tempfile",
@@ -2033,6 +2031,7 @@ dependencies = [
"common-base",
"common-error",
"common-macro",
"common-stat",
"common-telemetry",
"common-test-util",
"common-wal",
@@ -2040,13 +2039,11 @@ dependencies = [
"datanode",
"humantime-serde",
"meta-client",
"num_cpus",
"object-store",
"serde",
"serde_json",
"serde_with",
"snafu 0.8.6",
"sysinfo",
"temp-env",
"tempfile",
"toml 0.8.23",
@@ -2481,6 +2478,7 @@ dependencies = [
"common-macro",
"common-telemetry",
"common-time",
"criterion 0.7.0",
"datafusion",
"datafusion-common",
"datatypes",
@@ -2553,9 +2551,12 @@ dependencies = [
name = "common-stat"
version = "0.18.0"
dependencies = [
"common-base",
"lazy_static",
"nix 0.30.1",
"num_cpus",
"prometheus",
"sysinfo",
]
[[package]]
@@ -3929,6 +3930,7 @@ dependencies = [
"file-engine",
"futures",
"futures-util",
"hostname 0.4.1",
"humantime-serde",
"lazy_static",
"log-store",
@@ -3947,7 +3949,6 @@ dependencies = [
"session",
"snafu 0.8.6",
"store-api",
"substrait 0.18.0",
"table",
"tokio",
"toml 0.8.23",
@@ -4788,6 +4789,7 @@ dependencies = [
"common-query",
"common-recordbatch",
"common-runtime",
"common-stat",
"common-telemetry",
"common-time",
"common-version",
@@ -4804,6 +4806,7 @@ dependencies = [
"futures",
"get-size2",
"greptime-proto",
"hostname 0.4.1",
"http 1.3.1",
"humantime-serde",
"itertools 0.14.0",
@@ -4920,6 +4923,7 @@ dependencies = [
"datanode",
"datatypes",
"futures",
"hostname 0.4.1",
"humantime",
"humantime-serde",
"lazy_static",
@@ -4945,7 +4949,6 @@ dependencies = [
"sqlparser 0.55.0-greptime",
"store-api",
"strfmt",
"substrait 0.18.0",
"table",
"tokio",
"tokio-util",
@@ -5325,7 +5328,7 @@ dependencies = [
[[package]]
name = "greptime-proto"
version = "0.1.0"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=d75496d5d09dedcd0edcade57ccf0a522f4393ae#d75496d5d09dedcd0edcade57ccf0a522f4393ae"
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=69a6089933daa573c96808ec4bbc48f447ec6e8c#69a6089933daa573c96808ec4bbc48f447ec6e8c"
dependencies = [
"prost 0.13.5",
"prost-types 0.13.5",
@@ -6119,6 +6122,7 @@ dependencies = [
"serde",
"serde_json",
"snafu 0.8.6",
"store-api",
"tantivy",
"tantivy-jieba",
"tempfile",
@@ -7412,6 +7416,7 @@ dependencies = [
"etcd-client",
"futures",
"h2 0.3.26",
"hostname 0.4.1",
"http-body-util",
"humantime",
"humantime-serde",
@@ -7609,6 +7614,7 @@ dependencies = [
"common-query",
"common-recordbatch",
"common-runtime",
"common-stat",
"common-telemetry",
"common-test-util",
"common-time",
@@ -7624,6 +7630,7 @@ dependencies = [
"dotenv",
"either",
"futures",
"greptime-proto",
"humantime-serde",
"index",
"itertools 0.14.0",
@@ -9183,9 +9190,9 @@ dependencies = [
[[package]]
name = "pgwire"
version = "0.33.0"
version = "0.34.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f58d371668e6151da16be31308989058156c01257277ea8af0f97524e87cfa31"
checksum = "c748793f2a9267fa2aa409d9375a5e26e4f1504ea96e34f8cab3e2fc32042d69"
dependencies = [
"async-trait",
"base64 0.22.1",
@@ -9201,6 +9208,8 @@ dependencies = [
"ring",
"rust_decimal",
"rustls-pki-types",
"serde",
"serde_json",
"stringprep",
"thiserror 2.0.17",
"tokio",
@@ -11550,6 +11559,7 @@ dependencies = [
"client",
"common-base",
"common-catalog",
"common-decimal",
"common-error",
"common-frontend",
"common-grpc",
@@ -11996,6 +12006,7 @@ dependencies = [
"common-macro",
"common-query",
"common-sql",
"common-telemetry",
"common-time",
"datafusion",
"datafusion-common",
@@ -12354,6 +12365,7 @@ dependencies = [
"common-options",
"common-procedure",
"common-query",
"common-stat",
"common-telemetry",
"common-time",
"common-version",
@@ -12362,6 +12374,7 @@ dependencies = [
"file-engine",
"flow",
"frontend",
"hostname 0.4.1",
"log-store",
"mito2",
"query",


@@ -147,7 +147,7 @@ etcd-client = { git = "https://github.com/GreptimeTeam/etcd-client", rev = "f62d
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "d75496d5d09dedcd0edcade57ccf0a522f4393ae" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "69a6089933daa573c96808ec4bbc48f447ec6e8c" }
hex = "0.4"
http = "1"
humantime = "2.1"
@@ -207,6 +207,7 @@ rstest_reuse = "0.7"
rust_decimal = "1.33"
rustc-hash = "2.0"
# It is worth noting that we should try to avoid using aws-lc-rs until it can be compiled on various platforms.
hostname = "0.4.0"
rustls = { version = "0.23.25", default-features = false }
sea-query = "0.32"
serde = { version = "1.0", features = ["derive"] }


@@ -8,7 +8,7 @@ CARGO_BUILD_OPTS := --locked
IMAGE_REGISTRY ?= docker.io
IMAGE_NAMESPACE ?= greptime
IMAGE_TAG ?= latest
DEV_BUILDER_IMAGE_TAG ?= 2025-05-19-f55023f3-20250829091211
DEV_BUILDER_IMAGE_TAG ?= 2025-10-01-8fe17d43-20251011080129
BUILDX_MULTI_PLATFORM_BUILD ?= false
BUILDX_BUILDER_NAME ?= gtbuilder
BASE_IMAGE ?= ubuntu


@@ -153,7 +153,7 @@
| `region_engine.mito.max_concurrent_scan_files` | Integer | `384` | Maximum number of SST files to scan concurrently. |
| `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
| `region_engine.mito.min_compaction_interval` | String | `0m` | Minimum time interval between two compactions.<br/>To align with the old behavior, the default value is 0 (no restrictions). |
| `region_engine.mito.enable_experimental_flat_format` | Bool | `false` | Whether to enable experimental flat format. |
| `region_engine.mito.default_experimental_flat_format` | Bool | `false` | Whether to enable experimental flat format as the default format. |
| `region_engine.mito.index` | -- | -- | The options for index in Mito engine. |
| `region_engine.mito.index.aux_path` | String | `""` | Auxiliary directory path for the index in filesystem, used to store intermediate files for<br/>creating the index and staging files for searching the index, defaults to `{data_home}/index_intermediate`.<br/>The default name for this directory is `index_intermediate` for backward compatibility.<br/><br/>This path contains two subdirectories:<br/>- `__intm`: for storing intermediate files used during creating index.<br/>- `staging`: for storing staging files used during searching index. |
| `region_engine.mito.index.staging_size` | String | `2GB` | The max capacity of the staging directory. |
@@ -474,7 +474,7 @@
| `meta_client.metadata_cache_ttl` | String | `10m` | TTL of the metadata cache. |
| `meta_client.metadata_cache_tti` | String | `5m` | -- |
| `wal` | -- | -- | The WAL options. |
| `wal.provider` | String | `raft_engine` | The provider of the WAL.<br/>- `raft_engine`: the wal is stored in the local file system by raft-engine.<br/>- `kafka`: it's remote wal that data is stored in Kafka. |
| `wal.provider` | String | `raft_engine` | The provider of the WAL.<br/>- `raft_engine`: the wal is stored in the local file system by raft-engine.<br/>- `kafka`: it's remote wal that data is stored in Kafka.<br/>- `noop`: it's a no-op WAL provider that does not store any WAL data.<br/>**Note: any unflushed data will be lost when the datanode is shut down.** |
| `wal.dir` | String | Unset | The directory to store the WAL files.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.file_size` | String | `128MB` | The size of the WAL segment file.<br/>**It's only used when the provider is `raft_engine`**. |
| `wal.purge_threshold` | String | `1GB` | The threshold of the WAL size to trigger a purge.<br/>**It's only used when the provider is `raft_engine`**. |
@@ -547,7 +547,7 @@
| `region_engine.mito.max_concurrent_scan_files` | Integer | `384` | Maximum number of SST files to scan concurrently. |
| `region_engine.mito.allow_stale_entries` | Bool | `false` | Whether to allow stale WAL entries read during replay. |
| `region_engine.mito.min_compaction_interval` | String | `0m` | Minimum time interval between two compactions.<br/>To align with the old behavior, the default value is 0 (no restrictions). |
| `region_engine.mito.enable_experimental_flat_format` | Bool | `false` | Whether to enable experimental flat format. |
| `region_engine.mito.default_experimental_flat_format` | Bool | `false` | Whether to enable experimental flat format as the default format. |
| `region_engine.mito.index` | -- | -- | The options for index in Mito engine. |
| `region_engine.mito.index.aux_path` | String | `""` | Auxiliary directory path for the index in filesystem, used to store intermediate files for<br/>creating the index and staging files for searching the index, defaults to `{data_home}/index_intermediate`.<br/>The default name for this directory is `index_intermediate` for backward compatibility.<br/><br/>This path contains two subdirectories:<br/>- `__intm`: for storing intermediate files used during creating index.<br/>- `staging`: for storing staging files used during searching index. |
| `region_engine.mito.index.staging_size` | String | `2GB` | The max capacity of the staging directory. |


@@ -118,6 +118,7 @@ metadata_cache_tti = "5m"
## The provider of the WAL.
## - `raft_engine`: the wal is stored in the local file system by raft-engine.
## - `kafka`: it's remote wal that data is stored in Kafka.
## - `noop`: it's a no-op WAL provider that does not store any WAL data.<br/>**Note: any unflushed data will be lost when the datanode is shut down.**
provider = "raft_engine"
## The directory to store the WAL files.
@@ -500,8 +501,8 @@ allow_stale_entries = false
## To align with the old behavior, the default value is 0 (no restrictions).
min_compaction_interval = "0m"
## Whether to enable experimental flat format.
enable_experimental_flat_format = false
## Whether to enable experimental flat format as the default format.
default_experimental_flat_format = false
## The options for index in Mito engine.
[region_engine.mito.index]


@@ -584,8 +584,8 @@ allow_stale_entries = false
## To align with the old behavior, the default value is 0 (no restrictions).
min_compaction_interval = "0m"
## Whether to enable experimental flat format.
enable_experimental_flat_format = false
## Whether to enable experimental flat format as the default format.
default_experimental_flat_format = false
## The options for index in Mito engine.
[region_engine.mito.index]


@@ -21,30 +21,22 @@ use common_time::time::Time;
use common_time::timestamp::TimeUnit;
use common_time::{Date, IntervalDayTime, IntervalMonthDayNano, IntervalYearMonth, Timestamp};
use datatypes::prelude::{ConcreteDataType, ValueRef};
use datatypes::scalars::ScalarVector;
use datatypes::types::{
Int8Type, Int16Type, IntervalType, StructField, StructType, TimeType, TimestampType, UInt8Type,
UInt16Type,
IntervalType, JsonFormat, StructField, StructType, TimeType, TimestampType,
};
use datatypes::value::{
ListValue, ListValueRef, OrderedF32, OrderedF64, StructValue, StructValueRef, Value,
};
use datatypes::vectors::{
BinaryVector, BooleanVector, DateVector, Decimal128Vector, Float32Vector, Float64Vector,
Int32Vector, Int64Vector, IntervalDayTimeVector, IntervalMonthDayNanoVector,
IntervalYearMonthVector, PrimitiveVector, StringVector, TimeMicrosecondVector,
TimeMillisecondVector, TimeNanosecondVector, TimeSecondVector, TimestampMicrosecondVector,
TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, UInt32Vector,
UInt64Vector, VectorRef,
};
use datatypes::vectors::VectorRef;
use greptime_proto::v1::column_data_type_extension::TypeExt;
use greptime_proto::v1::ddl_request::Expr;
use greptime_proto::v1::greptime_request::Request;
use greptime_proto::v1::query_request::Query;
use greptime_proto::v1::value::ValueData;
use greptime_proto::v1::{
self, ColumnDataTypeExtension, DdlRequest, DecimalTypeExtension, JsonTypeExtension,
ListTypeExtension, QueryRequest, Row, SemanticType, StructTypeExtension, VectorTypeExtension,
self, ColumnDataTypeExtension, DdlRequest, DecimalTypeExtension, JsonNativeTypeExtension,
JsonTypeExtension, ListTypeExtension, QueryRequest, Row, SemanticType, StructTypeExtension,
VectorTypeExtension,
};
use paste::paste;
use snafu::prelude::*;
@@ -116,7 +108,30 @@ impl From<ColumnDataTypeWrapper> for ConcreteDataType {
ConcreteDataType::binary_datatype()
}
}
ColumnDataType::Json => ConcreteDataType::json_datatype(),
ColumnDataType::Json => {
let type_ext = datatype_wrapper
.datatype_ext
.as_ref()
.and_then(|datatype_ext| datatype_ext.type_ext.as_ref());
match type_ext {
Some(TypeExt::JsonType(_)) => {
// legacy json type
ConcreteDataType::json_datatype()
}
Some(TypeExt::JsonNativeType(type_ext)) => {
// native json type
let inner_type = ColumnDataTypeWrapper {
datatype: type_ext.datatype(),
datatype_ext: type_ext.datatype_extension.clone().map(|d| *d),
};
ConcreteDataType::json_native_datatype(inner_type.into())
}
_ => {
// invalid state, type extension is missing or invalid
ConcreteDataType::null_datatype()
}
}
}
ColumnDataType::String => ConcreteDataType::string_datatype(),
ColumnDataType::Date => ConcreteDataType::date_datatype(),
ColumnDataType::Datetime => ConcreteDataType::timestamp_microsecond_datatype(),
@@ -171,7 +186,7 @@ impl From<ColumnDataTypeWrapper> for ConcreteDataType {
datatype: d.datatype(),
datatype_ext: d.datatype_extension.clone().map(|d| *d),
};
ConcreteDataType::list_datatype(item_type.into())
ConcreteDataType::list_datatype(Arc::new(item_type.into()))
} else {
// invalid state: type extension not found
ConcreteDataType::null_datatype()
@@ -194,7 +209,7 @@ impl From<ColumnDataTypeWrapper> for ConcreteDataType {
StructField::new(f.name.clone(), field_type.into(), true)
})
.collect::<Vec<_>>();
ConcreteDataType::struct_datatype(StructType::from(fields))
ConcreteDataType::struct_datatype(StructType::new(Arc::new(fields)))
} else {
// invalid state: type extension not found
ConcreteDataType::null_datatype()
@@ -383,9 +398,28 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
})),
})
}
ColumnDataType::Json => datatype.as_json().map(|_| ColumnDataTypeExtension {
type_ext: Some(TypeExt::JsonType(JsonTypeExtension::JsonBinary.into())),
}),
ColumnDataType::Json => {
if let Some(json_type) = datatype.as_json() {
match &json_type.format {
JsonFormat::Jsonb => Some(ColumnDataTypeExtension {
type_ext: Some(TypeExt::JsonType(JsonTypeExtension::JsonBinary.into())),
}),
JsonFormat::Native(inner) => {
let inner_type = ColumnDataTypeWrapper::try_from(*inner.clone())?;
Some(ColumnDataTypeExtension {
type_ext: Some(TypeExt::JsonNativeType(Box::new(
JsonNativeTypeExtension {
datatype: inner_type.datatype.into(),
datatype_extension: inner_type.datatype_ext.map(Box::new),
},
))),
})
}
}
} else {
None
}
}
ColumnDataType::Vector => {
datatype
.as_vector()
@@ -412,7 +446,7 @@ impl TryFrom<ConcreteDataType> for ColumnDataTypeWrapper {
ColumnDataType::Struct => {
if let Some(struct_type) = datatype.as_struct() {
let mut fields = Vec::with_capacity(struct_type.fields().len());
for field in struct_type.fields() {
for field in struct_type.fields().iter() {
let field_type =
ColumnDataTypeWrapper::try_from(field.data_type().clone())?;
let proto_field = crate::v1::StructField {
@@ -549,7 +583,10 @@ pub fn values_with_capacity(datatype: ColumnDataType, capacity: usize) -> Values
..Default::default()
},
ColumnDataType::Json => Values {
// TODO(sunng87): remove this when we finally sunset legacy jsonb
string_values: Vec::with_capacity(capacity),
// for native json
json_values: Vec::with_capacity(capacity),
..Default::default()
},
ColumnDataType::Vector => Values {
@@ -719,7 +756,7 @@ pub fn pb_value_to_value_ref<'a>(
let list_value = ListValueRef::RefList {
val: items,
item_datatype: item_type.clone(),
item_datatype: Arc::new(item_type.clone()),
};
ValueRef::List(list_value)
}
@@ -758,260 +795,27 @@ pub fn pb_value_to_value_ref<'a>(
let struct_value_ref = StructValueRef::RefList {
val: items,
fields: StructType::new(struct_fields),
fields: StructType::new(Arc::new(struct_fields)),
};
ValueRef::Struct(struct_value_ref)
}
}
}
pub fn pb_values_to_vector_ref(data_type: &ConcreteDataType, values: Values) -> VectorRef {
match data_type {
ConcreteDataType::Boolean(_) => Arc::new(BooleanVector::from(values.bool_values)),
ConcreteDataType::Int8(_) => Arc::new(PrimitiveVector::<Int8Type>::from_iter_values(
values.i8_values.into_iter().map(|x| x as i8),
)),
ConcreteDataType::Int16(_) => Arc::new(PrimitiveVector::<Int16Type>::from_iter_values(
values.i16_values.into_iter().map(|x| x as i16),
)),
ConcreteDataType::Int32(_) => Arc::new(Int32Vector::from_vec(values.i32_values)),
ConcreteDataType::Int64(_) => Arc::new(Int64Vector::from_vec(values.i64_values)),
ConcreteDataType::UInt8(_) => Arc::new(PrimitiveVector::<UInt8Type>::from_iter_values(
values.u8_values.into_iter().map(|x| x as u8),
)),
ConcreteDataType::UInt16(_) => Arc::new(PrimitiveVector::<UInt16Type>::from_iter_values(
values.u16_values.into_iter().map(|x| x as u16),
)),
ConcreteDataType::UInt32(_) => Arc::new(UInt32Vector::from_vec(values.u32_values)),
ConcreteDataType::UInt64(_) => Arc::new(UInt64Vector::from_vec(values.u64_values)),
ConcreteDataType::Float32(_) => Arc::new(Float32Vector::from_vec(values.f32_values)),
ConcreteDataType::Float64(_) => Arc::new(Float64Vector::from_vec(values.f64_values)),
ConcreteDataType::Binary(_) => Arc::new(BinaryVector::from(values.binary_values)),
ConcreteDataType::String(_) => Arc::new(StringVector::from_vec(values.string_values)),
ConcreteDataType::Date(_) => Arc::new(DateVector::from_vec(values.date_values)),
ConcreteDataType::Timestamp(unit) => match unit {
TimestampType::Second(_) => Arc::new(TimestampSecondVector::from_vec(
values.timestamp_second_values,
)),
TimestampType::Millisecond(_) => Arc::new(TimestampMillisecondVector::from_vec(
values.timestamp_millisecond_values,
)),
TimestampType::Microsecond(_) => Arc::new(TimestampMicrosecondVector::from_vec(
values.timestamp_microsecond_values,
)),
TimestampType::Nanosecond(_) => Arc::new(TimestampNanosecondVector::from_vec(
values.timestamp_nanosecond_values,
)),
},
ConcreteDataType::Time(unit) => match unit {
TimeType::Second(_) => Arc::new(TimeSecondVector::from_iter_values(
values.time_second_values.iter().map(|x| *x as i32),
)),
TimeType::Millisecond(_) => Arc::new(TimeMillisecondVector::from_iter_values(
values.time_millisecond_values.iter().map(|x| *x as i32),
)),
TimeType::Microsecond(_) => Arc::new(TimeMicrosecondVector::from_vec(
values.time_microsecond_values,
)),
TimeType::Nanosecond(_) => Arc::new(TimeNanosecondVector::from_vec(
values.time_nanosecond_values,
)),
},
ValueData::JsonValue(inner_value) => {
let json_datatype_ext = datatype_ext
.as_ref()
.and_then(|ext| {
if let Some(TypeExt::JsonNativeType(l)) = &ext.type_ext {
Some(l)
} else {
None
}
})
.expect("json value must contain datatype ext");
ConcreteDataType::Interval(unit) => match unit {
IntervalType::YearMonth(_) => Arc::new(IntervalYearMonthVector::from_vec(
values.interval_year_month_values,
)),
IntervalType::DayTime(_) => Arc::new(IntervalDayTimeVector::from_iter_values(
values
.interval_day_time_values
.iter()
.map(|x| IntervalDayTime::from_i64(*x).into()),
)),
IntervalType::MonthDayNano(_) => {
Arc::new(IntervalMonthDayNanoVector::from_iter_values(
values
.interval_month_day_nano_values
.iter()
.map(|x| IntervalMonthDayNano::new(x.months, x.days, x.nanoseconds).into()),
))
}
},
ConcreteDataType::Decimal128(d) => Arc::new(Decimal128Vector::from_values(
values.decimal128_values.iter().map(|x| {
Decimal128::from_value_precision_scale(x.hi, x.lo, d.precision(), d.scale()).into()
}),
)),
ConcreteDataType::Vector(_) => Arc::new(BinaryVector::from_vec(values.binary_values)),
ConcreteDataType::Null(_)
| ConcreteDataType::List(_)
| ConcreteDataType::Struct(_)
| ConcreteDataType::Dictionary(_)
| ConcreteDataType::Duration(_)
| ConcreteDataType::Json(_) => {
unreachable!()
}
}
}
pub fn pb_values_to_values(data_type: &ConcreteDataType, values: Values) -> Vec<Value> {
match data_type {
ConcreteDataType::Int64(_) => values
.i64_values
.into_iter()
.map(|val| val.into())
.collect(),
ConcreteDataType::Float64(_) => values
.f64_values
.into_iter()
.map(|val| val.into())
.collect(),
ConcreteDataType::String(_) => values
.string_values
.into_iter()
.map(|val| val.into())
.collect(),
ConcreteDataType::Boolean(_) => values
.bool_values
.into_iter()
.map(|val| val.into())
.collect(),
ConcreteDataType::Int8(_) => values
.i8_values
.into_iter()
// Safety: Since i32 only stores i8 data here, so i32 as i8 is safe.
.map(|val| (val as i8).into())
.collect(),
ConcreteDataType::Int16(_) => values
.i16_values
.into_iter()
// Safety: Since i32 only stores i16 data here, so i32 as i16 is safe.
.map(|val| (val as i16).into())
.collect(),
ConcreteDataType::Int32(_) => values
.i32_values
.into_iter()
.map(|val| val.into())
.collect(),
ConcreteDataType::UInt8(_) => values
.u8_values
.into_iter()
// Safety: Since i32 only stores u8 data here, so i32 as u8 is safe.
.map(|val| (val as u8).into())
.collect(),
ConcreteDataType::UInt16(_) => values
.u16_values
.into_iter()
// Safety: Since i32 only stores u16 data here, so i32 as u16 is safe.
.map(|val| (val as u16).into())
.collect(),
ConcreteDataType::UInt32(_) => values
.u32_values
.into_iter()
.map(|val| val.into())
.collect(),
ConcreteDataType::UInt64(_) => values
.u64_values
.into_iter()
.map(|val| val.into())
.collect(),
ConcreteDataType::Float32(_) => values
.f32_values
.into_iter()
.map(|val| val.into())
.collect(),
ConcreteDataType::Binary(_) => values
.binary_values
.into_iter()
.map(|val| val.into())
.collect(),
ConcreteDataType::Date(_) => values
.date_values
.into_iter()
.map(|v| Value::Date(v.into()))
.collect(),
ConcreteDataType::Timestamp(TimestampType::Second(_)) => values
.timestamp_second_values
.into_iter()
.map(|v| Value::Timestamp(Timestamp::new_second(v)))
.collect(),
ConcreteDataType::Timestamp(TimestampType::Millisecond(_)) => values
.timestamp_millisecond_values
.into_iter()
.map(|v| Value::Timestamp(Timestamp::new_millisecond(v)))
.collect(),
ConcreteDataType::Timestamp(TimestampType::Microsecond(_)) => values
.timestamp_microsecond_values
.into_iter()
.map(|v| Value::Timestamp(Timestamp::new_microsecond(v)))
.collect(),
ConcreteDataType::Timestamp(TimestampType::Nanosecond(_)) => values
.timestamp_nanosecond_values
.into_iter()
.map(|v| Value::Timestamp(Timestamp::new_nanosecond(v)))
.collect(),
ConcreteDataType::Time(TimeType::Second(_)) => values
.time_second_values
.into_iter()
.map(|v| Value::Time(Time::new_second(v)))
.collect(),
ConcreteDataType::Time(TimeType::Millisecond(_)) => values
.time_millisecond_values
.into_iter()
.map(|v| Value::Time(Time::new_millisecond(v)))
.collect(),
ConcreteDataType::Time(TimeType::Microsecond(_)) => values
.time_microsecond_values
.into_iter()
.map(|v| Value::Time(Time::new_microsecond(v)))
.collect(),
ConcreteDataType::Time(TimeType::Nanosecond(_)) => values
.time_nanosecond_values
.into_iter()
.map(|v| Value::Time(Time::new_nanosecond(v)))
.collect(),
ConcreteDataType::Interval(IntervalType::YearMonth(_)) => values
.interval_year_month_values
.into_iter()
.map(|v| Value::IntervalYearMonth(IntervalYearMonth::from_i32(v)))
.collect(),
ConcreteDataType::Interval(IntervalType::DayTime(_)) => values
.interval_day_time_values
.into_iter()
.map(|v| Value::IntervalDayTime(IntervalDayTime::from_i64(v)))
.collect(),
ConcreteDataType::Interval(IntervalType::MonthDayNano(_)) => values
.interval_month_day_nano_values
.into_iter()
.map(|v| {
Value::IntervalMonthDayNano(IntervalMonthDayNano::new(
v.months,
v.days,
v.nanoseconds,
))
})
.collect(),
ConcreteDataType::Decimal128(d) => values
.decimal128_values
.into_iter()
.map(|v| {
Value::Decimal128(Decimal128::from_value_precision_scale(
v.hi,
v.lo,
d.precision(),
d.scale(),
))
})
.collect(),
ConcreteDataType::Vector(_) => values.binary_values.into_iter().map(|v| v.into()).collect(),
ConcreteDataType::Null(_)
| ConcreteDataType::List(_)
| ConcreteDataType::Struct(_)
| ConcreteDataType::Dictionary(_)
| ConcreteDataType::Duration(_)
| ConcreteDataType::Json(_) => {
unreachable!()
ValueRef::Json(Box::new(pb_value_to_value_ref(
inner_value,
json_datatype_ext.datatype_extension.as_deref(),
)))
}
}
}
@@ -1133,6 +937,9 @@ pub fn to_proto_value(value: Value) -> v1::Value {
items: convert_struct_to_pb_values(struct_value),
})),
},
Value::Json(v) => v1::Value {
value_data: Some(ValueData::JsonValue(Box::new(to_proto_value(*v)))),
},
Value::Duration(_) => v1::Value { value_data: None },
}
}
@@ -1187,6 +994,7 @@ pub fn proto_value_type(value: &v1::Value) -> Option<ColumnDataType> {
ValueData::Decimal128Value(_) => ColumnDataType::Decimal128,
ValueData::ListValue(_) => ColumnDataType::List,
ValueData::StructValue(_) => ColumnDataType::Struct,
ValueData::JsonValue(_) => ColumnDataType::Json,
};
Some(value_type)
}
@@ -1257,6 +1065,9 @@ pub fn value_to_grpc_value(value: Value) -> GrpcValue {
.collect();
Some(ValueData::StructValue(v1::StructValue { items }))
}
Value::Json(inner_value) => Some(ValueData::JsonValue(Box::new(value_to_grpc_value(
*inner_value,
)))),
Value::Duration(_) => unreachable!(),
},
}
@@ -1350,13 +1161,11 @@ mod tests {
use std::sync::Arc;
use common_time::interval::IntervalUnit;
use datatypes::types::{
Int32Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalYearMonthType,
TimeMillisecondType, TimeSecondType, TimestampMillisecondType, TimestampSecondType,
UInt32Type,
use datatypes::scalars::ScalarVector;
use datatypes::types::{Int8Type, Int32Type, UInt8Type, UInt32Type};
use datatypes::vectors::{
BooleanVector, DateVector, Float32Vector, PrimitiveVector, StringVector,
};
use datatypes::vectors::BooleanVector;
use paste::paste;
use super::*;
use crate::v1::Column;
@@ -1446,6 +1255,10 @@ mod tests {
let values = values_with_capacity(ColumnDataType::Struct, 2);
let values = values.struct_values;
assert_eq!(2, values.capacity());
let values = values_with_capacity(ColumnDataType::Json, 2);
assert_eq!(2, values.json_values.capacity());
assert_eq!(2, values.string_values.capacity());
}
#[test]
@@ -1539,10 +1352,10 @@ mod tests {
ColumnDataTypeWrapper::vector_datatype(3).into()
);
assert_eq!(
ConcreteDataType::list_datatype(ConcreteDataType::string_datatype()),
ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::string_datatype())),
ColumnDataTypeWrapper::list_datatype(ColumnDataTypeWrapper::string_datatype()).into()
);
let struct_type = StructType::new(vec![
let struct_type = StructType::new(Arc::new(vec![
StructField::new("id".to_string(), ConcreteDataType::int64_datatype(), true),
StructField::new(
"name".to_string(),
@@ -1555,7 +1368,7 @@ mod tests {
ConcreteDataType::string_datatype(),
true,
),
]);
]));
assert_eq!(
ConcreteDataType::struct_datatype(struct_type.clone()),
ColumnDataTypeWrapper::struct_datatype(vec![
@@ -1569,6 +1382,54 @@ mod tests {
])
.into()
);
assert_eq!(
ConcreteDataType::json_native_datatype(ConcreteDataType::struct_datatype(
struct_type.clone()
)),
ColumnDataTypeWrapper::new(
ColumnDataType::Json,
Some(ColumnDataTypeExtension {
type_ext: Some(TypeExt::JsonNativeType(Box::new(JsonNativeTypeExtension {
datatype: ColumnDataType::Struct.into(),
datatype_extension: Some(Box::new(ColumnDataTypeExtension {
type_ext: Some(TypeExt::StructType(StructTypeExtension {
fields: vec![
v1::StructField {
name: "id".to_string(),
datatype: ColumnDataTypeWrapper::int64_datatype()
.datatype()
.into(),
datatype_extension: None
},
v1::StructField {
name: "name".to_string(),
datatype: ColumnDataTypeWrapper::string_datatype()
.datatype()
.into(),
datatype_extension: None
},
v1::StructField {
name: "age".to_string(),
datatype: ColumnDataTypeWrapper::int32_datatype()
.datatype()
.into(),
datatype_extension: None
},
v1::StructField {
name: "address".to_string(),
datatype: ColumnDataTypeWrapper::string_datatype()
.datatype()
.into(),
datatype_extension: None
}
]
}))
}))
})))
})
)
.into()
)
}
#[test]
@@ -1674,7 +1535,7 @@ mod tests {
assert_eq!(
ColumnDataTypeWrapper::list_datatype(ColumnDataTypeWrapper::int16_datatype()),
ConcreteDataType::list_datatype(ConcreteDataType::int16_datatype())
ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int16_datatype()))
.try_into()
.expect("Failed to create column datatype from List(ListType { item_type: Int16(Int16Type) })")
);
@@ -1687,14 +1548,78 @@ mod tests {
ColumnDataTypeWrapper::list_datatype(ColumnDataTypeWrapper::string_datatype())
)
]),
ConcreteDataType::struct_datatype(StructType::new(vec![
ConcreteDataType::struct_datatype(StructType::new(Arc::new(vec![
StructField::new("a".to_string(), ConcreteDataType::int64_datatype(), true),
StructField::new(
"a.a".to_string(),
ConcreteDataType::list_datatype(ConcreteDataType::string_datatype()), true
ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::string_datatype())), true
)
])).try_into().expect("Failed to create column datatype from Struct(StructType { fields: [StructField { name: \"a\", data_type: Int64(Int64Type) }, StructField { name: \"a.a\", data_type: List(ListType { item_type: String(StringType) }) }] })")
)
]))).try_into().expect("Failed to create column datatype from Struct(StructType { fields: [StructField { name: \"a\", data_type: Int64(Int64Type) }, StructField { name: \"a.a\", data_type: List(ListType { item_type: String(StringType) }) }] })")
);
let struct_type = StructType::new(Arc::new(vec![
StructField::new("id".to_string(), ConcreteDataType::int64_datatype(), true),
StructField::new(
"name".to_string(),
ConcreteDataType::string_datatype(),
true,
),
StructField::new("age".to_string(), ConcreteDataType::int32_datatype(), true),
StructField::new(
"address".to_string(),
ConcreteDataType::string_datatype(),
true,
),
]));
assert_eq!(
ColumnDataTypeWrapper::new(
ColumnDataType::Json,
Some(ColumnDataTypeExtension {
type_ext: Some(TypeExt::JsonNativeType(Box::new(JsonNativeTypeExtension {
datatype: ColumnDataType::Struct.into(),
datatype_extension: Some(Box::new(ColumnDataTypeExtension {
type_ext: Some(TypeExt::StructType(StructTypeExtension {
fields: vec![
v1::StructField {
name: "id".to_string(),
datatype: ColumnDataTypeWrapper::int64_datatype()
.datatype()
.into(),
datatype_extension: None
},
v1::StructField {
name: "name".to_string(),
datatype: ColumnDataTypeWrapper::string_datatype()
.datatype()
.into(),
datatype_extension: None
},
v1::StructField {
name: "age".to_string(),
datatype: ColumnDataTypeWrapper::int32_datatype()
.datatype()
.into(),
datatype_extension: None
},
v1::StructField {
name: "address".to_string(),
datatype: ColumnDataTypeWrapper::string_datatype()
.datatype()
.into(),
datatype_extension: None
}
]
}))
}))
})))
})
),
ConcreteDataType::json_native_datatype(ConcreteDataType::struct_datatype(
struct_type.clone()
))
.try_into()
.expect("failed to convert json type")
);
}
#[test]
@@ -1706,269 +1631,6 @@ mod tests {
assert_eq!(interval.nanoseconds, 3);
}
#[test]
fn test_convert_timestamp_values() {
// second
let actual = pb_values_to_values(
&ConcreteDataType::Timestamp(TimestampType::Second(TimestampSecondType)),
Values {
timestamp_second_values: vec![1_i64, 2_i64, 3_i64],
..Default::default()
},
);
let expect = vec![
Value::Timestamp(Timestamp::new_second(1_i64)),
Value::Timestamp(Timestamp::new_second(2_i64)),
Value::Timestamp(Timestamp::new_second(3_i64)),
];
assert_eq!(expect, actual);
// millisecond
let actual = pb_values_to_values(
&ConcreteDataType::Timestamp(TimestampType::Millisecond(TimestampMillisecondType)),
Values {
timestamp_millisecond_values: vec![1_i64, 2_i64, 3_i64],
..Default::default()
},
);
let expect = vec![
Value::Timestamp(Timestamp::new_millisecond(1_i64)),
Value::Timestamp(Timestamp::new_millisecond(2_i64)),
Value::Timestamp(Timestamp::new_millisecond(3_i64)),
];
assert_eq!(expect, actual);
}
#[test]
fn test_convert_time_values() {
// second
let actual = pb_values_to_values(
&ConcreteDataType::Time(TimeType::Second(TimeSecondType)),
Values {
time_second_values: vec![1_i64, 2_i64, 3_i64],
..Default::default()
},
);
let expect = vec![
Value::Time(Time::new_second(1_i64)),
Value::Time(Time::new_second(2_i64)),
Value::Time(Time::new_second(3_i64)),
];
assert_eq!(expect, actual);
// millisecond
let actual = pb_values_to_values(
&ConcreteDataType::Time(TimeType::Millisecond(TimeMillisecondType)),
Values {
time_millisecond_values: vec![1_i64, 2_i64, 3_i64],
..Default::default()
},
);
let expect = vec![
Value::Time(Time::new_millisecond(1_i64)),
Value::Time(Time::new_millisecond(2_i64)),
Value::Time(Time::new_millisecond(3_i64)),
];
assert_eq!(expect, actual);
}
#[test]
fn test_convert_interval_values() {
// year_month
let actual = pb_values_to_values(
&ConcreteDataType::Interval(IntervalType::YearMonth(IntervalYearMonthType)),
Values {
interval_year_month_values: vec![1_i32, 2_i32, 3_i32],
..Default::default()
},
);
let expect = vec![
Value::IntervalYearMonth(IntervalYearMonth::new(1_i32)),
Value::IntervalYearMonth(IntervalYearMonth::new(2_i32)),
Value::IntervalYearMonth(IntervalYearMonth::new(3_i32)),
];
assert_eq!(expect, actual);
// day_time
let actual = pb_values_to_values(
&ConcreteDataType::Interval(IntervalType::DayTime(IntervalDayTimeType)),
Values {
interval_day_time_values: vec![1_i64, 2_i64, 3_i64],
..Default::default()
},
);
let expect = vec![
Value::IntervalDayTime(IntervalDayTime::from_i64(1_i64)),
Value::IntervalDayTime(IntervalDayTime::from_i64(2_i64)),
Value::IntervalDayTime(IntervalDayTime::from_i64(3_i64)),
];
assert_eq!(expect, actual);
// month_day_nano
let actual = pb_values_to_values(
&ConcreteDataType::Interval(IntervalType::MonthDayNano(IntervalMonthDayNanoType)),
Values {
interval_month_day_nano_values: vec![
v1::IntervalMonthDayNano {
months: 1,
days: 2,
nanoseconds: 3,
},
v1::IntervalMonthDayNano {
months: 5,
days: 6,
nanoseconds: 7,
},
v1::IntervalMonthDayNano {
months: 9,
days: 10,
nanoseconds: 11,
},
],
..Default::default()
},
);
let expect = vec![
Value::IntervalMonthDayNano(IntervalMonthDayNano::new(1, 2, 3)),
Value::IntervalMonthDayNano(IntervalMonthDayNano::new(5, 6, 7)),
Value::IntervalMonthDayNano(IntervalMonthDayNano::new(9, 10, 11)),
];
assert_eq!(expect, actual);
}
macro_rules! test_convert_values {
($grpc_data_type: ident, $values: expr, $concrete_data_type: ident, $expected_ret: expr) => {
paste! {
#[test]
fn [<test_convert_ $grpc_data_type _values>]() {
let values = Values {
[<$grpc_data_type _values>]: $values,
..Default::default()
};
let data_type = ConcreteDataType::[<$concrete_data_type _datatype>]();
let result = pb_values_to_values(&data_type, values);
assert_eq!(
$expected_ret,
result
);
}
}
};
}
test_convert_values!(
i8,
vec![1_i32, 2, 3],
int8,
vec![Value::Int8(1), Value::Int8(2), Value::Int8(3)]
);
test_convert_values!(
u8,
vec![1_u32, 2, 3],
uint8,
vec![Value::UInt8(1), Value::UInt8(2), Value::UInt8(3)]
);
test_convert_values!(
i16,
vec![1_i32, 2, 3],
int16,
vec![Value::Int16(1), Value::Int16(2), Value::Int16(3)]
);
test_convert_values!(
u16,
vec![1_u32, 2, 3],
uint16,
vec![Value::UInt16(1), Value::UInt16(2), Value::UInt16(3)]
);
test_convert_values!(
i32,
vec![1, 2, 3],
int32,
vec![Value::Int32(1), Value::Int32(2), Value::Int32(3)]
);
test_convert_values!(
u32,
vec![1, 2, 3],
uint32,
vec![Value::UInt32(1), Value::UInt32(2), Value::UInt32(3)]
);
test_convert_values!(
i64,
vec![1, 2, 3],
int64,
vec![Value::Int64(1), Value::Int64(2), Value::Int64(3)]
);
test_convert_values!(
u64,
vec![1, 2, 3],
uint64,
vec![Value::UInt64(1), Value::UInt64(2), Value::UInt64(3)]
);
test_convert_values!(
f32,
vec![1.0, 2.0, 3.0],
float32,
vec![
Value::Float32(1.0.into()),
Value::Float32(2.0.into()),
Value::Float32(3.0.into())
]
);
test_convert_values!(
f64,
vec![1.0, 2.0, 3.0],
float64,
vec![
Value::Float64(1.0.into()),
Value::Float64(2.0.into()),
Value::Float64(3.0.into())
]
);
test_convert_values!(
string,
vec!["1".to_string(), "2".to_string(), "3".to_string()],
string,
vec![
Value::String("1".into()),
Value::String("2".into()),
Value::String("3".into())
]
);
test_convert_values!(
binary,
vec!["1".into(), "2".into(), "3".into()],
binary,
vec![
Value::Binary(b"1".to_vec().into()),
Value::Binary(b"2".to_vec().into()),
Value::Binary(b"3".to_vec().into())
]
);
test_convert_values!(
date,
vec![1, 2, 3],
date,
vec![
Value::Date(1.into()),
Value::Date(2.into()),
Value::Date(3.into())
]
);
#[test]
fn test_vectors_to_rows_for_different_types() {
let boolean_vec = BooleanVector::from_vec(vec![true, false, true]);
@@ -2075,7 +1737,7 @@ mod tests {
fn test_list_to_pb_value() {
let value = Value::List(ListValue::new(
vec![Value::Boolean(true)],
ConcreteDataType::boolean_datatype(),
Arc::new(ConcreteDataType::boolean_datatype()),
));
let pb_value = to_proto_value(value);
@@ -2095,14 +1757,14 @@ mod tests {
let value = Value::Struct(
StructValue::try_new(
items,
StructType::new(vec![
StructType::new(Arc::new(vec![
StructField::new(
"a.a".to_string(),
ConcreteDataType::boolean_datatype(),
true,
),
StructField::new("a.b".to_string(), ConcreteDataType::string_datatype(), true),
]),
])),
)
.unwrap(),
);

View File

@@ -48,7 +48,7 @@ use datatypes::schema::SchemaRef;
use lazy_static::lazy_static;
use paste::paste;
use process_list::InformationSchemaProcessList;
use store_api::sst_entry::{ManifestSstEntry, StorageSstEntry};
use store_api::sst_entry::{ManifestSstEntry, PuffinIndexMetaEntry, StorageSstEntry};
use store_api::storage::{ScanRequest, TableId};
use table::TableRef;
use table::metadata::TableType;
@@ -68,7 +68,7 @@ use crate::system_schema::information_schema::region_peers::InformationSchemaReg
use crate::system_schema::information_schema::runtime_metrics::InformationSchemaMetrics;
use crate::system_schema::information_schema::schemata::InformationSchemaSchemata;
use crate::system_schema::information_schema::ssts::{
InformationSchemaSstsManifest, InformationSchemaSstsStorage,
InformationSchemaSstsIndexMeta, InformationSchemaSstsManifest, InformationSchemaSstsStorage,
};
use crate::system_schema::information_schema::table_constraints::InformationSchemaTableConstraints;
use crate::system_schema::information_schema::tables::InformationSchemaTables;
@@ -263,6 +263,9 @@ impl SystemSchemaProviderInner for InformationSchemaProvider {
SSTS_STORAGE => Some(Arc::new(InformationSchemaSstsStorage::new(
self.catalog_manager.clone(),
)) as _),
SSTS_INDEX_META => Some(Arc::new(InformationSchemaSstsIndexMeta::new(
self.catalog_manager.clone(),
)) as _),
_ => None,
}
}
@@ -342,6 +345,10 @@ impl InformationSchemaProvider {
SSTS_STORAGE.to_string(),
self.build_table(SSTS_STORAGE).unwrap(),
);
tables.insert(
SSTS_INDEX_META.to_string(),
self.build_table(SSTS_INDEX_META).unwrap(),
);
}
tables.insert(TABLES.to_string(), self.build_table(TABLES).unwrap());
@@ -456,6 +463,8 @@ pub enum DatanodeInspectKind {
SstManifest,
/// List SST entries discovered in storage layer
SstStorage,
/// List index metadata collected from manifest
SstIndexMeta,
}
impl DatanodeInspectRequest {
@@ -464,6 +473,7 @@ impl DatanodeInspectRequest {
match self.kind {
DatanodeInspectKind::SstManifest => ManifestSstEntry::build_plan(self.scan),
DatanodeInspectKind::SstStorage => StorageSstEntry::build_plan(self.scan),
DatanodeInspectKind::SstIndexMeta => PuffinIndexMetaEntry::build_plan(self.scan),
}
}
}

View File

@@ -50,8 +50,9 @@ const PEER_TYPE_METASRV: &str = "METASRV";
const PEER_ID: &str = "peer_id";
const PEER_TYPE: &str = "peer_type";
const PEER_ADDR: &str = "peer_addr";
const CPUS: &str = "cpus";
const MEMORY_BYTES: &str = "memory_bytes";
const PEER_HOSTNAME: &str = "peer_hostname";
const TOTAL_CPU_MILLICORES: &str = "total_cpu_millicores";
const TOTAL_MEMORY_BYTES: &str = "total_memory_bytes";
const VERSION: &str = "version";
const GIT_COMMIT: &str = "git_commit";
const START_TIME: &str = "start_time";
@@ -66,14 +67,15 @@ const INIT_CAPACITY: usize = 42;
/// - `peer_id`: the peer server id.
/// - `peer_type`: the peer type, such as `datanode`, `frontend`, `metasrv` etc.
/// - `peer_addr`: the peer gRPC address.
/// - `cpus`: the number of CPUs of the peer.
/// - `memory_bytes`: the memory bytes of the peer.
/// - `total_cpu_millicores`: the total CPU millicores of the peer.
/// - `total_memory_bytes`: the total memory bytes of the peer.
/// - `version`: the build package version of the peer.
/// - `git_commit`: the build git commit hash of the peer.
/// - `start_time`: the starting time of the peer.
/// - `uptime`: the uptime of the peer.
/// - `active_time`: the time since the last activity of the peer.
/// - `node_status`: the status info of the peer.
/// - `peer_hostname`: the hostname of the peer.
///
#[derive(Debug)]
pub(super) struct InformationSchemaClusterInfo {
@@ -94,8 +96,17 @@ impl InformationSchemaClusterInfo {
ColumnSchema::new(PEER_ID, ConcreteDataType::int64_datatype(), false),
ColumnSchema::new(PEER_TYPE, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(PEER_ADDR, ConcreteDataType::string_datatype(), true),
ColumnSchema::new(CPUS, ConcreteDataType::uint32_datatype(), false),
ColumnSchema::new(MEMORY_BYTES, ConcreteDataType::uint64_datatype(), false),
ColumnSchema::new(PEER_HOSTNAME, ConcreteDataType::string_datatype(), true),
ColumnSchema::new(
TOTAL_CPU_MILLICORES,
ConcreteDataType::uint32_datatype(),
false,
),
ColumnSchema::new(
TOTAL_MEMORY_BYTES,
ConcreteDataType::uint64_datatype(),
false,
),
ColumnSchema::new(VERSION, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(GIT_COMMIT, ConcreteDataType::string_datatype(), false),
ColumnSchema::new(
@@ -155,6 +166,7 @@ struct InformationSchemaClusterInfoBuilder {
peer_ids: Int64VectorBuilder,
peer_types: StringVectorBuilder,
peer_addrs: StringVectorBuilder,
peer_hostnames: StringVectorBuilder,
cpus: UInt32VectorBuilder,
memory_bytes: UInt64VectorBuilder,
versions: StringVectorBuilder,
@@ -173,6 +185,7 @@ impl InformationSchemaClusterInfoBuilder {
peer_ids: Int64VectorBuilder::with_capacity(INIT_CAPACITY),
peer_types: StringVectorBuilder::with_capacity(INIT_CAPACITY),
peer_addrs: StringVectorBuilder::with_capacity(INIT_CAPACITY),
peer_hostnames: StringVectorBuilder::with_capacity(INIT_CAPACITY),
cpus: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
memory_bytes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY),
versions: StringVectorBuilder::with_capacity(INIT_CAPACITY),
@@ -203,6 +216,7 @@ impl InformationSchemaClusterInfoBuilder {
(PEER_ID, &Value::from(peer_id)),
(PEER_TYPE, &Value::from(peer_type)),
(PEER_ADDR, &Value::from(node_info.peer.addr.as_str())),
(PEER_HOSTNAME, &Value::from(node_info.hostname.as_str())),
(VERSION, &Value::from(node_info.version.as_str())),
(GIT_COMMIT, &Value::from(node_info.git_commit.as_str())),
];
@@ -214,6 +228,7 @@ impl InformationSchemaClusterInfoBuilder {
self.peer_ids.push(Some(peer_id));
self.peer_types.push(Some(peer_type));
self.peer_addrs.push(Some(&node_info.peer.addr));
self.peer_hostnames.push(Some(&node_info.hostname));
self.versions.push(Some(&node_info.version));
self.git_commits.push(Some(&node_info.git_commit));
if node_info.start_time_ms > 0 {
@@ -253,6 +268,7 @@ impl InformationSchemaClusterInfoBuilder {
Arc::new(self.peer_ids.finish()),
Arc::new(self.peer_types.finish()),
Arc::new(self.peer_addrs.finish()),
Arc::new(self.peer_hostnames.finish()),
Arc::new(self.cpus.finish()),
Arc::new(self.memory_bytes.finish()),
Arc::new(self.versions.finish()),

View File

@@ -15,20 +15,22 @@
use std::sync::{Arc, Weak};
use common_catalog::consts::{
INFORMATION_SCHEMA_SSTS_MANIFEST_TABLE_ID, INFORMATION_SCHEMA_SSTS_STORAGE_TABLE_ID,
INFORMATION_SCHEMA_SSTS_INDEX_META_TABLE_ID, INFORMATION_SCHEMA_SSTS_MANIFEST_TABLE_ID,
INFORMATION_SCHEMA_SSTS_STORAGE_TABLE_ID,
};
use common_error::ext::BoxedError;
use common_recordbatch::SendableRecordBatchStream;
use common_recordbatch::adapter::AsyncRecordBatchStreamAdapter;
use datatypes::schema::SchemaRef;
use snafu::ResultExt;
use store_api::sst_entry::{ManifestSstEntry, StorageSstEntry};
use store_api::sst_entry::{ManifestSstEntry, PuffinIndexMetaEntry, StorageSstEntry};
use store_api::storage::{ScanRequest, TableId};
use crate::CatalogManager;
use crate::error::{ProjectSchemaSnafu, Result};
use crate::information_schema::{
DatanodeInspectKind, DatanodeInspectRequest, InformationTable, SSTS_MANIFEST, SSTS_STORAGE,
DatanodeInspectKind, DatanodeInspectRequest, InformationTable, SSTS_INDEX_META, SSTS_MANIFEST,
SSTS_STORAGE,
};
use crate::system_schema::utils;
@@ -140,3 +142,58 @@ impl InformationTable for InformationSchemaSstsStorage {
)))
}
}
/// Information schema table for index metadata.
pub struct InformationSchemaSstsIndexMeta {
schema: SchemaRef,
catalog_manager: Weak<dyn CatalogManager>,
}
impl InformationSchemaSstsIndexMeta {
pub(super) fn new(catalog_manager: Weak<dyn CatalogManager>) -> Self {
Self {
schema: PuffinIndexMetaEntry::schema(),
catalog_manager,
}
}
}
impl InformationTable for InformationSchemaSstsIndexMeta {
fn table_id(&self) -> TableId {
INFORMATION_SCHEMA_SSTS_INDEX_META_TABLE_ID
}
fn table_name(&self) -> &'static str {
SSTS_INDEX_META
}
fn schema(&self) -> SchemaRef {
self.schema.clone()
}
fn to_stream(&self, request: ScanRequest) -> Result<SendableRecordBatchStream> {
let schema = if let Some(p) = &request.projection {
Arc::new(self.schema.try_project(p).context(ProjectSchemaSnafu)?)
} else {
self.schema.clone()
};
let info_ext = utils::information_extension(&self.catalog_manager)?;
let req = DatanodeInspectRequest {
kind: DatanodeInspectKind::SstIndexMeta,
scan: request,
};
let future = async move {
info_ext
.inspect_datanode(req)
.await
.map_err(BoxedError::new)
.context(common_recordbatch::error::ExternalSnafu)
};
Ok(Box::pin(AsyncRecordBatchStreamAdapter::new(
schema,
Box::pin(future),
)))
}
}

View File

@@ -50,3 +50,4 @@ pub const REGION_STATISTICS: &str = "region_statistics";
pub const PROCESS_LIST: &str = "process_list";
pub const SSTS_MANIFEST: &str = "ssts_manifest";
pub const SSTS_STORAGE: &str = "ssts_storage";
pub const SSTS_INDEX_META: &str = "ssts_index_meta";

View File

@@ -371,7 +371,8 @@ impl InformationSchemaTablesBuilder {
self.auto_increment.push(Some(0));
self.row_format.push(Some("Fixed"));
self.table_collation.push(Some("utf8_bin"));
self.update_time.push(None);
self.update_time
.push(Some(table_info.meta.updated_on.timestamp().into()));
self.check_time.push(None);
// use mariadb default table version number here
self.version.push(Some(11));

View File

@@ -61,7 +61,6 @@ servers.workspace = true
session.workspace = true
snafu.workspace = true
store-api.workspace = true
substrait.workspace = true
table.workspace = true
tokio.workspace = true
tracing-appender.workspace = true

View File

@@ -157,6 +157,7 @@ fn create_table_info(table_id: TableId, table_name: TableName) -> RawTableInfo {
schema: RawSchema::new(column_schemas),
engine: "mito".to_string(),
created_on: chrono::DateTime::default(),
updated_on: chrono::DateTime::default(),
primary_key_indices: vec![],
next_column_id: columns as u32 + 1,
value_indices: vec![],

View File

@@ -82,7 +82,6 @@ similar-asserts.workspace = true
snafu.workspace = true
common-stat.workspace = true
store-api.workspace = true
substrait.workspace = true
table.workspace = true
tokio.workspace = true
toml.workspace = true

View File

@@ -316,6 +316,13 @@ pub enum Error {
location: Location,
source: standalone::error::Error,
},
#[snafu(display("Invalid WAL provider"))]
InvalidWalProvider {
#[snafu(implicit)]
location: Location,
source: common_wal::error::Error,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -373,6 +380,7 @@ impl ErrorExt for Error {
}
Error::MetaClientInit { source, .. } => source.status_code(),
Error::SchemaNotFound { .. } => StatusCode::DatabaseNotFound,
Error::InvalidWalProvider { .. } => StatusCode::InvalidArguments,
}
}

View File

@@ -18,7 +18,7 @@ use async_trait::async_trait;
use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode;
use common_mem_prof::activate_heap_profile;
use common_stat::{get_cpu_limit, get_memory_limit};
use common_stat::{get_total_cpu_millicores, get_total_memory_bytes};
use common_telemetry::{error, info, warn};
use crate::error::Result;
@@ -125,7 +125,8 @@ pub fn log_versions(version: &str, short_version: &str, app: &str) {
}
pub fn create_resource_limit_metrics(app: &str) {
if let Some(cpu_limit) = get_cpu_limit() {
let cpu_limit = get_total_cpu_millicores();
if cpu_limit > 0 {
info!(
"GreptimeDB start with cpu limit in millicores: {}",
cpu_limit
@@ -133,7 +134,8 @@ pub fn create_resource_limit_metrics(app: &str) {
CPU_LIMIT.with_label_values(&[app]).set(cpu_limit);
}
if let Some(memory_limit) = get_memory_limit() {
let memory_limit = get_total_memory_bytes();
if memory_limit > 0 {
info!(
"GreptimeDB start with memory limit in bytes: {}",
memory_limit

View File

@@ -19,6 +19,7 @@ use std::{fs, path};
use async_trait::async_trait;
use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry};
use catalog::information_schema::InformationExtensionRef;
use catalog::kvbackend::KvBackendCatalogManagerBuilder;
use catalog::process_manager::ProcessManager;
use clap::Parser;
@@ -404,6 +405,8 @@ impl StartCommand {
procedure_manager.clone(),
));
plugins.insert::<InformationExtensionRef>(information_extension.clone());
let process_manager = Arc::new(ProcessManager::new(opts.grpc.server_addr.clone(), None));
let builder = KvBackendCatalogManagerBuilder::new(
information_extension.clone(),
@@ -473,7 +476,11 @@ impl StartCommand {
.step(10)
.build(),
);
let kafka_options = opts.wal.clone().into();
let kafka_options = opts
.wal
.clone()
.try_into()
.context(error::InvalidWalProviderSnafu)?;
let wal_options_allocator = build_wal_options_allocator(&kafka_options, kv_backend.clone())
.await
.context(error::BuildWalOptionsAllocatorSnafu)?;

View File

@@ -108,6 +108,8 @@ pub const INFORMATION_SCHEMA_PROCESS_LIST_TABLE_ID: u32 = 36;
pub const INFORMATION_SCHEMA_SSTS_MANIFEST_TABLE_ID: u32 = 37;
/// id for information_schema.ssts_storage
pub const INFORMATION_SCHEMA_SSTS_STORAGE_TABLE_ID: u32 = 38;
/// id for information_schema.ssts_index_meta
pub const INFORMATION_SCHEMA_SSTS_INDEX_META_TABLE_ID: u32 = 39;
// ----- End of information_schema tables -----

View File

@@ -11,15 +11,14 @@ workspace = true
common-base.workspace = true
common-error.workspace = true
common-macro.workspace = true
common-stat.workspace = true
config.workspace = true
humantime-serde.workspace = true
num_cpus.workspace = true
object-store.workspace = true
serde.workspace = true
serde_json.workspace = true
serde_with.workspace = true
snafu.workspace = true
sysinfo.workspace = true
toml.workspace = true
[dev-dependencies]

View File

@@ -13,61 +13,22 @@
// limitations under the License.
use common_base::readable_size::ReadableSize;
use sysinfo::System;
/// Get the CPU core number of system, aware of cgroups.
pub fn get_cpus() -> usize {
// This function will check cgroups
num_cpus::get()
}
/// Get the total memory of the system.
/// If `cgroup_limits` is enabled, it will also check it.
pub fn get_sys_total_memory() -> Option<ReadableSize> {
if sysinfo::IS_SUPPORTED_SYSTEM {
let mut sys_info = System::new();
sys_info.refresh_memory();
let mut total_memory = sys_info.total_memory();
// Compare with cgroups memory limit, use smaller values
// This method is only implemented for Linux. It always returns None for all other systems.
if let Some(cgroup_limits) = sys_info.cgroup_limits() {
total_memory = total_memory.min(cgroup_limits.total_memory)
}
Some(ReadableSize(total_memory))
} else {
None
}
}
use common_stat::{get_total_cpu_millicores, get_total_memory_readable};
/// `ResourceSpec` holds the static resource specifications of a node,
/// such as CPU cores and memory capacity. These values are fixed
/// at startup and do not change dynamically during runtime.
#[derive(Debug, Clone, Copy)]
pub struct ResourceSpec {
pub cpus: usize,
pub cpus: i64,
pub memory: Option<ReadableSize>,
}
impl Default for ResourceSpec {
fn default() -> Self {
Self {
cpus: get_cpus(),
memory: get_sys_total_memory(),
cpus: get_total_cpu_millicores(),
memory: get_total_memory_readable(),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_get_cpus() {
assert!(get_cpus() > 0);
}
#[test]
fn test_get_sys_total_memory() {
assert!(get_sys_total_memory().unwrap() > ReadableSize::mb(0));
}
}
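A rough equivalent of the removed checks under the new API might look like the following sketch; it assumes `get_total_cpu_millicores` reports a positive value on supported hosts and that total memory may be unreported on some platforms.
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_default_resource_spec() {
let spec = ResourceSpec::default();
// CPU capacity is now reported in millicores, so any host with at
// least one core yields a positive value.
assert!(spec.cpus > 0);
// Total memory may be unknown on unsupported systems; only check it
// when reported.
if let Some(memory) = spec.memory {
assert!(memory > ReadableSize::mb(0));
}
}
}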

View File

@@ -37,6 +37,8 @@ const COMPACT_TYPE_STRICT_WINDOW: &str = "strict_window";
/// Compact type: strict window (short name).
const COMPACT_TYPE_STRICT_WINDOW_SHORT: &str = "swcs";
const DEFAULT_COMPACTION_PARALLELISM: u32 = 1;
#[admin_fn(
name = FlushTableFunction,
display_name = flush_table,
@@ -95,7 +97,7 @@ pub(crate) async fn compact_table(
query_ctx: &QueryContextRef,
params: &[ValueRef<'_>],
) -> Result<Value> {
let request = parse_compact_params(params, query_ctx)?;
let request = parse_compact_request(params, query_ctx)?;
info!("Compact table request: {:?}", request);
let affected_rows = table_mutation_handler
@@ -117,37 +119,46 @@ fn compact_signature() -> Signature {
/// - `[<table_name>]`: only the table name is provided, using the default compaction type: regular
/// - `[<table_name>, <type>]`: specify table name and compaction type. The compaction options will be the defaults.
/// - `[<table_name>, <type>, <options>]`: provides both type and type-specific options.
fn parse_compact_params(
/// - For `twcs`, it accepts `parallelism=[N]` where `N` is an unsigned 32-bit number
/// - For `swcs`, it accepts two numeric parameters: `parallelism` and `window`.
fn parse_compact_request(
params: &[ValueRef<'_>],
query_ctx: &QueryContextRef,
) -> Result<CompactTableRequest> {
ensure!(
!params.is_empty(),
!params.is_empty() && params.len() <= 3,
InvalidFuncArgsSnafu {
err_msg: "Args cannot be empty",
err_msg: format!(
"The length of the args is not correct, expect 1 to 3, have: {}",
params.len()
),
}
);
let (table_name, compact_type) = match params {
let (table_name, compact_type, parallelism) = match params {
// 1. Only the table name: the strategy defaults to twcs with the default parallelism.
[ValueRef::String(table_name)] => (
table_name,
compact_request::Options::Regular(Default::default()),
DEFAULT_COMPACTION_PARALLELISM,
),
// 2. Both table name and strategy are provided.
[
ValueRef::String(table_name),
ValueRef::String(compact_ty_str),
] => {
let compact_type = parse_compact_type(compact_ty_str, None)?;
(table_name, compact_type)
let (compact_type, parallelism) = parse_compact_options(compact_ty_str, None)?;
(table_name, compact_type, parallelism)
}
// 3. Table name, strategy and strategy-specific options
[
ValueRef::String(table_name),
ValueRef::String(compact_ty_str),
ValueRef::String(options_str),
] => {
let compact_type = parse_compact_type(compact_ty_str, Some(options_str))?;
(table_name, compact_type)
let (compact_type, parallelism) =
parse_compact_options(compact_ty_str, Some(options_str))?;
(table_name, compact_type, parallelism)
}
_ => {
return UnsupportedInputDataTypeSnafu {
@@ -167,35 +178,126 @@ fn parse_compact_params(
schema_name,
table_name,
compact_options: compact_type,
parallelism,
})
}
/// Parses compaction strategy type. For `strict_window` or `swcs` strict window compaction is chose,
/// Parses compaction strategy type. For `strict_window` or `swcs` strict window compaction is chosen,
/// otherwise choose regular (TWCS) compaction.
fn parse_compact_type(type_str: &str, option: Option<&str>) -> Result<compact_request::Options> {
fn parse_compact_options(
type_str: &str,
option: Option<&str>,
) -> Result<(compact_request::Options, u32)> {
if type_str.eq_ignore_ascii_case(COMPACT_TYPE_STRICT_WINDOW)
| type_str.eq_ignore_ascii_case(COMPACT_TYPE_STRICT_WINDOW_SHORT)
{
let window_seconds = option
.map(|v| {
i64::from_str(v).map_err(|_| {
InvalidFuncArgsSnafu {
err_msg: format!(
"Compact window is expected to be a valid number, provided: {}",
v
),
}
.build()
})
})
.transpose()?
.unwrap_or(0);
let Some(option_str) = option else {
return Ok((
compact_request::Options::StrictWindow(StrictWindow { window_seconds: 0 }),
DEFAULT_COMPACTION_PARALLELISM,
));
};
Ok(compact_request::Options::StrictWindow(StrictWindow {
window_seconds,
}))
// For compatibility, accept a single number as the window size.
if let Ok(window_seconds) = i64::from_str(option_str) {
return Ok((
compact_request::Options::StrictWindow(StrictWindow { window_seconds }),
DEFAULT_COMPACTION_PARALLELISM,
));
};
// Parse keyword arguments given in the form `key1=value1,key2=value2`.
let mut window_seconds = 0i64;
let mut parallelism = DEFAULT_COMPACTION_PARALLELISM;
let pairs: Vec<&str> = option_str.split(',').collect();
for pair in pairs {
let kv: Vec<&str> = pair.trim().split('=').collect();
if kv.len() != 2 {
return InvalidFuncArgsSnafu {
err_msg: format!("Invalid key-value pair: {}", pair.trim()),
}
.fail();
}
let key = kv[0].trim();
let value = kv[1].trim();
match key {
"window" | "window_seconds" => {
window_seconds = i64::from_str(value).map_err(|_| {
InvalidFuncArgsSnafu {
err_msg: format!("Invalid value for window: {}", value),
}
.build()
})?;
}
"parallelism" => {
parallelism = value.parse::<u32>().map_err(|_| {
InvalidFuncArgsSnafu {
err_msg: format!("Invalid value for parallelism: {}", value),
}
.build()
})?;
}
_ => {
return InvalidFuncArgsSnafu {
err_msg: format!("Unknown parameter: {}", key),
}
.fail();
}
}
}
Ok((
compact_request::Options::StrictWindow(StrictWindow { window_seconds }),
parallelism,
))
} else {
Ok(compact_request::Options::Regular(Default::default()))
// TWCS strategy
let Some(option_str) = option else {
return Ok((
compact_request::Options::Regular(Default::default()),
DEFAULT_COMPACTION_PARALLELISM,
));
};
let mut parallelism = DEFAULT_COMPACTION_PARALLELISM;
let pairs: Vec<&str> = option_str.split(',').collect();
for pair in pairs {
let kv: Vec<&str> = pair.trim().split('=').collect();
if kv.len() != 2 {
return InvalidFuncArgsSnafu {
err_msg: format!("Invalid key-value pair: {}", pair.trim()),
}
.fail();
}
let key = kv[0].trim();
let value = kv[1].trim();
match key {
"parallelism" => {
parallelism = value.parse::<u32>().map_err(|_| {
InvalidFuncArgsSnafu {
err_msg: format!("Invalid value for parallelism: {}", value),
}
.build()
})?;
}
_ => {
return InvalidFuncArgsSnafu {
err_msg: format!("Unknown parameter: {}", key),
}
.fail();
}
}
}
Ok((
compact_request::Options::Regular(Default::default()),
parallelism,
))
}
}
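Both branches accept the same plain option syntax: comma-separated `key=value` pairs with no quoting or escaping. A self-contained sketch of that parsing step (hypothetical helper name and simplified error type; the real code also validates which keys each strategy accepts):
use std::collections::HashMap;
// Hypothetical standalone helper mirroring the loops above: splits
// "window=3600,parallelism=2" into a key/value map and rejects anything
// that is not exactly `key=value`.
fn parse_kv_options(options: &str) -> std::result::Result<HashMap<String, String>, String> {
let mut map = HashMap::new();
for pair in options.split(',') {
let kv: Vec<&str> = pair.trim().split('=').collect();
if kv.len() != 2 {
return Err(format!("Invalid key-value pair: {}", pair.trim()));
}
map.insert(kv[0].trim().to_string(), kv[1].trim().to_string());
}
Ok(map)
}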
@@ -301,7 +403,7 @@ mod tests {
assert_eq!(
expected,
&parse_compact_params(&params, &QueryContext::arc()).unwrap()
&parse_compact_request(&params, &QueryContext::arc()).unwrap()
);
}
}
@@ -316,6 +418,7 @@ mod tests {
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "table".to_string(),
compact_options: Options::Regular(Default::default()),
parallelism: 1,
},
),
(
@@ -325,6 +428,7 @@ mod tests {
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "table".to_string(),
compact_options: Options::Regular(Default::default()),
parallelism: 1,
},
),
(
@@ -337,6 +441,7 @@ mod tests {
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "table".to_string(),
compact_options: Options::Regular(Default::default()),
parallelism: 1,
},
),
(
@@ -346,6 +451,7 @@ mod tests {
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "table".to_string(),
compact_options: Options::Regular(Default::default()),
parallelism: 1,
},
),
(
@@ -355,6 +461,7 @@ mod tests {
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "table".to_string(),
compact_options: Options::StrictWindow(StrictWindow { window_seconds: 0 }),
parallelism: 1,
},
),
(
@@ -366,15 +473,7 @@ mod tests {
compact_options: Options::StrictWindow(StrictWindow {
window_seconds: 3600,
}),
},
),
(
&["table", "regular", "abcd"],
CompactTableRequest {
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "table".to_string(),
compact_options: Options::Regular(Default::default()),
parallelism: 1,
},
),
(
@@ -386,12 +485,82 @@ mod tests {
compact_options: Options::StrictWindow(StrictWindow {
window_seconds: 120,
}),
parallelism: 1,
},
),
// Test with parallelism parameter
(
&["table", "regular", "parallelism=4"],
CompactTableRequest {
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "table".to_string(),
compact_options: Options::Regular(Default::default()),
parallelism: 4,
},
),
(
&["table", "strict_window", "window=3600,parallelism=2"],
CompactTableRequest {
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "table".to_string(),
compact_options: Options::StrictWindow(StrictWindow {
window_seconds: 3600,
}),
parallelism: 2,
},
),
(
&["table", "strict_window", "window=3600"],
CompactTableRequest {
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "table".to_string(),
compact_options: Options::StrictWindow(StrictWindow {
window_seconds: 3600,
}),
parallelism: 1,
},
),
(
&["table", "strict_window", "window_seconds=7200"],
CompactTableRequest {
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "table".to_string(),
compact_options: Options::StrictWindow(StrictWindow {
window_seconds: 7200,
}),
parallelism: 1,
},
),
(
&["table", "strict_window", "window=1800"],
CompactTableRequest {
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "table".to_string(),
compact_options: Options::StrictWindow(StrictWindow {
window_seconds: 1800,
}),
parallelism: 1,
},
),
(
&["table", "regular", "parallelism=8"],
CompactTableRequest {
catalog_name: DEFAULT_CATALOG_NAME.to_string(),
schema_name: DEFAULT_SCHEMA_NAME.to_string(),
table_name: "table".to_string(),
compact_options: Options::Regular(Default::default()),
parallelism: 8,
},
),
]);
assert!(
parse_compact_params(
parse_compact_request(
&["table", "strict_window", "abc"]
.into_iter()
.map(ValueRef::String)
@@ -402,7 +571,7 @@ mod tests {
);
assert!(
parse_compact_params(
parse_compact_request(
&["a.b.table", "strict_window", "abc"]
.into_iter()
.map(ValueRef::String)
@@ -411,5 +580,88 @@ mod tests {
)
.is_err()
);
// Test too many arguments: parallelism cannot be passed as a fourth positional argument
assert!(
parse_compact_request(
&["table", "regular", "options", "invalid"]
.into_iter()
.map(ValueRef::String)
.collect::<Vec<_>>(),
&QueryContext::arc(),
)
.is_err()
);
// Test too many parameters
assert!(
parse_compact_request(
&["table", "regular", "options", "4", "extra"]
.into_iter()
.map(ValueRef::String)
.collect::<Vec<_>>(),
&QueryContext::arc(),
)
.is_err()
);
// Test invalid keyword argument format
assert!(
parse_compact_request(
&["table", "strict_window", "window"]
.into_iter()
.map(ValueRef::String)
.collect::<Vec<_>>(),
&QueryContext::arc(),
)
.is_err()
);
// Test invalid keyword
assert!(
parse_compact_request(
&["table", "strict_window", "invalid_key=123"]
.into_iter()
.map(ValueRef::String)
.collect::<Vec<_>>(),
&QueryContext::arc(),
)
.is_err()
);
assert!(
parse_compact_request(
&["table", "regular", "abcd"]
.into_iter()
.map(ValueRef::String)
.collect::<Vec<_>>(),
&QueryContext::arc(),
)
.is_err()
);
// Test invalid window value
assert!(
parse_compact_request(
&["table", "strict_window", "window=abc"]
.into_iter()
.map(ValueRef::String)
.collect::<Vec<_>>(),
&QueryContext::arc(),
)
.is_err()
);
// Test invalid parallelism in options string
assert!(
parse_compact_request(
&["table", "strict_window", "parallelism=abc"]
.into_iter()
.map(ValueRef::String)
.collect::<Vec<_>>(),
&QueryContext::arc(),
)
.is_err()
);
}
}

View File

@@ -15,7 +15,7 @@
use std::borrow::Cow;
use std::sync::Arc;
use arrow::array::{Array, ArrayRef, AsArray, BinaryArray, StringArray};
use arrow::array::{Array, ArrayRef, AsArray, BinaryArray, LargeStringArray, StringArray};
use arrow_schema::{DataType, Field};
use datafusion::logical_expr::{Signature, TypeSignature, Volatility};
use datafusion_common::{Result, ScalarValue};
@@ -63,7 +63,7 @@ impl VectorProduct {
}
let t = args.schema.field(0).data_type();
if !matches!(t, DataType::Utf8 | DataType::Binary) {
if !matches!(t, DataType::Utf8 | DataType::LargeUtf8 | DataType::Binary) {
return Err(datafusion_common::DataFusionError::Internal(format!(
"unexpected input datatype {t} when creating `VEC_PRODUCT`"
)));
@@ -91,6 +91,13 @@ impl VectorProduct {
.map(|x| x.map(Cow::Owned))
.collect::<Result<Vec<_>>>()?
}
DataType::LargeUtf8 => {
let arr: &LargeStringArray = values[0].as_string();
arr.iter()
.filter_map(|x| x.map(|s| parse_veclit_from_strlit(s).map_err(Into::into)))
.map(|x: Result<Vec<f32>>| x.map(Cow::Owned))
.collect::<Result<Vec<_>>>()?
}
DataType::Binary => {
let arr: &BinaryArray = values[0].as_binary();
arr.iter()

View File

@@ -14,7 +14,7 @@
use std::sync::Arc;
use arrow::array::{Array, ArrayRef, AsArray, BinaryArray, StringArray};
use arrow::array::{Array, ArrayRef, AsArray, BinaryArray, LargeStringArray, StringArray};
use arrow_schema::{DataType, Field};
use datafusion_common::{Result, ScalarValue};
use datafusion_expr::{
@@ -63,7 +63,7 @@ impl VectorSum {
}
let t = args.schema.field(0).data_type();
if !matches!(t, DataType::Utf8 | DataType::Binary) {
if !matches!(t, DataType::Utf8 | DataType::LargeUtf8 | DataType::Binary) {
return Err(datafusion_common::DataFusionError::Internal(format!(
"unexpected input datatype {t} when creating `VEC_SUM`"
)));
@@ -98,6 +98,21 @@ impl VectorSum {
*self.inner(vec_column.len()) += vec_column;
}
}
DataType::LargeUtf8 => {
let arr: &LargeStringArray = values[0].as_string();
for s in arr.iter() {
let Some(s) = s else {
if is_update {
self.has_null = true;
self.sum = None;
}
return Ok(());
};
let values = parse_veclit_from_strlit(s)?;
let vec_column = DVectorView::from_slice(&values, values.len());
*self.inner(vec_column.len()) += vec_column;
}
}
DataType::Binary => {
let arr: &BinaryArray = values[0].as_binary();
for b in arr.iter() {

View File

@@ -1,123 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use api::helper::ColumnDataTypeWrapper;
use api::v1::{Column, DeleteRequest as GrpcDeleteRequest};
use datatypes::prelude::ConcreteDataType;
use snafu::{ResultExt, ensure};
use table::requests::DeleteRequest;
use crate::error::{ColumnDataTypeSnafu, IllegalDeleteRequestSnafu, Result};
use crate::insert::add_values_to_builder;
pub fn to_table_delete_request(
catalog_name: &str,
schema_name: &str,
request: GrpcDeleteRequest,
) -> Result<DeleteRequest> {
let row_count = request.row_count as usize;
let mut key_column_values = HashMap::with_capacity(request.key_columns.len());
for Column {
column_name,
values,
null_mask,
datatype,
datatype_extension,
..
} in request.key_columns
{
let Some(values) = values else { continue };
let datatype: ConcreteDataType =
ColumnDataTypeWrapper::try_new(datatype, datatype_extension)
.context(ColumnDataTypeSnafu)?
.into();
let vector = add_values_to_builder(datatype, values, row_count, null_mask)?;
ensure!(
key_column_values
.insert(column_name.clone(), vector)
.is_none(),
IllegalDeleteRequestSnafu {
reason: format!("Duplicated column '{column_name}' in delete request.")
}
);
}
Ok(DeleteRequest {
catalog_name: catalog_name.to_string(),
schema_name: schema_name.to_string(),
table_name: request.table_name,
key_column_values,
})
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use api::v1::ColumnDataType;
use api::v1::column::Values;
use datatypes::prelude::{ScalarVector, VectorRef};
use datatypes::vectors::{Int32Vector, StringVector};
use super::*;
#[test]
fn test_to_table_delete_request() {
let grpc_request = GrpcDeleteRequest {
table_name: "foo".to_string(),
key_columns: vec![
Column {
column_name: "id".to_string(),
values: Some(Values {
i32_values: vec![1, 2, 3],
..Default::default()
}),
datatype: ColumnDataType::Int32 as i32,
..Default::default()
},
Column {
column_name: "name".to_string(),
values: Some(Values {
string_values: vec!["a".to_string(), "b".to_string(), "c".to_string()],
..Default::default()
}),
datatype: ColumnDataType::String as i32,
..Default::default()
},
],
row_count: 3,
};
let mut request =
to_table_delete_request("foo_catalog", "foo_schema", grpc_request).unwrap();
assert_eq!(request.catalog_name, "foo_catalog");
assert_eq!(request.schema_name, "foo_schema");
assert_eq!(request.table_name, "foo");
assert_eq!(
Arc::new(Int32Vector::from_slice(vec![1, 2, 3])) as VectorRef,
request.key_column_values.remove("id").unwrap()
);
assert_eq!(
Arc::new(StringVector::from_slice(&["a", "b", "c"])) as VectorRef,
request.key_column_values.remove("name").unwrap()
);
assert!(request.key_column_values.is_empty());
}
}

View File

@@ -25,13 +25,6 @@ use store_api::metadata::MetadataError;
#[snafu(visibility(pub))]
#[stack_trace_debug]
pub enum Error {
#[snafu(display("Illegal delete request, reason: {reason}"))]
IllegalDeleteRequest {
reason: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Column datatype error"))]
ColumnDataType {
#[snafu(implicit)]
@@ -65,13 +58,6 @@ pub enum Error {
location: Location,
},
#[snafu(display("Failed to create vector"))]
CreateVector {
#[snafu(implicit)]
location: Location,
source: datatypes::error::Error,
},
#[snafu(display("Missing required field in protobuf, field: {}", field))]
MissingField {
field: String,
@@ -87,13 +73,6 @@ pub enum Error {
source: api::error::Error,
},
#[snafu(display("Unexpected values length, reason: {}", reason))]
UnexpectedValuesLength {
reason: String,
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Unknown location type: {}", location_type))]
UnknownLocationType {
location_type: i32,
@@ -189,18 +168,13 @@ pub type Result<T> = std::result::Result<T, Error>;
impl ErrorExt for Error {
fn status_code(&self) -> StatusCode {
match self {
Error::IllegalDeleteRequest { .. } => StatusCode::InvalidArguments,
Error::ColumnDataType { .. } => StatusCode::Internal,
Error::DuplicatedTimestampColumn { .. }
| Error::DuplicatedColumnName { .. }
| Error::MissingTimestampColumn { .. } => StatusCode::InvalidArguments,
Error::CreateVector { .. } => StatusCode::InvalidArguments,
Error::MissingField { .. } => StatusCode::InvalidArguments,
Error::InvalidColumnDef { source, .. } => source.status_code(),
Error::UnexpectedValuesLength { .. } | Error::UnknownLocationType { .. } => {
StatusCode::InvalidArguments
}
Error::UnknownLocationType { .. } => StatusCode::InvalidArguments,
Error::UnknownColumnDataType { .. } | Error::InvalidStringIndexColumnType { .. } => {
StatusCode::InvalidArguments

View File

@@ -1,80 +0,0 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use api::helper;
use api::v1::column::Values;
use common_base::BitVec;
use datatypes::data_type::{ConcreteDataType, DataType};
use datatypes::prelude::VectorRef;
use snafu::{ResultExt, ensure};
use crate::error::{CreateVectorSnafu, Result, UnexpectedValuesLengthSnafu};
pub(crate) fn add_values_to_builder(
data_type: ConcreteDataType,
values: Values,
row_count: usize,
null_mask: Vec<u8>,
) -> Result<VectorRef> {
if null_mask.is_empty() {
Ok(helper::pb_values_to_vector_ref(&data_type, values))
} else {
let builder = &mut data_type.create_mutable_vector(row_count);
let values = helper::pb_values_to_values(&data_type, values);
let null_mask = BitVec::from_vec(null_mask);
ensure!(
null_mask.count_ones() + values.len() == row_count,
UnexpectedValuesLengthSnafu {
reason: "If null_mask is not empty, the sum of the number of nulls and the length of values must be equal to row_count."
}
);
let mut idx_of_values = 0;
for idx in 0..row_count {
match is_null(&null_mask, idx) {
Some(true) => builder.push_null(),
_ => {
builder
.try_push_value_ref(&values[idx_of_values].as_value_ref())
.context(CreateVectorSnafu)?;
idx_of_values += 1
}
}
}
Ok(builder.to_vector())
}
}
fn is_null(null_mask: &BitVec, idx: usize) -> Option<bool> {
null_mask.get(idx).as_deref().copied()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_is_null() {
let null_mask = BitVec::from_slice(&[0b0000_0001, 0b0000_1000]);
assert_eq!(Some(true), is_null(&null_mask, 0));
assert_eq!(Some(false), is_null(&null_mask, 1));
assert_eq!(Some(false), is_null(&null_mask, 10));
assert_eq!(Some(true), is_null(&null_mask, 11));
assert_eq!(Some(false), is_null(&null_mask, 12));
assert_eq!(None, is_null(&null_mask, 16));
assert_eq!(None, is_null(&null_mask, 99));
}
}

View File

@@ -13,9 +13,7 @@
// limitations under the License.
mod alter;
pub mod delete;
pub mod error;
pub mod insert;
pub mod util;
pub use alter::{alter_expr_to_request, create_table_schema};

View File

@@ -90,6 +90,7 @@ fn impl_schema_method(fields: &[ParsedField<'_>]) -> Result<TokenStream2> {
Some(ColumnDataTypeExtension { type_ext: Some(TypeExt::VectorType(VectorTypeExtension { dim: #dim })) })
}
}
// TODO(sunng87): revisit all these implementations
Some(TypeExt::ListType(ext)) => {
let item_type = syn::Ident::new(&ext.datatype.to_string(), ident.span());
quote! {
@@ -108,6 +109,12 @@ fn impl_schema_method(fields: &[ParsedField<'_>]) -> Result<TokenStream2> {
Some(ColumnDataTypeExtension { type_ext: Some(TypeExt::StructType(StructTypeExtension { fields: [#(#fields),*] })) })
}
}
Some(TypeExt::JsonNativeType(ext)) => {
let inner = syn::Ident::new(&ext.datatype.to_string(), ident.span());
quote! {
Some(ColumnDataTypeExtension { type_ext: Some(TypeExt::JsonNativeType(JsonNativeTypeExtension { datatype: #inner })) })
}
}
None => {
quote! { None }
}

View File

@@ -124,6 +124,9 @@ pub struct NodeInfo {
// The node build memory bytes
#[serde(default)]
pub memory_bytes: u64,
// The node hostname
#[serde(default)]
pub hostname: String,
}
#[derive(Debug, Clone, Copy, Eq, Hash, PartialEq, Serialize, Deserialize)]
@@ -332,6 +335,7 @@ mod tests {
start_time_ms: 1,
cpus: 0,
memory_bytes: 0,
hostname: "test_hostname".to_string(),
};
let node_info_bytes: Vec<u8> = node_info.try_into().unwrap();

View File

@@ -131,6 +131,7 @@ pub fn build_raw_table_info_from_expr(expr: &CreateTableExpr) -> RawTableInfo {
region_numbers: vec![],
options: TableOptions::try_from_iter(&expr.table_options).unwrap(),
created_on: DateTime::default(),
updated_on: DateTime::default(),
partition_key_indices: vec![],
column_ids: vec![],
},

View File

@@ -24,7 +24,7 @@ async fn test_heartbeat_mailbox() {
let mailbox = HeartbeatMailbox::new(tx);
let meta = MessageMeta::new_test(1, "test", "foo", "bar");
let reply = InstructionReply::OpenRegion(SimpleReply {
let reply = InstructionReply::OpenRegions(SimpleReply {
result: true,
error: None,
});

View File

@@ -16,7 +16,7 @@ use std::collections::HashMap;
use std::fmt::{Display, Formatter};
use std::time::Duration;
use serde::{Deserialize, Serialize};
use serde::{Deserialize, Deserializer, Serialize};
use store_api::storage::{RegionId, RegionNumber};
use strum::Display;
use table::metadata::TableId;
@@ -394,16 +394,33 @@ impl From<RegionId> for FlushRegions {
}
}
#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum SingleOrMultiple<T> {
Single(T),
Multiple(Vec<T>),
}
fn single_or_multiple_from<'de, D, T>(deserializer: D) -> Result<Vec<T>, D::Error>
where
D: Deserializer<'de>,
T: Deserialize<'de>,
{
let helper = SingleOrMultiple::<T>::deserialize(deserializer)?;
Ok(match helper {
SingleOrMultiple::Single(x) => vec![x],
SingleOrMultiple::Multiple(xs) => xs,
})
}
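// With this helper plus the `alias` attributes on the variants below, both
// payload shapes deserialize to the same variant, e.g. for open-region
// instructions:
//   {"OpenRegions": [ { ...open region... }, ... ]}   // current multi-region form
//   {"OpenRegion": { ...open region... }}             // legacy single-region form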
#[derive(Debug, Clone, Serialize, Deserialize, Display, PartialEq)]
pub enum Instruction {
/// Opens a region.
///
/// - Returns true if a specified region exists.
OpenRegion(OpenRegion),
/// Closes a region.
///
/// - Returns true if a specified region does not exist.
CloseRegion(RegionIdent),
/// Opens regions.
#[serde(deserialize_with = "single_or_multiple_from", alias = "OpenRegion")]
OpenRegions(Vec<OpenRegion>),
/// Closes regions.
#[serde(deserialize_with = "single_or_multiple_from", alias = "CloseRegion")]
CloseRegions(Vec<RegionIdent>),
/// Upgrades a region.
UpgradeRegion(UpgradeRegion),
/// Downgrades a region.
@@ -438,8 +455,10 @@ impl Display for UpgradeRegionReply {
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum InstructionReply {
OpenRegion(SimpleReply),
CloseRegion(SimpleReply),
#[serde(alias = "open_region")]
OpenRegions(SimpleReply),
#[serde(alias = "close_region")]
CloseRegions(SimpleReply),
UpgradeRegion(UpgradeRegionReply),
DowngradeRegion(DowngradeRegionReply),
FlushRegions(FlushRegionReply),
@@ -448,8 +467,8 @@ pub enum InstructionReply {
impl Display for InstructionReply {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Self::OpenRegion(reply) => write!(f, "InstructionReply::OpenRegion({})", reply),
Self::CloseRegion(reply) => write!(f, "InstructionReply::CloseRegion({})", reply),
Self::OpenRegions(reply) => write!(f, "InstructionReply::OpenRegions({})", reply),
Self::CloseRegions(reply) => write!(f, "InstructionReply::CloseRegions({})", reply),
Self::UpgradeRegion(reply) => write!(f, "InstructionReply::UpgradeRegion({})", reply),
Self::DowngradeRegion(reply) => {
write!(f, "InstructionReply::DowngradeRegion({})", reply)
@@ -459,13 +478,30 @@ impl Display for InstructionReply {
}
}
#[cfg(any(test, feature = "testing"))]
impl InstructionReply {
pub fn expect_close_regions_reply(self) -> SimpleReply {
match self {
Self::CloseRegions(reply) => reply,
_ => panic!("Expected CloseRegions reply"),
}
}
pub fn expect_open_regions_reply(self) -> SimpleReply {
match self {
Self::OpenRegions(reply) => reply,
_ => panic!("Expected OpenRegions reply"),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_serialize_instruction() {
let open_region = Instruction::OpenRegion(OpenRegion::new(
let open_region = Instruction::OpenRegions(vec![OpenRegion::new(
RegionIdent {
datanode_id: 2,
table_id: 1024,
@@ -476,30 +512,78 @@ mod tests {
HashMap::new(),
HashMap::new(),
false,
));
)]);
let serialized = serde_json::to_string(&open_region).unwrap();
assert_eq!(
r#"{"OpenRegion":{"region_ident":{"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"},"region_storage_path":"test/foo","region_options":{},"region_wal_options":{},"skip_wal_replay":false}}"#,
r#"{"OpenRegions":[{"region_ident":{"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"},"region_storage_path":"test/foo","region_options":{},"region_wal_options":{},"skip_wal_replay":false}]}"#,
serialized
);
let close_region = Instruction::CloseRegion(RegionIdent {
let close_region = Instruction::CloseRegions(vec![RegionIdent {
datanode_id: 2,
table_id: 1024,
region_number: 1,
engine: "mito2".to_string(),
});
}]);
let serialized = serde_json::to_string(&close_region).unwrap();
assert_eq!(
r#"{"CloseRegion":{"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"}}"#,
r#"{"CloseRegions":[{"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"}]}"#,
serialized
);
}
#[test]
fn test_deserialize_instruction() {
let open_region_instruction = r#"{"OpenRegion":[{"region_ident":{"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"},"region_storage_path":"test/foo","region_options":{},"region_wal_options":{},"skip_wal_replay":false}]}"#;
let open_region_instruction: Instruction =
serde_json::from_str(open_region_instruction).unwrap();
let open_region = Instruction::OpenRegions(vec![OpenRegion::new(
RegionIdent {
datanode_id: 2,
table_id: 1024,
region_number: 1,
engine: "mito2".to_string(),
},
"test/foo",
HashMap::new(),
HashMap::new(),
false,
)]);
assert_eq!(open_region_instruction, open_region);
let close_region_instruction = r#"{"CloseRegion":[{"datanode_id":2,"table_id":1024,"region_number":1,"engine":"mito2"}]}"#;
let close_region_instruction: Instruction =
serde_json::from_str(close_region_instruction).unwrap();
let close_region = Instruction::CloseRegions(vec![RegionIdent {
datanode_id: 2,
table_id: 1024,
region_number: 1,
engine: "mito2".to_string(),
}]);
assert_eq!(close_region_instruction, close_region);
let close_region_instruction_reply =
r#"{"result":true,"error":null,"type":"close_region"}"#;
let close_region_instruction_reply: InstructionReply =
serde_json::from_str(close_region_instruction_reply).unwrap();
let close_region_reply = InstructionReply::CloseRegions(SimpleReply {
result: true,
error: None,
});
assert_eq!(close_region_instruction_reply, close_region_reply);
let open_region_instruction_reply = r#"{"result":true,"error":null,"type":"open_region"}"#;
let open_region_instruction_reply: InstructionReply =
serde_json::from_str(open_region_instruction_reply).unwrap();
let open_region_reply = InstructionReply::OpenRegions(SimpleReply {
result: true,
error: None,
});
assert_eq!(open_region_instruction_reply, open_region_reply);
}
#[derive(Debug, Clone, Serialize, Deserialize)]
struct LegacyOpenRegion {
region_ident: RegionIdent,

View File

@@ -287,8 +287,13 @@ mod tests {
#[test]
fn test_deserialization_compatibility() {
let s = r#"{"version":1,"table_info":{"ident":{"table_id":8714,"version":0},"name":"go_gc_duration_seconds","desc":"Created on insertion","catalog_name":"e87lehzy63d4cloud_docs_test","schema_name":"public","meta":{"schema":{"column_schemas":[{"name":"instance","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"job","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"quantile","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"greptime_timestamp","data_type":{"Timestamp":{"Millisecond":null}},"is_nullable":false,"is_time_index":true,"default_constraint":null,"metadata":{"greptime:time_index":"true"}},{"name":"greptime_value","data_type":{"Float64":{}},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}}],"timestamp_index":3,"version":0},"primary_key_indices":[0,1,2],"value_indices":[],"engine":"mito","next_column_id":5,"region_numbers":[],"engine_options":{},"options":{"write_buffer_size":null,"ttl":null,"extra_options":{}},"created_on":"1970-01-01T00:00:00Z"},"table_type":"Base"}}"#;
let v = TableInfoValue::try_from_raw_value(s.as_bytes()).unwrap();
let old_fmt = r#"{"version":1,"table_info":{"ident":{"table_id":8714,"version":0},"name":"go_gc_duration_seconds","desc":"Created on insertion","catalog_name":"e87lehzy63d4cloud_docs_test","schema_name":"public","meta":{"schema":{"column_schemas":[{"name":"instance","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"job","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"quantile","data_type":{"String":null},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"greptime_timestamp","data_type":{"Timestamp":{"Millisecond":null}},"is_nullable":false,"is_time_index":true,"default_constraint":null,"metadata":{"greptime:time_index":"true"}},{"name":"greptime_value","data_type":{"Float64":{}},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}}],"timestamp_index":3,"version":0},"primary_key_indices":[0,1,2],"value_indices":[],"engine":"mito","next_column_id":5,"region_numbers":[],"engine_options":{},"options":{"write_buffer_size":null,"ttl":null,"extra_options":{}},"created_on":"1970-01-01T00:00:00Z"},"table_type":"Base"}}"#;
let new_fmt = r#"{"version":1,"table_info":{"ident":{"table_id":8714,"version":0},"name":"go_gc_duration_seconds","desc":"Created on insertion","catalog_name":"e87lehzy63d4cloud_docs_test","schema_name":"public","meta":{"schema":{"column_schemas":[{"name":"instance","data_type":{"String":{"size_type":"Utf8"}},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"job","data_type":{"String":{"size_type":"Utf8"}},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"quantile","data_type":{"String":{"size_type":"Utf8"}},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}},{"name":"greptime_timestamp","data_type":{"Timestamp":{"Millisecond":null}},"is_nullable":false,"is_time_index":true,"default_constraint":null,"metadata":{"greptime:time_index":"true"}},{"name":"greptime_value","data_type":{"Float64":{}},"is_nullable":true,"is_time_index":false,"default_constraint":null,"metadata":{}}],"timestamp_index":3,"version":0},"primary_key_indices":[0,1,2],"value_indices":[],"engine":"mito","next_column_id":5,"region_numbers":[],"engine_options":{},"options":{"write_buffer_size":null,"ttl":null,"extra_options":{}},"created_on":"1970-01-01T00:00:00Z"},"table_type":"Base"}}"#;
let v = TableInfoValue::try_from_raw_value(old_fmt.as_bytes()).unwrap();
let new_v = TableInfoValue::try_from_raw_value(new_fmt.as_bytes()).unwrap();
assert_eq!(v, new_v);
assert_eq!(v.table_info.meta.created_on, v.table_info.meta.updated_on);
assert!(v.table_info.meta.partition_key_indices.is_empty());
}
@@ -328,6 +333,7 @@ mod tests {
schema: RawSchema::from(&schema),
engine: "mito".to_string(),
created_on: chrono::DateTime::default(),
updated_on: chrono::DateTime::default(),
primary_key_indices: vec![0, 1],
next_column_id: 3,
value_indices: vec![2, 3],

View File

@@ -1503,6 +1503,7 @@ mod tests {
region_numbers: vec![0],
options: Default::default(),
created_on: Default::default(),
updated_on: Default::default(),
partition_key_indices: Default::default(),
column_ids: Default::default(),
};

View File

@@ -12,8 +12,7 @@ use api::v1::{
use serde::{Deserialize, Serialize};
use snafu::{OptionExt, ResultExt};
use crate::error;
use crate::error::Result;
use crate::error::{self, Result, TooLargeDurationSnafu};
use crate::rpc::ddl::DdlTask;
// Create trigger
@@ -27,7 +26,11 @@ pub struct CreateTriggerTask {
pub labels: HashMap<String, String>,
pub annotations: HashMap<String, String>,
pub interval: Duration,
pub raw_interval_expr: String,
pub raw_interval_expr: Option<String>,
pub r#for: Option<Duration>,
pub for_raw_expr: Option<String>,
pub keep_firing_for: Option<Duration>,
pub keep_firing_for_raw_expr: Option<String>,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
@@ -62,10 +65,20 @@ impl TryFrom<CreateTriggerTask> for PbCreateTriggerTask {
.map(PbNotifyChannel::from)
.collect();
let interval = task
.interval
.try_into()
.context(error::TooLargeDurationSnafu)?;
let interval = task.interval.try_into().context(TooLargeDurationSnafu)?;
let raw_interval_expr = task.raw_interval_expr.unwrap_or_default();
let r#for = task
.r#for
.map(|d| d.try_into().context(TooLargeDurationSnafu))
.transpose()?;
let for_raw_expr = task.for_raw_expr.unwrap_or_default();
let keep_firing_for = task
.keep_firing_for
.map(|d| d.try_into().context(TooLargeDurationSnafu))
.transpose()?;
let keep_firing_for_raw_expr = task.keep_firing_for_raw_expr.unwrap_or_default();
let expr = PbCreateTriggerExpr {
catalog_name: task.catalog_name,
@@ -76,7 +89,11 @@ impl TryFrom<CreateTriggerTask> for PbCreateTriggerTask {
labels: task.labels,
annotations: task.annotations,
interval: Some(interval),
raw_interval_expr: task.raw_interval_expr,
raw_interval_expr,
r#for,
for_raw_expr,
keep_firing_for,
keep_firing_for_raw_expr,
};
Ok(PbCreateTriggerTask {
@@ -102,6 +119,26 @@ impl TryFrom<PbCreateTriggerTask> for CreateTriggerTask {
let interval = expr.interval.context(error::MissingIntervalSnafu)?;
let interval = interval.try_into().context(error::NegativeDurationSnafu)?;
let r#for = expr
.r#for
.map(Duration::try_from)
.transpose()
.context(error::NegativeDurationSnafu)?;
let keep_firing_for = expr
.keep_firing_for
.map(Duration::try_from)
.transpose()
.context(error::NegativeDurationSnafu)?;
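// Raw expressions travel as plain (non-optional) strings in the protobuf
// message, so an empty string means "not set"; map it back to `None` here,
// the reverse of the `unwrap_or_default()` calls in the task-to-protobuf
// direction above.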
let raw_interval_expr =
(!expr.raw_interval_expr.is_empty()).then_some(expr.raw_interval_expr);
let for_raw_expr = (!expr.for_raw_expr.is_empty()).then_some(expr.for_raw_expr);
let keep_firing_for_raw_expr =
(!expr.keep_firing_for_raw_expr.is_empty()).then_some(expr.keep_firing_for_raw_expr);
let task = CreateTriggerTask {
catalog_name: expr.catalog_name,
trigger_name: expr.trigger_name,
@@ -111,7 +148,11 @@ impl TryFrom<PbCreateTriggerTask> for CreateTriggerTask {
labels: expr.labels,
annotations: expr.annotations,
interval,
raw_interval_expr: expr.raw_interval_expr,
raw_interval_expr,
r#for,
for_raw_expr,
keep_firing_for,
keep_firing_for_raw_expr,
};
Ok(task)
}
@@ -271,7 +312,11 @@ mod tests {
.into_iter()
.collect(),
interval: Duration::from_secs(60),
raw_interval_expr: "'1 minute'::INTERVAL".to_string(),
raw_interval_expr: Some("'1 minute'::INTERVAL".to_string()),
r#for: Duration::from_secs(300).into(),
for_raw_expr: Some("'5 minute'::INTERVAL".to_string()),
keep_firing_for: Duration::from_secs(600).into(),
keep_firing_for_raw_expr: Some("'10 minute'::INTERVAL".to_string()),
};
let pb_task: PbCreateTriggerTask = original.clone().try_into().unwrap();
@@ -306,6 +351,14 @@ mod tests {
assert_eq!(original.labels, round_tripped.labels);
assert_eq!(original.annotations, round_tripped.annotations);
assert_eq!(original.interval, round_tripped.interval);
assert_eq!(original.raw_interval_expr, round_tripped.raw_interval_expr);
assert_eq!(original.r#for, round_tripped.r#for);
assert_eq!(original.for_raw_expr, round_tripped.for_raw_expr);
assert_eq!(original.keep_firing_for, round_tripped.keep_firing_for);
assert_eq!(
original.keep_firing_for_raw_expr,
round_tripped.keep_firing_for_raw_expr
);
// Invalid, since create_trigger is None and it's required.
let invalid_task = PbCreateTriggerTask {

View File

@@ -27,4 +27,9 @@ snafu.workspace = true
tokio.workspace = true
[dev-dependencies]
criterion = "0.7.0"
tokio.workspace = true
[[bench]]
name = "iter_record_batch_rows"
harness = false

View File

@@ -0,0 +1,179 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::hint::black_box;
use std::sync::Arc;
use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
use datafusion::arrow::array::{Int32Array, TimestampMillisecondArray};
use datafusion::arrow::datatypes::{DataType, Field, TimeUnit};
use datafusion_common::arrow::array::{ArrayRef, RecordBatch, StringArray};
use datafusion_common::arrow::datatypes::Schema;
use datafusion_common::{ScalarValue, utils};
use datatypes::arrow::array::AsArray;
use datatypes::arrow::datatypes::{
Int32Type, TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType,
TimestampSecondType,
};
use datatypes::schema::SchemaRef;
fn prepare_record_batch(rows: usize) -> RecordBatch {
let schema = Schema::new(vec![
Field::new(
"ts",
DataType::Timestamp(TimeUnit::Millisecond, None),
false,
),
Field::new("i", DataType::Int32, true),
Field::new("s", DataType::Utf8, true),
]);
let columns: Vec<ArrayRef> = vec![
Arc::new(TimestampMillisecondArray::from_iter_values(
(0..rows).map(|x| (1760313600000 + x) as i64),
)),
Arc::new(Int32Array::from_iter_values((0..rows).map(|x| x as i32))),
Arc::new(StringArray::from_iter((0..rows).map(|x| {
if x % 2 == 0 {
Some(format!("s_{x}"))
} else {
None
}
}))),
];
RecordBatch::try_new(Arc::new(schema), columns).unwrap()
}
fn iter_by_greptimedb_values(schema: SchemaRef, record_batch: RecordBatch) {
let record_batch =
common_recordbatch::RecordBatch::try_from_df_record_batch(schema, record_batch).unwrap();
for row in record_batch.rows() {
black_box(row);
}
}
fn iter_by_loop_rows_and_columns(record_batch: RecordBatch) {
for i in 0..record_batch.num_rows() {
for column in record_batch.columns() {
match column.data_type() {
DataType::Timestamp(time_unit, _) => {
let v = match time_unit {
TimeUnit::Second => {
let array = column.as_primitive::<TimestampSecondType>();
array.value(i)
}
TimeUnit::Millisecond => {
let array = column.as_primitive::<TimestampMillisecondType>();
array.value(i)
}
TimeUnit::Microsecond => {
let array = column.as_primitive::<TimestampMicrosecondType>();
array.value(i)
}
TimeUnit::Nanosecond => {
let array = column.as_primitive::<TimestampNanosecondType>();
array.value(i)
}
};
black_box(v);
}
DataType::Int32 => {
let array = column.as_primitive::<Int32Type>();
let v = array.value(i);
black_box(v);
}
DataType::Utf8 => {
let array = column.as_string::<i32>();
let v = array.value(i);
black_box(v);
}
_ => unreachable!(),
}
}
}
}
fn iter_by_datafusion_scalar_values(record_batch: RecordBatch) {
let columns = record_batch.columns();
for i in 0..record_batch.num_rows() {
let row = utils::get_row_at_idx(columns, i).unwrap();
black_box(row);
}
}
fn iter_by_datafusion_scalar_values_with_buf(record_batch: RecordBatch) {
let columns = record_batch.columns();
let mut buf = vec![ScalarValue::Null; columns.len()];
for i in 0..record_batch.num_rows() {
utils::extract_row_at_idx_to_buf(columns, i, &mut buf).unwrap();
}
}
pub fn criterion_benchmark(c: &mut Criterion) {
let mut group = c.benchmark_group("iter_record_batch");
for rows in [1usize, 10, 100, 1_000, 10_000] {
group.bench_with_input(
BenchmarkId::new("by_greptimedb_values", rows),
&rows,
|b, rows| {
let record_batch = prepare_record_batch(*rows);
let schema =
Arc::new(datatypes::schema::Schema::try_from(record_batch.schema()).unwrap());
b.iter(|| {
iter_by_greptimedb_values(schema.clone(), record_batch.clone());
})
},
);
group.bench_with_input(
BenchmarkId::new("by_loop_rows_and_columns", rows),
&rows,
|b, rows| {
let record_batch = prepare_record_batch(*rows);
b.iter(|| {
iter_by_loop_rows_and_columns(record_batch.clone());
})
},
);
group.bench_with_input(
BenchmarkId::new("by_datafusion_scalar_values", rows),
&rows,
|b, rows| {
let record_batch = prepare_record_batch(*rows);
b.iter(|| {
iter_by_datafusion_scalar_values(record_batch.clone());
})
},
);
group.bench_with_input(
BenchmarkId::new("by_datafusion_scalar_values_with_buf", rows),
&rows,
|b, rows| {
let record_batch = prepare_record_batch(*rows);
b.iter(|| {
iter_by_datafusion_scalar_values_with_buf(record_batch.clone());
})
},
);
}
group.finish();
}
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);

View File

@@ -33,7 +33,7 @@ use datatypes::arrow::util::pretty;
use datatypes::prelude::{ConcreteDataType, VectorRef};
use datatypes::scalars::{ScalarVector, ScalarVectorBuilder};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::types::json_type_value_to_string;
use datatypes::types::{JsonFormat, jsonb_to_string};
use datatypes::vectors::{BinaryVector, StringVectorBuilder};
use error::Result;
use futures::task::{Context, Poll};
@@ -90,32 +90,34 @@ pub fn map_json_type_to_string(
) -> Result<RecordBatch> {
let mut vectors = Vec::with_capacity(original_schema.column_schemas().len());
for (vector, schema) in batch.columns.iter().zip(original_schema.column_schemas()) {
if let ConcreteDataType::Json(j) = schema.data_type {
let mut string_vector_builder = StringVectorBuilder::with_capacity(vector.len());
let binary_vector = vector
.as_any()
.downcast_ref::<BinaryVector>()
.with_context(|| error::DowncastVectorSnafu {
from_type: schema.data_type.clone(),
to_type: ConcreteDataType::binary_datatype(),
})?;
for value in binary_vector.iter_data() {
let Some(value) = value else {
string_vector_builder.push(None);
continue;
};
let string_value =
json_type_value_to_string(value, &j.format).with_context(|_| {
error::CastVectorSnafu {
if let ConcreteDataType::Json(j) = &schema.data_type {
if matches!(&j.format, JsonFormat::Jsonb) {
let mut string_vector_builder = StringVectorBuilder::with_capacity(vector.len());
let binary_vector = vector
.as_any()
.downcast_ref::<BinaryVector>()
.with_context(|| error::DowncastVectorSnafu {
from_type: schema.data_type.clone(),
to_type: ConcreteDataType::binary_datatype(),
})?;
for value in binary_vector.iter_data() {
let Some(value) = value else {
string_vector_builder.push(None);
continue;
};
let string_value =
jsonb_to_string(value).with_context(|_| error::CastVectorSnafu {
from_type: schema.data_type.clone(),
to_type: ConcreteDataType::string_datatype(),
}
})?;
string_vector_builder.push(Some(string_value.as_str()));
}
})?;
string_vector_builder.push(Some(string_value.as_str()));
}
let string_vector = string_vector_builder.finish();
vectors.push(Arc::new(string_vector) as VectorRef);
let string_vector = string_vector_builder.finish();
vectors.push(Arc::new(string_vector) as VectorRef);
} else {
vectors.push(vector.clone());
}
} else {
vectors.push(vector.clone());
}

View File

@@ -16,9 +16,10 @@ use std::str::FromStr;
use common_time::Timestamp;
use common_time::timezone::Timezone;
use datatypes::json::JsonStructureSettings;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::ColumnDefaultConstraint;
use datatypes::types::{parse_string_to_json_type_value, parse_string_to_vector_type_value};
use datatypes::types::{JsonFormat, parse_string_to_jsonb, parse_string_to_vector_type_value};
use datatypes::value::{OrderedF32, OrderedF64, Value};
use snafu::{OptionExt, ResultExt, ensure};
pub use sqlparser::ast::{
@@ -210,7 +211,8 @@ pub fn sql_value_to_value(
| Value::Duration(_)
| Value::IntervalYearMonth(_)
| Value::IntervalDayTime(_)
| Value::IntervalMonthDayNano(_) => match unary_op {
| Value::IntervalMonthDayNano(_)
| Value::Json(_) => match unary_op {
UnaryOperator::Plus => {}
UnaryOperator::Minus => {
value = value
@@ -297,8 +299,21 @@ pub(crate) fn parse_string_to_value(
}
ConcreteDataType::Binary(_) => Ok(Value::Binary(s.as_bytes().into())),
ConcreteDataType::Json(j) => {
let v = parse_string_to_json_type_value(&s, &j.format).context(DatatypeSnafu)?;
Ok(Value::Binary(v.into()))
match &j.format {
JsonFormat::Jsonb => {
let v = parse_string_to_jsonb(&s).context(DatatypeSnafu)?;
Ok(Value::Binary(v.into()))
}
JsonFormat::Native(_inner) => {
// Always use the structured version at this level.
let serde_json_value =
serde_json::from_str(&s).context(DeserializeSnafu { json: s })?;
let json_structure_settings = JsonStructureSettings::Structured(None);
json_structure_settings
.encode(serde_json_value)
.context(DatatypeSnafu)
}
}
}
ConcreteDataType::Vector(d) => {
let v = parse_string_to_vector_type_value(&s, Some(d.dim)).context(DatatypeSnafu)?;

View File

@@ -5,9 +5,12 @@ edition.workspace = true
license.workspace = true
[dependencies]
common-base.workspace = true
lazy_static.workspace = true
nix.workspace = true
num_cpus.workspace = true
prometheus.workspace = true
sysinfo.workspace = true
[lints]
workspace = true

View File

@@ -23,9 +23,6 @@ use prometheus::core::{Collector, Desc};
use prometheus::proto::MetricFamily;
use prometheus::{IntGauge, Opts};
/// `MAX_VALUE` is used to indicate that the resource is unlimited.
pub const MAX_VALUE: i64 = -1;
const CGROUP_UNIFIED_MOUNTPOINT: &str = "/sys/fs/cgroup";
const MEMORY_MAX_FILE_CGROUP_V2: &str = "memory.max";
@@ -43,11 +40,11 @@ const MAX_VALUE_CGROUP_V2: &str = "max";
// For easier comparison, if the memory limit is larger than 1PB we consider it as unlimited.
const MAX_MEMORY_IN_BYTES: i64 = 1125899906842624; // 1PB
/// Get the limit of memory in bytes.
/// Get the limit of memory in bytes from the cgroups filesystem.
///
/// - If the memory is unlimited, return `-1`.
/// - If the cgroup total memory is unset, return `None`.
/// - Return `None` if it fails to read the memory limit, or when not on Linux.
pub fn get_memory_limit() -> Option<i64> {
pub fn get_memory_limit_from_cgroups() -> Option<i64> {
#[cfg(target_os = "linux")]
{
let memory_max_file = if is_cgroup_v2()? {
@@ -58,13 +55,13 @@ pub fn get_memory_limit() -> Option<i64> {
MEMORY_MAX_FILE_CGROUP_V1
};
// For cgroup v1, it will return a very large value (which differs by platform) if the memory is unlimited.
// For cgroup v1, it will return a very large value (which differs by platform) if the memory limit is unset.
let memory_limit =
read_value_from_file(Path::new(CGROUP_UNIFIED_MOUNTPOINT).join(memory_max_file))?;
// If the memory limit exceeds 1PB (cgroup v1), consider it as unlimited.
// If the memory limit exceeds 1PB (cgroup v1), consider it as unset.
if memory_limit > MAX_MEMORY_IN_BYTES {
return Some(MAX_VALUE);
return None;
}
Some(memory_limit)
}
@@ -73,10 +70,10 @@ pub fn get_memory_limit() -> Option<i64> {
None
}
/// Get the usage of memory in bytes.
/// Get the usage of memory in bytes from the cgroups filesystem.
///
/// - Return `None` if it fails to read the memory usage, when not on Linux, or when cgroup v1 is in use.
pub fn get_memory_usage() -> Option<i64> {
pub fn get_memory_usage_from_cgroups() -> Option<i64> {
#[cfg(target_os = "linux")]
{
if is_cgroup_v2()? {
@@ -93,11 +90,11 @@ pub fn get_memory_usage() -> Option<i64> {
None
}
/// Get the limit of cpu in millicores.
/// Get the limit of cpu in millicores from the cgroups filesystem.
///
/// - If the cpu is unlimited, return `-1`.
/// - If the cpu limit is unset, return `None`.
/// - Return `None` if it fails to read the cpu limit, or when not on Linux.
pub fn get_cpu_limit() -> Option<i64> {
pub fn get_cpu_limit_from_cgroups() -> Option<i64> {
#[cfg(target_os = "linux")]
if is_cgroup_v2()? {
// Read `/sys/fs/cgroup/cpu.max` to get the cpu limit.
@@ -108,10 +105,6 @@ pub fn get_cpu_limit() -> Option<i64> {
Path::new(CGROUP_UNIFIED_MOUNTPOINT).join(CPU_QUOTA_FILE_CGROUP_V1),
)?;
if quota == MAX_VALUE {
return Some(MAX_VALUE);
}
let period = read_value_from_file(
Path::new(CGROUP_UNIFIED_MOUNTPOINT).join(CPU_PERIOD_FILE_CGROUP_V1),
)?;
@@ -167,9 +160,9 @@ fn is_cgroup_v2() -> Option<bool> {
fn read_value_from_file<P: AsRef<Path>>(path: P) -> Option<i64> {
let content = read_to_string(&path).ok()?;
// If the content starts with "max", return `MAX_VALUE`.
// If the content starts with "max", return `None`.
if content.starts_with(MAX_VALUE_CGROUP_V2) {
return Some(MAX_VALUE);
return None;
}
content.trim().parse::<i64>().ok()
@@ -183,10 +176,10 @@ fn get_cgroup_v2_cpu_limit<P: AsRef<Path>>(path: P) -> Option<i64> {
return None;
}
// If the cpu is unlimited, it will be `-1`.
// If the cgroup cpu limit is unset, return `None`.
let quota = fields[0].trim();
if quota == MAX_VALUE_CGROUP_V2 {
return Some(MAX_VALUE);
return None;
}
let quota = quota.parse::<i64>().ok()?;
@@ -241,7 +234,7 @@ impl Collector for CgroupsMetricsCollector {
self.cpu_usage.set(cpu_usage);
}
if let Some(memory_usage) = get_memory_usage() {
if let Some(memory_usage) = get_memory_usage_from_cgroups() {
self.memory_usage.set(memory_usage);
}
@@ -263,8 +256,8 @@ mod tests {
100000
);
assert_eq!(
read_value_from_file(Path::new("testdata").join("memory.max.unlimited")).unwrap(),
MAX_VALUE
read_value_from_file(Path::new("testdata").join("memory.max.unlimited")),
None
);
assert_eq!(read_value_from_file(Path::new("non_existent_file")), None);
}
@@ -276,8 +269,8 @@ mod tests {
1500
);
assert_eq!(
get_cgroup_v2_cpu_limit(Path::new("testdata").join("cpu.max.unlimited")).unwrap(),
MAX_VALUE
get_cgroup_v2_cpu_limit(Path::new("testdata").join("cpu.max.unlimited")),
None
);
assert_eq!(
get_cgroup_v2_cpu_limit(Path::new("non_existent_file")),

View File

@@ -15,3 +15,64 @@
mod cgroups;
pub use cgroups::*;
use common_base::readable_size::ReadableSize;
use sysinfo::System;
/// Get the total CPU in millicores.
pub fn get_total_cpu_millicores() -> i64 {
// Get CPU limit from cgroups filesystem.
if let Some(cgroup_cpu_limit) = get_cpu_limit_from_cgroups() {
cgroup_cpu_limit
} else {
// Get total CPU cores from host system.
num_cpus::get() as i64 * 1000
}
}
/// Get the total memory in bytes.
pub fn get_total_memory_bytes() -> i64 {
// Get memory limit from cgroups filesystem.
if let Some(cgroup_memory_limit) = get_memory_limit_from_cgroups() {
cgroup_memory_limit
} else {
// Get total memory from host system.
if sysinfo::IS_SUPPORTED_SYSTEM {
let mut sys_info = System::new();
sys_info.refresh_memory();
sys_info.total_memory() as i64
} else {
// If the system is not supported, return -1.
-1
}
}
}
/// Get the total CPU cores. The result will be rounded to the nearest integer.
/// For example, if the total CPU is 1.5 cores (1500 millicores), the result will be 2.
pub fn get_total_cpu_cores() -> usize {
((get_total_cpu_millicores() as f64) / 1000.0).round() as usize
}
/// Get the total memory in readable size.
pub fn get_total_memory_readable() -> Option<ReadableSize> {
if get_total_memory_bytes() > 0 {
Some(ReadableSize(get_total_memory_bytes() as u64))
} else {
None
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_get_total_cpu_cores() {
assert!(get_total_cpu_cores() > 0);
}
#[test]
fn test_get_total_memory_readable() {
assert!(get_total_memory_readable().unwrap() > ReadableSize::mb(0));
}
}
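A minimal sketch of consuming the totals above, assuming the helpers are re-exported from the enclosing crate (written here as `common_stats`; the path is illustrative):
use common_stats::{get_total_cpu_cores, get_total_memory_readable};
// Size a worker pool from the detected CPU total: the cgroups limit wins,
// otherwise the host core count is used (see get_total_cpu_millicores above).
fn worker_threads() -> usize {
    get_total_cpu_cores().max(1)
}
// Report the detected memory; `ReadableSize` is assumed to implement Display.
fn report_memory() {
    match get_total_memory_readable() {
        Some(size) => println!("detected memory: {size}"),
        None => println!("memory total unavailable on this platform"),
    }
}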

View File

@@ -25,6 +25,7 @@ use crate::config::kafka::common::{
};
use crate::config::kafka::{DatanodeKafkaConfig, MetasrvKafkaConfig};
use crate::config::raft_engine::RaftEngineConfig;
use crate::error::{Error, UnsupportedWalProviderSnafu};
/// Wal configurations for metasrv.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
@@ -43,6 +44,7 @@ pub enum MetasrvWalConfig {
pub enum DatanodeWalConfig {
RaftEngine(RaftEngineConfig),
Kafka(DatanodeKafkaConfig),
Noop,
}
impl Default for DatanodeWalConfig {
@@ -51,11 +53,13 @@ impl Default for DatanodeWalConfig {
}
}
impl From<DatanodeWalConfig> for MetasrvWalConfig {
fn from(config: DatanodeWalConfig) -> Self {
impl TryFrom<DatanodeWalConfig> for MetasrvWalConfig {
type Error = Error;
fn try_from(config: DatanodeWalConfig) -> Result<Self, Self::Error> {
match config {
DatanodeWalConfig::RaftEngine(_) => Self::RaftEngine,
DatanodeWalConfig::Kafka(config) => Self::Kafka(MetasrvKafkaConfig {
DatanodeWalConfig::RaftEngine(_) => Ok(Self::RaftEngine),
DatanodeWalConfig::Kafka(config) => Ok(Self::Kafka(MetasrvKafkaConfig {
connection: config.connection,
kafka_topic: config.kafka_topic,
auto_create_topics: config.auto_create_topics,
@@ -67,7 +71,11 @@ impl From<DatanodeWalConfig> for MetasrvWalConfig {
flush_trigger_size: DEFAULT_FLUSH_TRIGGER_SIZE,
// This field won't be used in standalone mode
checkpoint_trigger_size: DEFAULT_CHECKPOINT_TRIGGER_SIZE,
}),
})),
DatanodeWalConfig::Noop => UnsupportedWalProviderSnafu {
provider: "noop".to_string(),
}
.fail(),
}
}
}
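A small sketch of the new fallible conversion, assuming both enums are exposed as `common_wal::config::{DatanodeWalConfig, MetasrvWalConfig}` (path inferred from the imports above, so treat it as an assumption):
use common_wal::config::{DatanodeWalConfig, MetasrvWalConfig};
// RaftEngine and Kafka configs convert as before; the new Noop provider is rejected
// with the UnsupportedWalProvider error added in this change.
fn convert(cfg: DatanodeWalConfig) {
    match MetasrvWalConfig::try_from(cfg) {
        Ok(metasrv_cfg) => println!("converted: {metasrv_cfg:?}"),
        Err(err) => println!("conversion failed: {err}"),
    }
}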

View File

@@ -92,6 +92,13 @@ pub enum Error {
#[snafu(implicit)]
location: Location,
},
#[snafu(display("Unsupported WAL provider: {}", provider))]
UnsupportedWalProvider {
provider: String,
#[snafu(implicit)]
location: Location,
},
}
pub type Result<T> = std::result::Result<T, Error>;

View File

@@ -43,6 +43,7 @@ datatypes.workspace = true
file-engine.workspace = true
futures.workspace = true
futures-util.workspace = true
hostname.workspace = true
humantime-serde.workspace = true
lazy_static.workspace = true
log-store.workspace = true
@@ -61,7 +62,6 @@ servers.workspace = true
session.workspace = true
snafu.workspace = true
store-api.workspace = true
substrait.workspace = true
table.workspace = true
tokio.workspace = true
toml.workspace = true

View File

@@ -34,6 +34,7 @@ use common_wal::config::raft_engine::RaftEngineConfig;
use file_engine::engine::FileRegionEngine;
use log_store::kafka::log_store::KafkaLogStore;
use log_store::kafka::{GlobalIndexCollector, default_index_file};
use log_store::noop::log_store::NoopLogStore;
use log_store::raft_engine::log_store::RaftEngineLogStore;
use meta_client::MetaClientRef;
use metric_engine::engine::MetricEngine;
@@ -561,6 +562,27 @@ impl DatanodeBuilder {
self.extension_range_provider_factory.take(),
);
builder.try_build().await.context(BuildMitoEngineSnafu)?
}
DatanodeWalConfig::Noop => {
let log_store = Arc::new(NoopLogStore);
let builder = MitoEngineBuilder::new(
&opts.storage.data_home,
config,
log_store,
object_store_manager,
schema_metadata_manager,
file_ref_manager,
partition_expr_fetcher.clone(),
plugins,
);
#[cfg(feature = "enterprise")]
let builder = builder.with_extension_range_provider_factory(
self.extension_range_provider_factory.take(),
);
builder.try_build().await.context(BuildMitoEngineSnafu)?
}
};

View File

@@ -90,7 +90,10 @@ impl HeartbeatTask {
let resp_handler_executor = Arc::new(HandlerGroupExecutor::new(vec![
region_alive_keeper.clone(),
Arc::new(ParseMailboxMessageHandler),
Arc::new(RegionHeartbeatResponseHandler::new(region_server.clone())),
Arc::new(
RegionHeartbeatResponseHandler::new(region_server.clone())
.with_open_region_parallelism(opts.init_regions_parallelism),
),
Arc::new(InvalidateCacheHandler::new(cache_invalidator)),
]));
@@ -251,6 +254,10 @@ impl HeartbeatTask {
start_time_ms: node_epoch,
cpus,
memory_bytes,
hostname: hostname::get()
.unwrap_or_default()
.to_string_lossy()
.to_string(),
}),
node_workloads: Some(NodeWorkloads::Datanode(DatanodeWorkloads {
types: workload_types.iter().map(|w| w.to_i32()).collect(),

View File

@@ -40,6 +40,7 @@ pub struct RegionHeartbeatResponseHandler {
catchup_tasks: TaskTracker<()>,
downgrade_tasks: TaskTracker<()>,
flush_tasks: TaskTracker<()>,
open_region_parallelism: usize,
}
/// Handler of the instruction.
@@ -78,17 +79,29 @@ impl RegionHeartbeatResponseHandler {
catchup_tasks: TaskTracker::new(),
downgrade_tasks: TaskTracker::new(),
flush_tasks: TaskTracker::new(),
// Default to half of the number of CPUs.
open_region_parallelism: (num_cpus::get() / 2).max(1),
}
}
/// Sets the parallelism for opening regions.
pub fn with_open_region_parallelism(mut self, parallelism: usize) -> Self {
self.open_region_parallelism = parallelism;
self
}
/// Builds the [InstructionHandler].
fn build_handler(instruction: Instruction) -> MetaResult<InstructionHandler> {
fn build_handler(&self, instruction: Instruction) -> MetaResult<InstructionHandler> {
match instruction {
Instruction::OpenRegion(open_region) => Ok(Box::new(move |handler_context| {
handler_context.handle_open_region_instruction(open_region)
})),
Instruction::CloseRegion(close_region) => Ok(Box::new(|handler_context| {
handler_context.handle_close_region_instruction(close_region)
Instruction::OpenRegions(open_regions) => {
let open_region_parallelism = self.open_region_parallelism;
Ok(Box::new(move |handler_context| {
handler_context
.handle_open_regions_instruction(open_regions, open_region_parallelism)
}))
}
Instruction::CloseRegions(close_regions) => Ok(Box::new(move |handler_context| {
handler_context.handle_close_regions_instruction(close_regions)
})),
Instruction::DowngradeRegion(downgrade_region) => {
Ok(Box::new(move |handler_context| {
@@ -109,14 +122,22 @@ impl RegionHeartbeatResponseHandler {
#[async_trait]
impl HeartbeatResponseHandler for RegionHeartbeatResponseHandler {
fn is_acceptable(&self, ctx: &HeartbeatResponseHandlerContext) -> bool {
matches!(
ctx.incoming_message.as_ref(),
Some((_, Instruction::OpenRegion { .. }))
| Some((_, Instruction::CloseRegion { .. }))
| Some((_, Instruction::DowngradeRegion { .. }))
| Some((_, Instruction::UpgradeRegion { .. }))
| Some((_, Instruction::FlushRegions { .. }))
)
matches!(ctx.incoming_message.as_ref(), |Some((
_,
Instruction::DowngradeRegion { .. },
))| Some((
_,
Instruction::UpgradeRegion { .. }
)) | Some((
_,
Instruction::FlushRegions { .. }
)) | Some((
_,
Instruction::OpenRegions { .. }
)) | Some((
_,
Instruction::CloseRegions { .. }
)))
}
async fn handle(&self, ctx: &mut HeartbeatResponseHandlerContext) -> MetaResult<HandleControl> {
@@ -130,7 +151,7 @@ impl HeartbeatResponseHandler for RegionHeartbeatResponseHandler {
let catchup_tasks = self.catchup_tasks.clone();
let downgrade_tasks = self.downgrade_tasks.clone();
let flush_tasks = self.flush_tasks.clone();
let handler = Self::build_handler(instruction)?;
let handler = self.build_handler(instruction)?;
let _handle = common_runtime::spawn_global(async move {
let reply = handler(HandlerContext {
region_server,
@@ -176,8 +197,8 @@ mod tests {
use crate::tests::mock_region_server;
pub struct HeartbeatResponseTestEnv {
mailbox: MailboxRef,
receiver: Receiver<(MessageMeta, InstructionReply)>,
pub(crate) mailbox: MailboxRef,
pub(crate) receiver: Receiver<(MessageMeta, InstructionReply)>,
}
impl HeartbeatResponseTestEnv {
@@ -248,16 +269,16 @@ mod tests {
}
fn close_region_instruction(region_id: RegionId) -> Instruction {
Instruction::CloseRegion(RegionIdent {
Instruction::CloseRegions(vec![RegionIdent {
table_id: region_id.table_id(),
region_number: region_id.region_number(),
datanode_id: 2,
engine: MITO_ENGINE_NAME.to_string(),
})
}])
}
fn open_region_instruction(region_id: RegionId, path: &str) -> Instruction {
Instruction::OpenRegion(OpenRegion::new(
Instruction::OpenRegions(vec![OpenRegion::new(
RegionIdent {
table_id: region_id.table_id(),
region_number: region_id.region_number(),
@@ -268,7 +289,7 @@ mod tests {
HashMap::new(),
HashMap::new(),
false,
))
)])
}
#[tokio::test]
@@ -303,7 +324,7 @@ mod tests {
let (_, reply) = heartbeat_env.receiver.recv().await.unwrap();
if let InstructionReply::CloseRegion(reply) = reply {
if let InstructionReply::CloseRegions(reply) = reply {
assert!(reply.result);
assert!(reply.error.is_none());
} else {
@@ -358,7 +379,7 @@ mod tests {
let (_, reply) = heartbeat_env.receiver.recv().await.unwrap();
if let InstructionReply::OpenRegion(reply) = reply {
if let InstructionReply::OpenRegions(reply) = reply {
assert!(reply.result);
assert!(reply.error.is_none());
} else {
@@ -391,7 +412,7 @@ mod tests {
let (_, reply) = heartbeat_env.receiver.recv().await.unwrap();
if let InstructionReply::OpenRegion(reply) = reply {
if let InstructionReply::OpenRegions(reply) = reply {
assert!(!reply.result);
assert!(reply.error.is_some());
} else {

View File

@@ -14,7 +14,8 @@
use common_meta::RegionIdent;
use common_meta::instruction::{InstructionReply, SimpleReply};
use common_telemetry::{tracing, warn};
use common_telemetry::warn;
use futures::future::join_all;
use futures_util::future::BoxFuture;
use store_api::region_request::{RegionCloseRequest, RegionRequest};
@@ -22,35 +23,124 @@ use crate::error;
use crate::heartbeat::handler::HandlerContext;
impl HandlerContext {
#[tracing::instrument(skip_all)]
pub(crate) fn handle_close_region_instruction(
pub(crate) fn handle_close_regions_instruction(
self,
region_ident: RegionIdent,
region_idents: Vec<RegionIdent>,
) -> BoxFuture<'static, Option<InstructionReply>> {
Box::pin(async move {
let region_id = Self::region_ident_to_region_id(&region_ident);
let request = RegionRequest::Close(RegionCloseRequest {});
let result = self.region_server.handle_request(region_id, request).await;
let region_ids = region_idents
.into_iter()
.map(|region_ident| Self::region_ident_to_region_id(&region_ident))
.collect::<Vec<_>>();
match result {
Ok(_) => Some(InstructionReply::CloseRegion(SimpleReply {
let futs = region_ids.iter().map(|region_id| {
self.region_server
.handle_request(*region_id, RegionRequest::Close(RegionCloseRequest {}))
});
let results = join_all(futs).await;
let mut errors = vec![];
for (region_id, result) in region_ids.into_iter().zip(results.into_iter()) {
match result {
Ok(_) => (),
Err(error::Error::RegionNotFound { .. }) => {
warn!(
"Received a close regions instruction from meta, but target region:{} is not found.",
region_id
);
}
Err(err) => errors.push(format!("region:{region_id}: {err:?}")),
}
}
if errors.is_empty() {
return Some(InstructionReply::CloseRegions(SimpleReply {
result: true,
error: None,
})),
Err(error::Error::RegionNotFound { .. }) => {
warn!(
"Received a close region instruction from meta, but target region:{region_id} is not found."
);
Some(InstructionReply::CloseRegion(SimpleReply {
result: true,
error: None,
}))
}
Err(err) => Some(InstructionReply::CloseRegion(SimpleReply {
result: false,
error: Some(format!("{err:?}")),
})),
}));
}
Some(InstructionReply::CloseRegions(SimpleReply {
result: false,
error: Some(errors.join("; ")),
}))
})
}
}
#[cfg(test)]
mod tests {
use std::assert_matches;
use std::sync::Arc;
use assert_matches::assert_matches;
use common_meta::RegionIdent;
use common_meta::heartbeat::handler::{HandleControl, HeartbeatResponseHandler};
use common_meta::heartbeat::mailbox::MessageMeta;
use common_meta::instruction::Instruction;
use mito2::config::MitoConfig;
use mito2::engine::MITO_ENGINE_NAME;
use mito2::test_util::{CreateRequestBuilder, TestEnv};
use store_api::region_request::RegionRequest;
use store_api::storage::RegionId;
use crate::heartbeat::handler::RegionHeartbeatResponseHandler;
use crate::heartbeat::handler::tests::HeartbeatResponseTestEnv;
use crate::tests::mock_region_server;
fn close_regions_instruction(region_ids: impl IntoIterator<Item = RegionId>) -> Instruction {
let region_idents = region_ids
.into_iter()
.map(|region_id| RegionIdent {
table_id: region_id.table_id(),
region_number: region_id.region_number(),
datanode_id: 2,
engine: MITO_ENGINE_NAME.to_string(),
})
.collect();
Instruction::CloseRegions(region_idents)
}
#[tokio::test]
async fn test_close_regions() {
common_telemetry::init_default_ut_logging();
let mut region_server = mock_region_server();
let heartbeat_handler = RegionHeartbeatResponseHandler::new(region_server.clone());
let mut engine_env = TestEnv::with_prefix("close-regions").await;
let engine = engine_env.create_engine(MitoConfig::default()).await;
region_server.register_engine(Arc::new(engine.clone()));
let region_id = RegionId::new(1024, 1);
let region_id1 = RegionId::new(1024, 2);
let builder = CreateRequestBuilder::new();
let create_req = builder.build();
region_server
.handle_request(region_id, RegionRequest::Create(create_req))
.await
.unwrap();
let create_req1 = builder.build();
region_server
.handle_request(region_id1, RegionRequest::Create(create_req1))
.await
.unwrap();
let meta = MessageMeta::new_test(1, "test", "dn-1", "meta-0");
let instruction =
close_regions_instruction([region_id, region_id1, RegionId::new(1024, 3)]);
let mut heartbeat_env = HeartbeatResponseTestEnv::new();
let mut ctx = heartbeat_env.create_handler_ctx((meta, instruction));
let control = heartbeat_handler.handle(&mut ctx).await.unwrap();
assert_matches!(control, HandleControl::Continue);
let (_, reply) = heartbeat_env.receiver.recv().await.unwrap();
let reply = reply.expect_close_regions_reply();
assert!(reply.result);
assert!(reply.error.is_none());
assert!(!engine.is_region_exists(region_id));
assert!(!engine.is_region_exists(region_id1));
assert!(!engine.is_region_exists(RegionId::new(1024, 3)));
}
}

View File

@@ -16,39 +16,146 @@ use common_meta::instruction::{InstructionReply, OpenRegion, SimpleReply};
use common_meta::wal_options_allocator::prepare_wal_options;
use futures_util::future::BoxFuture;
use store_api::path_utils::table_dir;
use store_api::region_request::{PathType, RegionOpenRequest, RegionRequest};
use store_api::region_request::{PathType, RegionOpenRequest};
use crate::heartbeat::handler::HandlerContext;
impl HandlerContext {
pub(crate) fn handle_open_region_instruction(
pub(crate) fn handle_open_regions_instruction(
self,
OpenRegion {
region_ident,
region_storage_path,
mut region_options,
region_wal_options,
skip_wal_replay,
}: OpenRegion,
open_regions: Vec<OpenRegion>,
open_region_parallelism: usize,
) -> BoxFuture<'static, Option<InstructionReply>> {
Box::pin(async move {
let region_id = Self::region_ident_to_region_id(&region_ident);
prepare_wal_options(&mut region_options, region_id, &region_wal_options);
let request = RegionRequest::Open(RegionOpenRequest {
engine: region_ident.engine,
table_dir: table_dir(&region_storage_path, region_id.table_id()),
path_type: PathType::Bare,
options: region_options,
skip_wal_replay,
checkpoint: None,
});
let result = self.region_server.handle_request(region_id, request).await;
let requests = open_regions
.into_iter()
.map(|open_region| {
let OpenRegion {
region_ident,
region_storage_path,
mut region_options,
region_wal_options,
skip_wal_replay,
} = open_region;
let region_id = Self::region_ident_to_region_id(&region_ident);
prepare_wal_options(&mut region_options, region_id, &region_wal_options);
let request = RegionOpenRequest {
engine: region_ident.engine,
table_dir: table_dir(&region_storage_path, region_id.table_id()),
path_type: PathType::Bare,
options: region_options,
skip_wal_replay,
checkpoint: None,
};
(region_id, request)
})
.collect::<Vec<_>>();
let result = self
.region_server
.handle_batch_open_requests(open_region_parallelism, requests, false)
.await;
let success = result.is_ok();
let error = result.as_ref().map_err(|e| format!("{e:?}")).err();
Some(InstructionReply::OpenRegion(SimpleReply {
Some(InstructionReply::OpenRegions(SimpleReply {
result: success,
error,
}))
})
}
}
#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;
use std::collections::HashMap;
use std::sync::Arc;
use common_meta::RegionIdent;
use common_meta::heartbeat::handler::{HandleControl, HeartbeatResponseHandler};
use common_meta::heartbeat::mailbox::MessageMeta;
use common_meta::instruction::{Instruction, OpenRegion};
use mito2::config::MitoConfig;
use mito2::engine::MITO_ENGINE_NAME;
use mito2::test_util::{CreateRequestBuilder, TestEnv};
use store_api::path_utils::table_dir;
use store_api::region_request::{RegionCloseRequest, RegionRequest};
use store_api::storage::RegionId;
use crate::heartbeat::handler::RegionHeartbeatResponseHandler;
use crate::heartbeat::handler::tests::HeartbeatResponseTestEnv;
use crate::tests::mock_region_server;
fn open_regions_instruction(
region_ids: impl IntoIterator<Item = RegionId>,
storage_path: &str,
) -> Instruction {
let region_idents = region_ids
.into_iter()
.map(|region_id| OpenRegion {
region_ident: RegionIdent {
datanode_id: 0,
table_id: region_id.table_id(),
region_number: region_id.region_number(),
engine: MITO_ENGINE_NAME.to_string(),
},
region_storage_path: storage_path.to_string(),
region_options: HashMap::new(),
region_wal_options: HashMap::new(),
skip_wal_replay: false,
})
.collect();
Instruction::OpenRegions(region_idents)
}
#[tokio::test]
async fn test_open_regions() {
common_telemetry::init_default_ut_logging();
let mut region_server = mock_region_server();
let heartbeat_handler = RegionHeartbeatResponseHandler::new(region_server.clone());
let mut engine_env = TestEnv::with_prefix("open-regions").await;
let engine = engine_env.create_engine(MitoConfig::default()).await;
region_server.register_engine(Arc::new(engine.clone()));
let region_id = RegionId::new(1024, 1);
let region_id1 = RegionId::new(1024, 2);
let storage_path = "test";
let builder = CreateRequestBuilder::new();
let mut create_req = builder.build();
create_req.table_dir = table_dir(storage_path, region_id.table_id());
region_server
.handle_request(region_id, RegionRequest::Create(create_req))
.await
.unwrap();
let mut create_req1 = builder.build();
create_req1.table_dir = table_dir(storage_path, region_id1.table_id());
region_server
.handle_request(region_id1, RegionRequest::Create(create_req1))
.await
.unwrap();
region_server
.handle_request(region_id, RegionRequest::Close(RegionCloseRequest {}))
.await
.unwrap();
region_server
.handle_request(region_id1, RegionRequest::Close(RegionCloseRequest {}))
.await
.unwrap();
let meta = MessageMeta::new_test(1, "test", "dn-1", "me-0");
let instruction = open_regions_instruction([region_id, region_id1], storage_path);
let mut heartbeat_env = HeartbeatResponseTestEnv::new();
let mut ctx = heartbeat_env.create_handler_ctx((meta, instruction));
let control = heartbeat_handler.handle(&mut ctx).await.unwrap();
assert_matches!(control, HandleControl::Continue);
let (_, reply) = heartbeat_env.receiver.recv().await.unwrap();
let reply = reply.expect_open_regions_reply();
assert!(reply.result);
assert!(reply.error.is_none());
assert!(engine.is_region_exists(region_id));
assert!(engine.is_region_exists(region_id1));
}
}

View File

@@ -1043,7 +1043,8 @@ impl RegionServerInner {
RegionRequest::Alter(_)
| RegionRequest::Flush(_)
| RegionRequest::Compact(_)
| RegionRequest::Truncate(_) => RegionChange::None,
| RegionRequest::Truncate(_)
| RegionRequest::BuildIndex(_) => RegionChange::None,
RegionRequest::Catchup(_) => RegionChange::Catchup,
};

View File

@@ -27,7 +27,7 @@ use datafusion_expr::{LogicalPlan, TableSource};
use futures::TryStreamExt;
use session::context::QueryContextRef;
use snafu::{OptionExt, ResultExt};
use store_api::sst_entry::{ManifestSstEntry, StorageSstEntry};
use store_api::sst_entry::{ManifestSstEntry, PuffinIndexMetaEntry, StorageSstEntry};
use store_api::storage::RegionId;
use crate::error::{DataFusionSnafu, ListStorageSstsSnafu, Result, UnexpectedSnafu};
@@ -35,10 +35,12 @@ use crate::region_server::RegionServer;
/// Reserved internal table kinds used.
/// These are recognized by reserved table names and mapped to providers.
#[allow(clippy::enum_variant_names)]
#[derive(Clone, Debug, PartialEq, Eq, Hash, Copy)]
enum InternalTableKind {
InspectSstManifest,
InspectSstStorage,
InspectSstIndexMeta,
}
impl InternalTableKind {
@@ -50,6 +52,9 @@ impl InternalTableKind {
if name.eq_ignore_ascii_case(StorageSstEntry::reserved_table_name_for_inspection()) {
return Some(Self::InspectSstStorage);
}
if name.eq_ignore_ascii_case(PuffinIndexMetaEntry::reserved_table_name_for_inspection()) {
return Some(Self::InspectSstIndexMeta);
}
None
}
@@ -58,6 +63,7 @@ impl InternalTableKind {
match self {
Self::InspectSstManifest => server.inspect_sst_manifest_provider().await,
Self::InspectSstStorage => server.inspect_sst_storage_provider().await,
Self::InspectSstIndexMeta => server.inspect_sst_index_meta_provider().await,
}
}
}
@@ -103,6 +109,25 @@ impl RegionServer {
let table = MemTable::try_new(schema, vec![vec![batch]]).context(DataFusionSnafu)?;
Ok(Arc::new(table))
}
/// Expose index metadata across the engine as an in-memory table.
pub async fn inspect_sst_index_meta_provider(&self) -> Result<Arc<dyn TableProvider>> {
let mito = {
let guard = self.inner.mito_engine.read().unwrap();
guard.as_ref().cloned().context(UnexpectedSnafu {
violated: "mito engine not available",
})?
};
let entries = mito.all_index_metas().await;
let schema = PuffinIndexMetaEntry::schema().arrow_schema().clone();
let batch = PuffinIndexMetaEntry::to_record_batch(&entries)
.map_err(DataFusionError::from)
.context(DataFusionSnafu)?;
let table = MemTable::try_new(schema, vec![vec![batch]]).context(DataFusionSnafu)?;
Ok(Arc::new(table))
}
}
/// A catalog list that resolves `TableProvider` by table name:

View File

@@ -16,3 +16,5 @@ pub type BinaryArray = arrow::array::BinaryArray;
pub type MutableBinaryArray = arrow::array::BinaryBuilder;
pub type StringArray = arrow::array::StringArray;
pub type MutableStringArray = arrow::array::StringBuilder;
pub type LargeStringArray = arrow::array::LargeStringArray;
pub type MutableLargeStringArray = arrow::array::LargeStringBuilder;

View File

@@ -33,8 +33,8 @@ use crate::types::{
BinaryType, BooleanType, DateType, Decimal128Type, DictionaryType, DurationMicrosecondType,
DurationMillisecondType, DurationNanosecondType, DurationSecondType, DurationType, Float32Type,
Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, IntervalDayTimeType,
IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, JsonType, ListType, NullType,
StringType, StructType, TimeMillisecondType, TimeType, TimestampMicrosecondType,
IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType, JsonFormat, JsonType, ListType,
NullType, StringType, StructType, TimeMillisecondType, TimeType, TimestampMicrosecondType,
TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, TimestampType,
UInt8Type, UInt16Type, UInt32Type, UInt64Type, VectorType,
};
@@ -350,7 +350,7 @@ impl ConcreteDataType {
pub fn as_json(&self) -> Option<JsonType> {
match self {
ConcreteDataType::Json(j) => Some(*j),
ConcreteDataType::Json(j) => Some(j.clone()),
_ => None,
}
}
@@ -454,12 +454,11 @@ impl TryFrom<&ArrowDataType> for ConcreteDataType {
ArrowDataType::Binary | ArrowDataType::LargeBinary | ArrowDataType::BinaryView => {
Self::binary_datatype()
}
ArrowDataType::Utf8 | ArrowDataType::LargeUtf8 | ArrowDataType::Utf8View => {
Self::string_datatype()
}
ArrowDataType::List(field) => Self::List(ListType::new(
ArrowDataType::Utf8 | ArrowDataType::Utf8View => Self::string_datatype(),
ArrowDataType::LargeUtf8 => Self::large_string_datatype(),
ArrowDataType::List(field) => Self::List(ListType::new(Arc::new(
ConcreteDataType::from_arrow_type(field.data_type()),
)),
))),
ArrowDataType::Dictionary(key_type, value_type) => {
let key_type = ConcreteDataType::from_arrow_type(key_type);
let value_type = ConcreteDataType::from_arrow_type(value_type);
@@ -518,6 +517,10 @@ impl_new_concrete_type_functions!(
);
impl ConcreteDataType {
pub fn large_string_datatype() -> Self {
ConcreteDataType::String(StringType::large_utf8())
}
pub fn timestamp_second_datatype() -> Self {
ConcreteDataType::Timestamp(TimestampType::Second(TimestampSecondType))
}
@@ -638,7 +641,7 @@ impl ConcreteDataType {
}
}
pub fn list_datatype(item_type: ConcreteDataType) -> ConcreteDataType {
pub fn list_datatype(item_type: Arc<ConcreteDataType>) -> ConcreteDataType {
ConcreteDataType::List(ListType::new(item_type))
}
@@ -668,6 +671,10 @@ impl ConcreteDataType {
pub fn vector_default_datatype() -> ConcreteDataType {
Self::vector_datatype(0)
}
pub fn json_native_datatype(inner_type: ConcreteDataType) -> ConcreteDataType {
ConcreteDataType::Json(JsonType::new(JsonFormat::Native(Box::new(inner_type))))
}
}
/// Data type abstraction.
@@ -773,13 +780,21 @@ mod tests {
ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8),
ConcreteDataType::String(_)
));
// Test LargeUtf8 mapping to large String type
let large_string_type = ConcreteDataType::from_arrow_type(&ArrowDataType::LargeUtf8);
assert!(matches!(large_string_type, ConcreteDataType::String(_)));
if let ConcreteDataType::String(string_type) = &large_string_type {
assert!(string_type.is_large());
} else {
panic!("Expected a String type");
}
assert_eq!(
ConcreteDataType::from_arrow_type(&ArrowDataType::List(Arc::new(Field::new(
"item",
ArrowDataType::Int32,
true,
)))),
ConcreteDataType::List(ListType::new(ConcreteDataType::int32_datatype()))
ConcreteDataType::List(ListType::new(Arc::new(ConcreteDataType::int32_datatype())))
);
assert!(matches!(
ConcreteDataType::from_arrow_type(&ArrowDataType::Date32),
@@ -787,6 +802,38 @@ mod tests {
));
}
#[test]
fn test_large_utf8_round_trip() {
// Test round-trip conversion for LargeUtf8
let large_utf8_arrow = ArrowDataType::LargeUtf8;
let concrete_type = ConcreteDataType::from_arrow_type(&large_utf8_arrow);
let back_to_arrow = concrete_type.as_arrow_type();
assert!(matches!(concrete_type, ConcreteDataType::String(_)));
// Round-trip should preserve the LargeUtf8 type
assert_eq!(large_utf8_arrow, back_to_arrow);
// Test that Utf8 and LargeUtf8 map to different string variants
let utf8_concrete = ConcreteDataType::from_arrow_type(&ArrowDataType::Utf8);
let large_utf8_concrete = ConcreteDataType::from_arrow_type(&ArrowDataType::LargeUtf8);
assert!(matches!(utf8_concrete, ConcreteDataType::String(_)));
assert!(matches!(large_utf8_concrete, ConcreteDataType::String(_)));
// They should have different size types
if let (ConcreteDataType::String(utf8_type), ConcreteDataType::String(large_type)) =
(&utf8_concrete, &large_utf8_concrete)
{
assert!(!utf8_type.is_large());
assert!(large_type.is_large());
} else {
panic!("Expected both to be String types");
}
// They should be different types
assert_ne!(utf8_concrete, large_utf8_concrete);
}
#[test]
fn test_from_arrow_timestamp() {
assert_eq!(
@@ -938,9 +985,10 @@ mod tests {
#[test]
fn test_as_list() {
let list_type = ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype());
let list_type =
ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int32_datatype()));
assert_eq!(
ListType::new(ConcreteDataType::int32_datatype()),
ListType::new(Arc::new(ConcreteDataType::int32_datatype())),
*list_type.as_list().unwrap()
);
assert!(ConcreteDataType::int32_datatype().as_list().is_none());
@@ -985,21 +1033,24 @@ mod tests {
);
// Nested types
assert_eq!(
ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()).to_string(),
ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int32_datatype()))
.to_string(),
"List<Int32>"
);
assert_eq!(
ConcreteDataType::list_datatype(ConcreteDataType::Dictionary(DictionaryType::new(
ConcreteDataType::int32_datatype(),
ConcreteDataType::string_datatype()
ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::Dictionary(
DictionaryType::new(
ConcreteDataType::int32_datatype(),
ConcreteDataType::string_datatype()
)
)))
.to_string(),
"List<Dictionary<Int32, String>>"
);
assert_eq!(
ConcreteDataType::list_datatype(ConcreteDataType::list_datatype(
ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype())
))
ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::list_datatype(Arc::new(
ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int32_datatype()))
))))
.to_string(),
"List<List<List<Int32>>>"
);

2332
src/datatypes/src/json.rs Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -19,6 +19,7 @@ pub mod data_type;
pub mod duration;
pub mod error;
pub mod interval;
pub mod json;
pub mod macros;
pub mod prelude;
pub mod scalars;

View File

@@ -380,6 +380,8 @@ impl<'a> ScalarRef<'a> for StructValueRef<'a> {
#[cfg(test)]
mod tests {
use std::sync::Arc;
use super::*;
use crate::data_type::ConcreteDataType;
use crate::timestamp::TimestampSecond;
@@ -451,14 +453,13 @@ mod tests {
#[test]
fn test_list_value_scalar() {
let list_value =
ListValue::new(vec![Value::Int32(123)], ConcreteDataType::int32_datatype());
let item_type = Arc::new(ConcreteDataType::int32_datatype());
let list_value = ListValue::new(vec![Value::Int32(123)], item_type.clone());
let list_ref = ListValueRef::Ref { val: &list_value };
assert_eq!(list_ref, list_value.as_scalar_ref());
assert_eq!(list_value, list_ref.to_owned_scalar());
let mut builder =
ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 1);
let mut builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 1);
builder.push(None);
builder.push(Some(list_value.as_scalar_ref()));
let vector = builder.finish();

View File

@@ -80,7 +80,10 @@ impl LogicalTypeId {
/// Panics if data type is not supported.
#[cfg(any(test, feature = "test"))]
pub fn data_type(&self) -> crate::data_type::ConcreteDataType {
use std::sync::Arc;
use crate::data_type::ConcreteDataType;
use crate::types::StructType;
match self {
LogicalTypeId::Null => ConcreteDataType::null_datatype(),
@@ -107,9 +110,11 @@ impl LogicalTypeId {
}
LogicalTypeId::TimestampNanosecond => ConcreteDataType::timestamp_nanosecond_datatype(),
LogicalTypeId::List => {
ConcreteDataType::list_datatype(ConcreteDataType::null_datatype())
ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::null_datatype()))
}
LogicalTypeId::Struct => {
ConcreteDataType::struct_datatype(StructType::new(Arc::new(vec![])))
}
LogicalTypeId::Struct => ConcreteDataType::struct_datatype(vec![].into()),
LogicalTypeId::Dictionary => ConcreteDataType::dictionary_datatype(
ConcreteDataType::null_datatype(),
ConcreteDataType::null_datatype(),

View File

@@ -44,8 +44,8 @@ pub use interval_type::{
IntervalDayTimeType, IntervalMonthDayNanoType, IntervalType, IntervalYearMonthType,
};
pub use json_type::{
JSON_TYPE_NAME, JsonType, json_type_value_to_serde_json, json_type_value_to_string,
parse_string_to_json_type_value,
JSON_TYPE_NAME, JsonFormat, JsonType, jsonb_to_serde_json, jsonb_to_string,
parse_string_to_jsonb,
};
pub use list_type::ListType;
pub use null_type::NullType;
@@ -53,7 +53,7 @@ pub use primitive_type::{
Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, LogicalPrimitiveType,
OrdPrimitive, UInt8Type, UInt16Type, UInt32Type, UInt64Type, WrapperType,
};
pub use string_type::StringType;
pub use string_type::{StringSizeType, StringType};
pub use struct_type::{StructField, StructType};
pub use time_type::{
TimeMicrosecondType, TimeMillisecondType, TimeNanosecondType, TimeSecondType, TimeType,

View File

@@ -104,7 +104,7 @@ pub fn can_cast_type(src_value: &Value, dest_type: &ConcreteDataType) -> bool {
(_, Boolean(_)) => src_type.is_numeric() || src_type.is_string(),
(Boolean(_), _) => dest_type.is_numeric() || dest_type.is_string(),
// numeric types cast
// numeric and string types cast
(
UInt8(_) | UInt16(_) | UInt32(_) | UInt64(_) | Int8(_) | Int16(_) | Int32(_) | Int64(_)
| Float32(_) | Float64(_) | String(_),

View File

@@ -21,6 +21,7 @@ use snafu::ResultExt;
use crate::data_type::DataType;
use crate::error::{DeserializeSnafu, InvalidJsonSnafu, InvalidJsonbSnafu, Result};
use crate::prelude::ConcreteDataType;
use crate::scalars::ScalarVectorBuilder;
use crate::type_id::LogicalTypeId;
use crate::value::Value;
@@ -28,19 +29,16 @@ use crate::vectors::{BinaryVectorBuilder, MutableVector};
pub const JSON_TYPE_NAME: &str = "Json";
#[derive(
Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, Default,
)]
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, Default)]
pub enum JsonFormat {
#[default]
Jsonb,
Native(Box<ConcreteDataType>),
}
/// JsonType is a data type for JSON data. It is stored as binary data of jsonb format.
/// It utilizes current binary value and vector implementation.
#[derive(
Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize,
)]
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub struct JsonType {
pub format: JsonFormat,
}
@@ -81,34 +79,26 @@ impl DataType for JsonType {
}
/// Converts a json type value to string
pub fn json_type_value_to_string(val: &[u8], format: &JsonFormat) -> Result<String> {
match format {
JsonFormat::Jsonb => match jsonb::from_slice(val) {
Ok(jsonb_value) => {
let serialized = jsonb_value.to_string();
Ok(serialized)
}
Err(e) => InvalidJsonbSnafu { error: e }.fail(),
},
pub fn jsonb_to_string(val: &[u8]) -> Result<String> {
match jsonb::from_slice(val) {
Ok(jsonb_value) => {
let serialized = jsonb_value.to_string();
Ok(serialized)
}
Err(e) => InvalidJsonbSnafu { error: e }.fail(),
}
}
/// Converts a json type value to serde_json::Value
pub fn json_type_value_to_serde_json(val: &[u8], format: &JsonFormat) -> Result<serde_json::Value> {
match format {
JsonFormat::Jsonb => {
let json_string = json_type_value_to_string(val, format)?;
serde_json::Value::from_str(json_string.as_str())
.context(DeserializeSnafu { json: json_string })
}
}
pub fn jsonb_to_serde_json(val: &[u8]) -> Result<serde_json::Value> {
let json_string = jsonb_to_string(val)?;
serde_json::Value::from_str(json_string.as_str())
.context(DeserializeSnafu { json: json_string })
}
/// Parses a string to a json type value
pub fn parse_string_to_json_type_value(s: &str, format: &JsonFormat) -> Result<Vec<u8>> {
match format {
JsonFormat::Jsonb => jsonb::parse_value(s.as_bytes())
.map_err(|_| InvalidJsonSnafu { value: s }.build())
.map(|json| json.to_vec()),
}
pub fn parse_string_to_jsonb(s: &str) -> Result<Vec<u8>> {
jsonb::parse_value(s.as_bytes())
.map_err(|_| InvalidJsonSnafu { value: s }.build())
.map(|json| json.to_vec())
}
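A brief round-trip sketch for the renamed helpers, using the `datatypes::types` re-exports shown later in this diff (paths assumed):
use datatypes::types::{jsonb_to_string, parse_string_to_jsonb};
// Text -> jsonb bytes -> text; both steps return the crate's error type.
fn jsonb_roundtrip(input: &str) -> datatypes::error::Result<String> {
    let bytes = parse_string_to_jsonb(input)?;
    jsonb_to_string(&bytes)
}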

View File

@@ -26,22 +26,19 @@ use crate::vectors::{ListVectorBuilder, MutableVector};
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub struct ListType {
/// The type of List's item.
// Use Box to avoid recursive dependency, as enum ConcreteDataType depends on ListType.
item_type: Box<ConcreteDataType>,
item_type: Arc<ConcreteDataType>,
}
impl Default for ListType {
fn default() -> Self {
ListType::new(ConcreteDataType::null_datatype())
ListType::new(Arc::new(ConcreteDataType::null_datatype()))
}
}
impl ListType {
/// Create a new `ListType` whose item's data type is `item_type`.
pub fn new(item_type: ConcreteDataType) -> Self {
ListType {
item_type: Box::new(item_type),
}
pub fn new(item_type: Arc<ConcreteDataType>) -> Self {
ListType { item_type }
}
/// Returns the item data type.
@@ -61,7 +58,7 @@ impl DataType for ListType {
}
fn default_value(&self) -> Value {
Value::List(ListValue::new(vec![], *self.item_type.clone()))
Value::List(ListValue::new(vec![], self.item_type.clone()))
}
fn as_arrow_type(&self) -> ArrowDataType {
@@ -75,7 +72,7 @@ impl DataType for ListType {
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
Box::new(ListVectorBuilder::with_type_capacity(
*self.item_type.clone(),
self.item_type.clone(),
capacity,
))
}
@@ -95,11 +92,14 @@ mod tests {
#[test]
fn test_list_type() {
let t = ListType::new(ConcreteDataType::boolean_datatype());
let t = ListType::new(Arc::new(ConcreteDataType::boolean_datatype()));
assert_eq!("List<Boolean>", t.name());
assert_eq!(LogicalTypeId::List, t.logical_type_id());
assert_eq!(
Value::List(ListValue::new(vec![], ConcreteDataType::boolean_datatype())),
Value::List(ListValue::new(
vec![],
Arc::new(ConcreteDataType::boolean_datatype())
)),
t.default_value()
);
assert_eq!(

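A short usage sketch for the Arc-based list item type introduced above (illustrative, not part of the diff):
use std::sync::Arc;
use datatypes::data_type::ConcreteDataType;
// Item types are now shared behind Arc, so an inner type can be reused cheaply
// when building nested lists such as List<List<Int32>>.
fn nested_list_type() -> ConcreteDataType {
    let item = Arc::new(ConcreteDataType::int32_datatype());
    ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::list_datatype(item)))
}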
View File

@@ -19,17 +19,97 @@ use common_base::bytes::StringBytes;
use serde::{Deserialize, Serialize};
use crate::data_type::{DataType, DataTypeRef};
use crate::prelude::ScalarVectorBuilder;
use crate::type_id::LogicalTypeId;
use crate::value::Value;
use crate::vectors::{MutableVector, StringVectorBuilder};
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub struct StringType;
/// String size variant to distinguish between UTF8 and LargeUTF8
#[derive(
Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, Default,
)]
pub enum StringSizeType {
/// Regular UTF8 strings (up to 2GB)
#[default]
Utf8,
/// Large UTF8 strings (up to 2^63 bytes)
LargeUtf8,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
pub struct StringType {
#[serde(default)]
size_type: StringSizeType,
}
/// Custom deserialization to support both old and new formats.
impl<'de> serde::Deserialize<'de> for StringType {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
#[derive(serde::Deserialize)]
struct Helper {
#[serde(default)]
size_type: StringSizeType,
}
let opt = Option::<Helper>::deserialize(deserializer)?;
Ok(match opt {
Some(helper) => Self {
size_type: helper.size_type,
},
None => Self::default(),
})
}
}
impl Default for StringType {
fn default() -> Self {
Self {
size_type: StringSizeType::Utf8,
}
}
}
impl StringType {
/// Create a new StringType with default (Utf8) size
pub fn new() -> Self {
Self {
size_type: StringSizeType::Utf8,
}
}
/// Create a new StringType with specified size
pub fn with_size(size_type: StringSizeType) -> Self {
Self { size_type }
}
/// Create a StringType for regular UTF8 strings
pub fn utf8() -> Self {
Self::with_size(StringSizeType::Utf8)
}
/// Create a StringType for large UTF8 strings
pub fn large_utf8() -> Self {
Self::with_size(StringSizeType::LargeUtf8)
}
/// Get the size type
pub fn size_type(&self) -> StringSizeType {
self.size_type
}
/// Check if this is a large UTF8 string type
pub fn is_large(&self) -> bool {
matches!(self.size_type, StringSizeType::LargeUtf8)
}
pub fn arc() -> DataTypeRef {
Arc::new(Self)
Arc::new(Self::new())
}
pub fn large_arc() -> DataTypeRef {
Arc::new(Self::large_utf8())
}
}
@@ -47,11 +127,19 @@ impl DataType for StringType {
}
fn as_arrow_type(&self) -> ArrowDataType {
ArrowDataType::Utf8
match self.size_type {
StringSizeType::Utf8 => ArrowDataType::Utf8,
StringSizeType::LargeUtf8 => ArrowDataType::LargeUtf8,
}
}
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector> {
Box::new(StringVectorBuilder::with_capacity(capacity))
match self.size_type {
StringSizeType::Utf8 => Box::new(StringVectorBuilder::with_string_capacity(capacity)),
StringSizeType::LargeUtf8 => {
Box::new(StringVectorBuilder::with_large_capacity(capacity))
}
}
}
fn try_cast(&self, from: Value) -> Option<Value> {
@@ -89,6 +177,8 @@ impl DataType for StringType {
Value::Duration(v) => Some(Value::String(StringBytes::from(v.to_string()))),
Value::Decimal128(v) => Some(Value::String(StringBytes::from(v.to_string()))),
Value::Json(v) => self.try_cast(*v),
// StringBytes only supports UTF-8; Value::Binary and collections are not allowed.
Value::Binary(_) | Value::List(_) | Value::Struct(_) => None,
}
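A minimal sketch showing that the two string flavours map to different Arrow types, using the constructors added in this change (the `DataType` trait import is assumed):
use datatypes::arrow::datatypes::DataType as ArrowDataType;
use datatypes::data_type::{ConcreteDataType, DataType};
// string_datatype keeps Arrow Utf8; the new large_string_datatype maps to LargeUtf8.
fn check_arrow_mapping() {
    assert_eq!(
        ConcreteDataType::string_datatype().as_arrow_type(),
        ArrowDataType::Utf8
    );
    assert_eq!(
        ConcreteDataType::large_string_datatype().as_arrow_type(),
        ArrowDataType::LargeUtf8
    );
}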

View File

@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use std::sync::Arc;
use arrow::datatypes::{DataType as ArrowDataType, Field};
use arrow_schema::Fields;
use serde::{Deserialize, Serialize};
@@ -22,7 +24,7 @@ use crate::vectors::StructVectorBuilder;
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub struct StructType {
fields: Vec<StructField>,
fields: Arc<Vec<StructField>>,
}
impl TryFrom<&Fields> for StructType {
@@ -38,13 +40,9 @@ impl TryFrom<&Fields> for StructType {
))
})
.collect::<Result<Vec<StructField>, Self::Error>>()?;
Ok(StructType { fields })
}
}
impl From<Vec<StructField>> for StructType {
fn from(fields: Vec<StructField>) -> Self {
StructType { fields }
Ok(StructType {
fields: Arc::new(fields),
})
}
}
@@ -87,12 +85,14 @@ impl DataType for StructType {
}
impl StructType {
pub fn new(fields: Vec<StructField>) -> Self {
StructType { fields }
pub fn new(fields: Arc<Vec<StructField>>) -> Self {
StructType {
fields: fields.clone(),
}
}
pub fn fields(&self) -> &[StructField] {
&self.fields
pub fn fields(&self) -> Arc<Vec<StructField>> {
self.fields.clone()
}
pub fn as_arrow_fields(&self) -> Fields {
@@ -123,6 +123,10 @@ impl StructField {
&self.name
}
pub fn take_name(self) -> String {
self.name
}
pub fn data_type(&self) -> &ConcreteDataType {
&self.data_type
}

View File

@@ -81,8 +81,12 @@ pub enum Value {
IntervalDayTime(IntervalDayTime),
IntervalMonthDayNano(IntervalMonthDayNano),
// Collection types:
List(ListValue),
Struct(StructValue),
// Json Logical types:
Json(Box<Value>),
}
impl Display for Value {
@@ -144,6 +148,9 @@ impl Display for Value {
.join(", ");
write!(f, "{{ {items} }}")
}
Value::Json(json_data) => {
write!(f, "Json({})", json_data)
}
}
}
}
@@ -190,6 +197,7 @@ macro_rules! define_data_type_func {
$struct::Struct(struct_value) => {
ConcreteDataType::struct_datatype(struct_value.struct_type().clone())
}
$struct::Json(v) => ConcreteDataType::json_native_datatype(v.data_type()),
}
}
};
@@ -200,7 +208,11 @@ impl Value {
/// Returns true if this is a null value.
pub fn is_null(&self) -> bool {
matches!(self, Value::Null)
match self {
Value::Null => true,
Value::Json(inner) => inner.is_null(),
_ => false,
}
}
/// Cast itself to [ListValue].
@@ -208,6 +220,7 @@ impl Value {
match self {
Value::Null => Ok(None),
Value::List(v) => Ok(Some(v)),
Value::Json(inner) => inner.as_list(),
other => error::CastTypeSnafu {
msg: format!("Failed to cast {other:?} to list value"),
}
@@ -219,6 +232,7 @@ impl Value {
match self {
Value::Null => Ok(None),
Value::Struct(v) => Ok(Some(v)),
Value::Json(inner) => inner.as_struct(),
other => error::CastTypeSnafu {
msg: format!("Failed to cast {other:?} to struct value"),
}
@@ -253,6 +267,7 @@ impl Value {
Value::Duration(v) => ValueRef::Duration(*v),
Value::Decimal128(v) => ValueRef::Decimal128(*v),
Value::Struct(v) => ValueRef::Struct(StructValueRef::Ref(v)),
Value::Json(v) => ValueRef::Json(Box::new(v.as_value_ref())),
}
}
@@ -322,6 +337,7 @@ impl Value {
Value::UInt8(v) => Some(*v as _),
Value::UInt16(v) => Some(*v as _),
Value::UInt32(v) => Some(*v as _),
Value::Json(inner) => inner.as_i64(),
_ => None,
}
}
@@ -333,6 +349,7 @@ impl Value {
Value::UInt16(v) => Some(*v as _),
Value::UInt32(v) => Some(*v as _),
Value::UInt64(v) => Some(*v),
Value::Json(inner) => inner.as_u64(),
_ => None,
}
}
@@ -349,6 +366,7 @@ impl Value {
Value::UInt16(v) => Some(*v as _),
Value::UInt32(v) => Some(*v as _),
Value::UInt64(v) => Some(*v as _),
Value::Json(inner) => inner.as_f64_lossy(),
_ => None,
}
}
@@ -365,6 +383,15 @@ impl Value {
pub fn as_bool(&self) -> Option<bool> {
match self {
Value::Boolean(b) => Some(*b),
Value::Json(inner) => inner.as_bool(),
_ => None,
}
}
/// Extract the inner JSON value from a JSON type.
pub fn into_json_inner(self) -> Option<Value> {
match self {
Value::Json(v) => Some(*v),
_ => None,
}
}
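A hedged sketch of the new `Value::Json` wrapper (assuming the public path `datatypes::value::Value`): scalar accessors such as `as_bool` look through the wrapper, and `into_json_inner` unwraps the boxed inner value.

use datatypes::value::Value;

fn json_wrapper_roundtrip() {
    let json = Value::Json(Box::new(Value::Boolean(true)));
    // Accessors delegate to the wrapped value.
    assert_eq!(json.as_bool(), Some(true));
    // `into_json_inner` returns the inner value; non-Json values yield None.
    assert_eq!(json.into_json_inner(), Some(Value::Boolean(true)));
    assert_eq!(Value::Int32(1).into_json_inner(), None);
}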
@@ -411,6 +438,7 @@ impl Value {
},
Value::Decimal128(_) => LogicalTypeId::Decimal128,
Value::Struct(_) => LogicalTypeId::Struct,
Value::Json(_) => LogicalTypeId::Json,
}
}
@@ -420,11 +448,11 @@ impl Value {
let value_type_id = self.logical_type_id();
let output_type_id = output_type.logical_type_id();
ensure!(
// Json type leverages Value(Binary) for storage.
output_type_id == value_type_id
|| self.is_null()
|| (output_type_id == LogicalTypeId::Json
&& value_type_id == LogicalTypeId::Binary),
&& (value_type_id == LogicalTypeId::Binary
|| value_type_id == LogicalTypeId::Json)),
error::ToScalarValueSnafu {
reason: format!(
"expect value to return output_type {output_type_id:?}, actual: {value_type_id:?}",
@@ -444,7 +472,13 @@ impl Value {
Value::Int64(v) => ScalarValue::Int64(Some(*v)),
Value::Float32(v) => ScalarValue::Float32(Some(v.0)),
Value::Float64(v) => ScalarValue::Float64(Some(v.0)),
Value::String(v) => ScalarValue::Utf8(Some(v.as_utf8().to_string())),
Value::String(v) => {
let s = v.as_utf8().to_string();
match output_type {
ConcreteDataType::String(t) if t.is_large() => ScalarValue::LargeUtf8(Some(s)),
_ => ScalarValue::Utf8(Some(s)),
}
}
Value::Binary(v) => ScalarValue::Binary(Some(v.to_vec())),
Value::Date(v) => ScalarValue::Date32(Some(v.val())),
Value::Null => to_null_scalar_value(output_type)?,
@@ -467,6 +501,7 @@ impl Value {
let struct_type = output_type.as_struct().unwrap();
struct_value.try_to_scalar_value(struct_type)?
}
Value::Json(v) => v.try_to_scalar_value(output_type)?,
};
Ok(scalar_value)
@@ -519,6 +554,8 @@ impl Value {
Value::IntervalDayTime(x) => Some(Value::IntervalDayTime(x.negative())),
Value::IntervalMonthDayNano(x) => Some(Value::IntervalMonthDayNano(x.negative())),
Value::Json(v) => v.try_negative().map(|neg| Value::Json(Box::new(neg))),
Value::Binary(_)
| Value::String(_)
| Value::Boolean(_)
@@ -575,7 +612,13 @@ pub fn to_null_scalar_value(output_type: &ConcreteDataType) -> Result<ScalarValu
ConcreteDataType::Binary(_) | ConcreteDataType::Json(_) | ConcreteDataType::Vector(_) => {
ScalarValue::Binary(None)
}
ConcreteDataType::String(_) => ScalarValue::Utf8(None),
ConcreteDataType::String(t) => {
if t.is_large() {
ScalarValue::LargeUtf8(None)
} else {
ScalarValue::Utf8(None)
}
}
ConcreteDataType::Date(_) => ScalarValue::Date32(None),
ConcreteDataType::Timestamp(t) => timestamp_to_scalar_value(t.unit(), None),
ConcreteDataType::Interval(v) => match v {
@@ -866,12 +909,11 @@ impl TryFrom<Value> for serde_json::Value {
Value::Duration(v) => serde_json::to_value(v.value())?,
Value::Decimal128(v) => serde_json::to_value(v.to_string())?,
Value::Struct(v) => {
let map = v
.fields
.clone() // TODO:(sunng87) remove in next patch when into_parts is merged
let (items, struct_type) = v.into_parts();
let map = struct_type
.fields()
.iter()
.zip(v.take_items().into_iter())
.zip(items.into_iter())
.map(|(field, value)| {
Ok((
field.name().to_string(),
@@ -881,6 +923,7 @@ impl TryFrom<Value> for serde_json::Value {
.collect::<serde_json::Result<Map<String, serde_json::Value>>>()?;
serde_json::Value::Object(map)
}
Value::Json(v) => serde_json::Value::try_from(*v)?,
};
Ok(json_value)
@@ -894,13 +937,13 @@ pub struct ListValue {
items: Vec<Value>,
/// The inner values' datatype, used to distinguish empty lists of different datatypes.
/// Restricted by DataFusion: the null datatype cannot be used for an empty list.
datatype: ConcreteDataType,
datatype: Arc<ConcreteDataType>,
}
impl Eq for ListValue {}
impl ListValue {
pub fn new(items: Vec<Value>, datatype: ConcreteDataType) -> Self {
pub fn new(items: Vec<Value>, datatype: Arc<ConcreteDataType>) -> Self {
Self { items, datatype }
}
@@ -912,11 +955,24 @@ impl ListValue {
self.items
}
pub fn datatype(&self) -> &ConcreteDataType {
&self.datatype
pub fn into_parts(self) -> (Vec<Value>, Arc<ConcreteDataType>) {
(self.items, self.datatype)
}
fn try_to_scalar_value(&self, output_type: &ListType) -> Result<ScalarValue> {
/// Returns the data type of the list's inner values.
pub fn datatype(&self) -> Arc<ConcreteDataType> {
self.datatype.clone()
}
pub fn len(&self) -> usize {
self.items.len()
}
pub fn is_empty(&self) -> bool {
self.items.is_empty()
}
pub fn try_to_scalar_value(&self, output_type: &ListType) -> Result<ScalarValue> {
let vs = self
.items
.iter()
@@ -936,12 +992,13 @@ impl ListValue {
.first()
.map(|x| x.as_value_ref().data_size() * self.items.len())
.unwrap_or(0)
+ std::mem::size_of::<Arc<ConcreteDataType>>()
}
}
impl Default for ListValue {
fn default() -> ListValue {
ListValue::new(vec![], ConcreteDataType::null_datatype())
ListValue::new(vec![], Arc::new(ConcreteDataType::null_datatype()))
}
}
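A brief sketch of the Arc-based ListValue constructor and the new `into_parts`/`len` helpers (module paths assumed as `datatypes::value::{ListValue, Value}` and `datatypes::prelude::ConcreteDataType`).

use std::sync::Arc;

use datatypes::prelude::ConcreteDataType;
use datatypes::value::{ListValue, Value};

fn shared_item_type() {
    // One Arc'd item type can back many list values without deep-cloning the datatype.
    let item_type = Arc::new(ConcreteDataType::int32_datatype());
    let list = ListValue::new(vec![Value::Int32(1), Value::Int32(2)], item_type.clone());
    assert_eq!(list.len(), 2);
    assert!(!list.is_empty());
    // `into_parts` hands back the items together with the shared datatype.
    let (items, datatype) = list.into_parts();
    assert_eq!(items, vec![Value::Int32(1), Value::Int32(2)]);
    assert_eq!(datatype, item_type);
}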
@@ -979,6 +1036,13 @@ impl StructValue {
Ok(Self { items, fields })
}
/// Create a new struct value.
///
/// Panics if the number of items does not match the number of fields.
pub fn new(items: Vec<Value>, fields: StructType) -> Self {
Self::try_new(items, fields).unwrap()
}
pub fn items(&self) -> &[Value] {
&self.items
}
@@ -987,15 +1051,28 @@ impl StructValue {
self.items
}
pub fn into_parts(self) -> (Vec<Value>, StructType) {
(self.items, self.fields)
}
pub fn struct_type(&self) -> &StructType {
&self.fields
}
pub fn len(&self) -> usize {
self.items.len()
}
pub fn is_empty(&self) -> bool {
self.items.is_empty()
}
fn estimated_size(&self) -> usize {
self.items
.iter()
.map(|x| x.as_value_ref().data_size())
.sum()
.sum::<usize>()
+ std::mem::size_of::<StructType>()
}
fn try_to_scalar_value(&self, output_type: &StructType) -> Result<ScalarValue> {
@@ -1018,7 +1095,7 @@ impl StructValue {
impl Default for StructValue {
fn default() -> StructValue {
StructValue::try_new(vec![], StructType::new(vec![])).unwrap()
StructValue::try_new(vec![], StructType::new(Arc::new(vec![]))).unwrap()
}
}
@@ -1065,7 +1142,7 @@ impl TryFrom<ScalarValue> for Value {
.flatten()
.map(|x| x.try_into())
.collect::<Result<Vec<Value>>>()?;
Value::List(ListValue::new(items, datatype))
Value::List(ListValue::new(items, Arc::new(datatype)))
}
ScalarValue::Date32(d) => d.map(|x| Value::Date(Date::new(x))).unwrap_or(Value::Null),
ScalarValue::TimestampSecond(t, _) => t
@@ -1178,6 +1255,7 @@ impl From<ValueRef<'_>> for Value {
ValueRef::List(v) => v.to_value(),
ValueRef::Decimal128(v) => Value::Decimal128(v),
ValueRef::Struct(v) => v.to_value(),
ValueRef::Json(v) => Value::Json(Box::new(Value::from(*v))),
}
}
}
@@ -1220,6 +1298,8 @@ pub enum ValueRef<'a> {
// Compound types:
List(ListValueRef<'a>),
Struct(StructValueRef<'a>),
Json(Box<ValueRef<'a>>),
}
macro_rules! impl_as_for_value_ref {
@@ -1227,6 +1307,18 @@ macro_rules! impl_as_for_value_ref {
match $value {
ValueRef::Null => Ok(None),
ValueRef::$Variant(v) => Ok(Some(v.clone())),
ValueRef::Json(v) => match v.as_ref() {
ValueRef::Null => Ok(None),
ValueRef::$Variant(v) => Ok(Some(v.clone())),
other => error::CastTypeSnafu {
msg: format!(
"Failed to cast value ref {:?} to {}",
other,
stringify!($Variant)
),
}
.fail(),
},
other => error::CastTypeSnafu {
msg: format!(
"Failed to cast value ref {:?} to {}",
@@ -1244,60 +1336,65 @@ impl<'a> ValueRef<'a> {
/// Returns true if this is null.
pub fn is_null(&self) -> bool {
matches!(self, ValueRef::Null)
match self {
ValueRef::Null => true,
ValueRef::Json(v) => v.is_null(),
_ => false,
}
}
/// Cast itself to binary slice.
pub fn as_binary(&self) -> Result<Option<&'a [u8]>> {
pub fn try_into_binary(&self) -> Result<Option<&'a [u8]>> {
impl_as_for_value_ref!(self, Binary)
}
/// Cast itself to string slice.
pub fn as_string(&self) -> Result<Option<&'a str>> {
pub fn try_into_string(&self) -> Result<Option<&'a str>> {
impl_as_for_value_ref!(self, String)
}
/// Cast itself to boolean.
pub fn as_boolean(&self) -> Result<Option<bool>> {
pub fn try_into_boolean(&self) -> Result<Option<bool>> {
impl_as_for_value_ref!(self, Boolean)
}
pub fn as_i8(&self) -> Result<Option<i8>> {
pub fn try_into_i8(&self) -> Result<Option<i8>> {
impl_as_for_value_ref!(self, Int8)
}
pub fn as_u8(&self) -> Result<Option<u8>> {
pub fn try_into_u8(&self) -> Result<Option<u8>> {
impl_as_for_value_ref!(self, UInt8)
}
pub fn as_i16(&self) -> Result<Option<i16>> {
pub fn try_into_i16(&self) -> Result<Option<i16>> {
impl_as_for_value_ref!(self, Int16)
}
pub fn as_u16(&self) -> Result<Option<u16>> {
pub fn try_into_u16(&self) -> Result<Option<u16>> {
impl_as_for_value_ref!(self, UInt16)
}
pub fn as_i32(&self) -> Result<Option<i32>> {
pub fn try_into_i32(&self) -> Result<Option<i32>> {
impl_as_for_value_ref!(self, Int32)
}
pub fn as_u32(&self) -> Result<Option<u32>> {
pub fn try_into_u32(&self) -> Result<Option<u32>> {
impl_as_for_value_ref!(self, UInt32)
}
pub fn as_i64(&self) -> Result<Option<i64>> {
pub fn try_into_i64(&self) -> Result<Option<i64>> {
impl_as_for_value_ref!(self, Int64)
}
pub fn as_u64(&self) -> Result<Option<u64>> {
pub fn try_into_u64(&self) -> Result<Option<u64>> {
impl_as_for_value_ref!(self, UInt64)
}
pub fn as_f32(&self) -> Result<Option<f32>> {
pub fn try_into_f32(&self) -> Result<Option<f32>> {
match self {
ValueRef::Null => Ok(None),
ValueRef::Float32(f) => Ok(Some(f.0)),
ValueRef::Json(v) => v.try_into_f32(),
other => error::CastTypeSnafu {
msg: format!("Failed to cast value ref {:?} to ValueRef::Float32", other,),
}
@@ -1305,10 +1402,11 @@ impl<'a> ValueRef<'a> {
}
}
pub fn as_f64(&self) -> Result<Option<f64>> {
pub fn try_into_f64(&self) -> Result<Option<f64>> {
match self {
ValueRef::Null => Ok(None),
ValueRef::Float64(f) => Ok(Some(f.0)),
ValueRef::Json(v) => v.try_into_f64(),
other => error::CastTypeSnafu {
msg: format!("Failed to cast value ref {:?} to ValueRef::Float64", other,),
}
@@ -1317,50 +1415,51 @@ impl<'a> ValueRef<'a> {
}
/// Cast itself to [Date].
pub fn as_date(&self) -> Result<Option<Date>> {
pub fn try_into_date(&self) -> Result<Option<Date>> {
impl_as_for_value_ref!(self, Date)
}
/// Cast itself to [Timestamp].
pub fn as_timestamp(&self) -> Result<Option<Timestamp>> {
pub fn try_into_timestamp(&self) -> Result<Option<Timestamp>> {
impl_as_for_value_ref!(self, Timestamp)
}
/// Cast itself to [Time].
pub fn as_time(&self) -> Result<Option<Time>> {
pub fn try_into_time(&self) -> Result<Option<Time>> {
impl_as_for_value_ref!(self, Time)
}
pub fn as_duration(&self) -> Result<Option<Duration>> {
pub fn try_into_duration(&self) -> Result<Option<Duration>> {
impl_as_for_value_ref!(self, Duration)
}
/// Cast itself to [IntervalYearMonth].
pub fn as_interval_year_month(&self) -> Result<Option<IntervalYearMonth>> {
pub fn try_into_interval_year_month(&self) -> Result<Option<IntervalYearMonth>> {
impl_as_for_value_ref!(self, IntervalYearMonth)
}
/// Cast itself to [IntervalDayTime].
pub fn as_interval_day_time(&self) -> Result<Option<IntervalDayTime>> {
pub fn try_into_interval_day_time(&self) -> Result<Option<IntervalDayTime>> {
impl_as_for_value_ref!(self, IntervalDayTime)
}
/// Cast itself to [IntervalMonthDayNano].
pub fn as_interval_month_day_nano(&self) -> Result<Option<IntervalMonthDayNano>> {
pub fn try_into_interval_month_day_nano(&self) -> Result<Option<IntervalMonthDayNano>> {
impl_as_for_value_ref!(self, IntervalMonthDayNano)
}
/// Cast itself to [ListValueRef].
pub fn as_list(&self) -> Result<Option<ListValueRef<'_>>> {
pub fn try_into_list(&self) -> Result<Option<ListValueRef<'_>>> {
impl_as_for_value_ref!(self, List)
}
pub fn as_struct(&self) -> Result<Option<StructValueRef<'_>>> {
/// Cast itself to [StructValueRef].
pub fn try_into_struct(&self) -> Result<Option<StructValueRef<'_>>> {
impl_as_for_value_ref!(self, Struct)
}
/// Cast itself to [Decimal128].
pub fn as_decimal128(&self) -> Result<Option<Decimal128>> {
pub fn try_into_decimal128(&self) -> Result<Option<Decimal128>> {
impl_as_for_value_ref!(self, Decimal128)
}
}
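The `as_*` accessors on `ValueRef` are renamed to `try_into_*`; a call site migrates roughly as below (a sketch, assuming `datatypes::value::ValueRef` and the crate's `datatypes::error::Result` alias).

use datatypes::error::Result;
use datatypes::value::ValueRef;

fn read_optional_string(value: ValueRef<'_>) -> Result<Option<String>> {
    // Previously `value.as_string()?`; the rename makes the fallible cast explicit.
    let maybe_str = value.try_into_string()?;
    Ok(maybe_str.map(|s| s.to_string()))
}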
@@ -1454,7 +1553,7 @@ pub enum ListValueRef<'a> {
},
RefList {
val: Vec<ValueRef<'a>>,
item_datatype: ConcreteDataType,
item_datatype: Arc<ConcreteDataType>,
},
}
@@ -1471,9 +1570,9 @@ impl ListValueRef<'_> {
}
}
/// Returns the inner element's data type.
fn datatype(&self) -> ConcreteDataType {
fn datatype(&self) -> Arc<ConcreteDataType> {
match self {
ListValueRef::Indexed { vector, .. } => vector.data_type(),
ListValueRef::Indexed { vector, .. } => vector.item_type(),
ListValueRef::Ref { val } => val.datatype().clone(),
ListValueRef::RefList { item_datatype, .. } => item_datatype.clone(),
}
@@ -1614,13 +1713,20 @@ impl ValueRef<'_> {
ValueRef::List(v) => match v {
ListValueRef::Indexed { vector, .. } => vector.memory_size() / vector.len(),
ListValueRef::Ref { val } => val.estimated_size(),
ListValueRef::RefList { val, .. } => val.iter().map(|v| v.data_size()).sum(),
ListValueRef::RefList { val, .. } => {
val.iter().map(|v| v.data_size()).sum::<usize>()
+ std::mem::size_of::<Arc<ConcreteDataType>>()
}
},
ValueRef::Struct(val) => match val {
StructValueRef::Indexed { vector, .. } => vector.memory_size() / vector.len(),
StructValueRef::Ref(val) => val.estimated_size(),
StructValueRef::RefList { val, .. } => val.iter().map(|v| v.data_size()).sum(),
StructValueRef::RefList { val, .. } => {
val.iter().map(|v| v.data_size()).sum::<usize>()
+ std::mem::size_of::<StructType>()
}
},
ValueRef::Json(v) => v.data_size(),
}
}
}
@@ -1636,7 +1742,7 @@ pub(crate) mod tests {
use crate::vectors::ListVectorBuilder;
pub(crate) fn build_struct_type() -> StructType {
StructType::new(vec![
StructType::new(Arc::new(vec![
StructField::new("id".to_string(), ConcreteDataType::int32_datatype(), false),
StructField::new(
"name".to_string(),
@@ -1649,7 +1755,12 @@ pub(crate) mod tests {
ConcreteDataType::string_datatype(),
true,
),
])
StructField::new(
"awards".to_string(),
ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::boolean_datatype())),
true,
),
]))
}
pub(crate) fn build_struct_value() -> StructValue {
@@ -1660,6 +1771,7 @@ pub(crate) mod tests {
Value::String("tom".into()),
Value::UInt8(25),
Value::String("94038".into()),
Value::List(build_list_value()),
];
StructValue::try_new(struct_items, struct_type).unwrap()
}
@@ -1671,14 +1783,19 @@ pub(crate) mod tests {
ScalarValue::Utf8(Some("tom".into())).to_array().unwrap(),
ScalarValue::UInt8(Some(25)).to_array().unwrap(),
ScalarValue::Utf8(Some("94038".into())).to_array().unwrap(),
build_scalar_list_value().to_array().unwrap(),
];
let struct_arrow_array = StructArray::new(struct_type.as_arrow_fields(), arrays, None);
ScalarValue::Struct(Arc::new(struct_arrow_array))
}
pub fn build_list_type() -> ConcreteDataType {
ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::boolean_datatype()))
}
pub(crate) fn build_list_value() -> ListValue {
let items = vec![Value::Boolean(true), Value::Boolean(false)];
ListValue::new(items, ConcreteDataType::boolean_datatype())
ListValue::new(items, Arc::new(ConcreteDataType::boolean_datatype()))
}
pub(crate) fn build_scalar_list_value() -> ScalarValue {
@@ -1804,7 +1921,10 @@ pub(crate) mod tests {
build_scalar_list_value().try_into().unwrap()
);
assert_eq!(
Value::List(ListValue::new(vec![], ConcreteDataType::uint32_datatype())),
Value::List(ListValue::new(
vec![],
Arc::new(ConcreteDataType::uint32_datatype())
)),
ScalarValue::List(ScalarValue::new_list(&[], &ArrowDataType::UInt32, true))
.try_into()
.unwrap()
@@ -2071,15 +2191,13 @@ pub(crate) mod tests {
&ConcreteDataType::binary_datatype(),
&Value::Binary(Bytes::from(b"world".as_slice())),
);
let item_type = Arc::new(ConcreteDataType::int32_datatype());
check_type_and_value(
&ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()),
&Value::List(ListValue::new(
vec![Value::Int32(10)],
ConcreteDataType::int32_datatype(),
)),
&ConcreteDataType::list_datatype(item_type.clone()),
&Value::List(ListValue::new(vec![Value::Int32(10)], item_type.clone())),
);
check_type_and_value(
&ConcreteDataType::list_datatype(ConcreteDataType::null_datatype()),
&ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::null_datatype())),
&Value::List(ListValue::default()),
);
check_type_and_value(
@@ -2139,11 +2257,12 @@ pub(crate) mod tests {
&Value::Decimal128(Decimal128::new(1, 38, 10)),
);
let item_type = Arc::new(ConcreteDataType::boolean_datatype());
check_type_and_value(
&ConcreteDataType::list_datatype(ConcreteDataType::boolean_datatype()),
&ConcreteDataType::list_datatype(item_type.clone()),
&Value::List(ListValue::new(
vec![Value::Boolean(true)],
ConcreteDataType::boolean_datatype(),
item_type.clone(),
)),
);
@@ -2151,6 +2270,23 @@ pub(crate) mod tests {
&ConcreteDataType::struct_datatype(build_struct_type()),
&Value::Struct(build_struct_value()),
);
check_type_and_value(
&ConcreteDataType::json_native_datatype(ConcreteDataType::boolean_datatype()),
&Value::Json(Box::new(Value::Boolean(true))),
);
check_type_and_value(
&ConcreteDataType::json_native_datatype(build_list_type()),
&Value::Json(Box::new(Value::List(build_list_value()))),
);
check_type_and_value(
&ConcreteDataType::json_native_datatype(ConcreteDataType::struct_datatype(
build_struct_type(),
)),
&Value::Json(Box::new(Value::Struct(build_struct_value()))),
);
}
#[test]
@@ -2255,7 +2391,7 @@ pub(crate) mod tests {
json_value,
to_json(Value::List(ListValue {
items: vec![Value::Int32(123)],
datatype: ConcreteDataType::int32_datatype(),
datatype: Arc::new(ConcreteDataType::int32_datatype()),
}))
);
@@ -2265,7 +2401,7 @@ pub(crate) mod tests {
Value::String("tomcat".into()),
Value::Boolean(true),
],
StructType::new(vec![
StructType::new(Arc::new(vec![
StructField::new("num".to_string(), ConcreteDataType::int64_datatype(), true),
StructField::new(
"name".to_string(),
@@ -2277,11 +2413,39 @@ pub(crate) mod tests {
ConcreteDataType::boolean_datatype(),
true,
),
]),
])),
)
.unwrap();
assert_eq!(
serde_json::Value::try_from(Value::Struct(struct_value)).unwrap(),
serde_json::Value::try_from(Value::Struct(struct_value.clone())).unwrap(),
serde_json::json!({
"num": 42,
"name": "tomcat",
"yes_or_no": true
})
);
// string wrapped in json
assert_eq!(
serde_json::Value::try_from(Value::Json(Box::new(Value::String("hello".into()))))
.unwrap(),
serde_json::json!("hello")
);
// list wrapped in json
assert_eq!(
serde_json::Value::try_from(Value::Json(Box::new(Value::List(ListValue::new(
vec![Value::Int32(1), Value::Int32(2), Value::Int32(3),],
Arc::new(ConcreteDataType::int32_datatype())
)))))
.unwrap(),
serde_json::json!([1, 2, 3])
);
// struct wrapped in json
assert_eq!(
serde_json::Value::try_from(Value::Json(Box::new(Value::Struct(struct_value))))
.unwrap(),
serde_json::json!({
"num": 42,
"name": "tomcat",
@@ -2293,6 +2457,7 @@ pub(crate) mod tests {
#[test]
fn test_null_value() {
assert!(Value::Null.is_null());
assert!(Value::Json(Box::new(Value::Null)).is_null());
assert!(!Value::Boolean(true).is_null());
assert!(Value::Null < Value::Boolean(false));
assert!(Value::Boolean(true) > Value::Null);
@@ -2371,6 +2536,13 @@ pub(crate) mod tests {
ValueRef::Struct(StructValueRef::Ref(&struct_value)),
Value::Struct(struct_value.clone()).as_value_ref()
);
assert_eq!(
ValueRef::Json(Box::new(ValueRef::Struct(StructValueRef::Ref(
&struct_value
)))),
Value::Json(Box::new(Value::Struct(struct_value.clone()))).as_value_ref()
);
}
#[test]
@@ -2381,11 +2553,11 @@ pub(crate) mod tests {
};
}
check_as_null!(as_binary);
check_as_null!(as_string);
check_as_null!(as_boolean);
check_as_null!(as_date);
check_as_null!(as_list);
check_as_null!(try_into_binary);
check_as_null!(try_into_string);
check_as_null!(try_into_boolean);
check_as_null!(try_into_list);
check_as_null!(try_into_struct);
macro_rules! check_as_correct {
($data: expr, $Variant: ident, $method: ident) => {
@@ -2393,27 +2565,29 @@ pub(crate) mod tests {
};
}
check_as_correct!("hello", String, as_string);
check_as_correct!("hello".as_bytes(), Binary, as_binary);
check_as_correct!(true, Boolean, as_boolean);
check_as_correct!(Date::new(123), Date, as_date);
check_as_correct!(Time::new_second(12), Time, as_time);
check_as_correct!(Duration::new_second(12), Duration, as_duration);
check_as_correct!("hello", String, try_into_string);
check_as_correct!("hello".as_bytes(), Binary, try_into_binary);
check_as_correct!(true, Boolean, try_into_boolean);
check_as_correct!(Date::new(123), Date, try_into_date);
check_as_correct!(Time::new_second(12), Time, try_into_time);
check_as_correct!(Duration::new_second(12), Duration, try_into_duration);
let list = build_list_value();
check_as_correct!(ListValueRef::Ref { val: &list }, List, as_list);
check_as_correct!(ListValueRef::Ref { val: &list }, List, try_into_list);
let struct_value = build_struct_value();
check_as_correct!(StructValueRef::Ref(&struct_value), Struct, as_struct);
check_as_correct!(StructValueRef::Ref(&struct_value), Struct, try_into_struct);
let wrong_value = ValueRef::Int32(12345);
assert!(wrong_value.as_binary().is_err());
assert!(wrong_value.as_string().is_err());
assert!(wrong_value.as_boolean().is_err());
assert!(wrong_value.as_date().is_err());
assert!(wrong_value.as_list().is_err());
assert!(wrong_value.as_time().is_err());
assert!(wrong_value.as_timestamp().is_err());
assert!(wrong_value.try_into_binary().is_err());
assert!(wrong_value.try_into_string().is_err());
assert!(wrong_value.try_into_boolean().is_err());
assert!(wrong_value.try_into_list().is_err());
assert!(wrong_value.try_into_struct().is_err());
assert!(wrong_value.try_into_date().is_err());
assert!(wrong_value.try_into_time().is_err());
assert!(wrong_value.try_into_timestamp().is_err());
assert!(wrong_value.try_into_duration().is_err());
}
#[test]
@@ -2457,7 +2631,7 @@ pub(crate) mod tests {
assert_eq!(
Value::List(ListValue::new(
vec![],
ConcreteDataType::timestamp_second_datatype(),
Arc::new(ConcreteDataType::timestamp_second_datatype()),
))
.to_string(),
"TimestampSecond[]"
@@ -2465,7 +2639,7 @@ pub(crate) mod tests {
assert_eq!(
Value::List(ListValue::new(
vec![],
ConcreteDataType::timestamp_millisecond_datatype(),
Arc::new(ConcreteDataType::timestamp_millisecond_datatype()),
))
.to_string(),
"TimestampMillisecond[]"
@@ -2473,7 +2647,7 @@ pub(crate) mod tests {
assert_eq!(
Value::List(ListValue::new(
vec![],
ConcreteDataType::timestamp_microsecond_datatype(),
Arc::new(ConcreteDataType::timestamp_microsecond_datatype()),
))
.to_string(),
"TimestampMicrosecond[]"
@@ -2481,7 +2655,7 @@ pub(crate) mod tests {
assert_eq!(
Value::List(ListValue::new(
vec![],
ConcreteDataType::timestamp_nanosecond_datatype(),
Arc::new(ConcreteDataType::timestamp_nanosecond_datatype()),
))
.to_string(),
"TimestampNanosecond[]"
@@ -2489,8 +2663,13 @@ pub(crate) mod tests {
assert_eq!(
Value::Struct(build_struct_value()).to_string(),
"{ id: 1, name: tom, age: 25, address: 94038 }"
"{ id: 1, name: tom, age: 25, address: 94038, awards: Boolean[true, false] }"
);
assert_eq!(
Value::Json(Box::new(Value::Struct(build_struct_value()))).to_string(),
"Json({ id: 1, name: tom, age: 25, address: 94038, awards: Boolean[true, false] })"
)
}
#[test]
@@ -2600,9 +2779,9 @@ pub(crate) mod tests {
assert_eq!(
build_scalar_list_value(),
Value::List(build_list_value())
.try_to_scalar_value(&ConcreteDataType::list_datatype(
.try_to_scalar_value(&ConcreteDataType::list_datatype(Arc::new(
ConcreteDataType::boolean_datatype()
))
)))
.unwrap()
);
}
@@ -2747,9 +2926,9 @@ pub(crate) mod tests {
assert_eq!(
ScalarValue::new_null_list(ArrowDataType::Boolean, true, 1),
Value::Null
.try_to_scalar_value(&ConcreteDataType::list_datatype(
ConcreteDataType::boolean_datatype(),
))
.try_to_scalar_value(&ConcreteDataType::list_datatype(Arc::new(
ConcreteDataType::boolean_datatype()
)))
.unwrap()
);
@@ -2764,11 +2943,10 @@ pub(crate) mod tests {
#[test]
fn test_list_value_to_scalar_value() {
let items = vec![Value::Int32(-1), Value::Null];
let list = Value::List(ListValue::new(items, ConcreteDataType::int32_datatype()));
let item_type = Arc::new(ConcreteDataType::int32_datatype());
let list = Value::List(ListValue::new(items, item_type.clone()));
let df_list = list
.try_to_scalar_value(&ConcreteDataType::list_datatype(
ConcreteDataType::int32_datatype(),
))
.try_to_scalar_value(&ConcreteDataType::list_datatype(item_type.clone()))
.unwrap();
assert!(matches!(df_list, ScalarValue::List(_)));
match df_list {
@@ -2927,10 +3105,10 @@ pub(crate) mod tests {
Value::String("hello world".into()),
Value::String("greptimedb".into()),
],
datatype: ConcreteDataType::string_datatype(),
datatype: Arc::new(ConcreteDataType::string_datatype()),
},
}),
22,
30,
);
let data = vec![
@@ -2938,12 +3116,12 @@ pub(crate) mod tests {
None,
Some(vec![Some(4), None, Some(6)]),
];
let mut builder =
ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 8);
let item_type = Arc::new(ConcreteDataType::int32_datatype());
let mut builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 8);
for vec_opt in &data {
if let Some(vec) = vec_opt {
let values = vec.iter().map(|v| Value::from(*v)).collect();
let list_value = ListValue::new(values, ConcreteDataType::int32_datatype());
let list_value = ListValue::new(values, item_type.clone());
builder.push(Some(ListValueRef::Ref { val: &list_value }));
} else {
@@ -2977,7 +3155,14 @@ pub(crate) mod tests {
check_value_ref_size_eq(
&ValueRef::Struct(StructValueRef::Ref(&build_struct_value())),
13,
31,
);
check_value_ref_size_eq(
&ValueRef::Json(Box::new(ValueRef::Struct(StructValueRef::Ref(
&build_struct_value(),
)))),
31,
);
}

View File

@@ -424,12 +424,12 @@ pub mod tests {
#[test]
#[should_panic(expected = "Must use ListVectorBuilder::with_type_capacity()")]
fn test_mutable_vector_list_data_type() {
let item_type = Arc::new(ConcreteDataType::int32_datatype());
// List type
let builder =
ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 1024);
let builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 1024);
assert_eq!(
builder.data_type(),
ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype())
ConcreteDataType::list_datatype(item_type)
);
// Panic with_capacity

View File

@@ -242,7 +242,7 @@ impl MutableVector for BinaryVectorBuilder {
}
fn try_push_value_ref(&mut self, value: &ValueRef) -> Result<()> {
match value.as_binary()? {
match value.try_into_binary()? {
Some(v) => self.mutable_array.append_value(v),
None => self.mutable_array.append_null(),
}
@@ -475,7 +475,7 @@ mod tests {
.collect::<Vec<_>>();
for i in 0..3 {
assert_eq!(
json_vector.get_ref(i).as_binary().unwrap().unwrap(),
json_vector.get_ref(i).try_into_binary().unwrap().unwrap(),
jsonbs.get(i).unwrap().as_slice()
);
}
@@ -486,7 +486,7 @@ mod tests {
.unwrap();
for i in 0..3 {
assert_eq!(
json_vector.get_ref(i).as_binary().unwrap().unwrap(),
json_vector.get_ref(i).try_into_binary().unwrap().unwrap(),
jsonbs.get(i).unwrap().as_slice()
);
}
@@ -551,8 +551,8 @@ mod tests {
assert_eq!(converted.len(), expected.len());
for i in 0..3 {
assert_eq!(
converted.get_ref(i).as_binary().unwrap().unwrap(),
expected.get_ref(i).as_binary().unwrap().unwrap()
converted.get_ref(i).try_into_binary().unwrap().unwrap(),
expected.get_ref(i).try_into_binary().unwrap().unwrap()
);
}
}

View File

@@ -180,7 +180,7 @@ impl MutableVector for BooleanVectorBuilder {
}
fn try_push_value_ref(&mut self, value: &ValueRef) -> Result<()> {
match value.as_boolean()? {
match value.try_into_boolean()? {
Some(v) => self.mutable_array.append_value(v),
None => self.mutable_array.append_null(),
}

View File

@@ -315,7 +315,7 @@ impl MutableVector for Decimal128VectorBuilder {
}
fn try_push_value_ref(&mut self, value: &ValueRef) -> Result<()> {
let decimal_val = value.as_decimal128()?.map(|v| v.val());
let decimal_val = value.try_into_decimal128()?.map(|v| v.val());
self.mutable_array.append_option(decimal_val);
Ok(())
}

View File

@@ -168,7 +168,7 @@ impl Helper {
ConstantVector::new(Arc::new(BinaryVector::from(vec![v])), length)
}
ScalarValue::List(array) => {
let item_type = ConcreteDataType::try_from(&array.value_type())?;
let item_type = Arc::new(ConcreteDataType::try_from(&array.value_type())?);
let mut builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 1);
let values = ScalarValue::convert_array_to_scalar_vec(array.as_ref())
.context(ConvertArrowArrayToScalarsSnafu)?
@@ -291,7 +291,8 @@ impl Helper {
ArrowDataType::Float32 => Arc::new(Float32Vector::try_from_arrow_array(array)?),
ArrowDataType::Float64 => Arc::new(Float64Vector::try_from_arrow_array(array)?),
ArrowDataType::Utf8 => Arc::new(StringVector::try_from_arrow_array(array)?),
ArrowDataType::LargeUtf8 | ArrowDataType::Utf8View => {
ArrowDataType::LargeUtf8 => Arc::new(StringVector::try_from_arrow_array(array)?),
ArrowDataType::Utf8View => {
let array = arrow::compute::cast(array.as_ref(), &ArrowDataType::Utf8)
.context(crate::error::ArrowComputeSnafu)?;
Arc::new(StringVector::try_from_arrow_array(array)?)
@@ -559,7 +560,7 @@ mod tests {
));
let vector = Helper::try_from_scalar_value(value, 3).unwrap();
assert_eq!(
ConcreteDataType::list_datatype(ConcreteDataType::int32_datatype()),
ConcreteDataType::list_datatype(Arc::new(ConcreteDataType::int32_datatype())),
vector.data_type()
);
assert_eq!(3, vector.len());
@@ -742,17 +743,17 @@ mod tests {
#[test]
fn test_large_string_array_into_vector() {
let input_vec = vec!["a", "b"];
let assertion_array = StringArray::from(input_vec.clone());
let assertion_array = LargeStringArray::from(input_vec.clone());
let large_string_array: ArrayRef = Arc::new(LargeStringArray::from(input_vec));
let vector = Helper::try_into_vector(large_string_array).unwrap();
assert_eq!(2, vector.len());
assert_eq!(0, vector.null_count());
let output_arrow_array: StringArray = vector
let output_arrow_array: LargeStringArray = vector
.to_arrow_array()
.as_any()
.downcast_ref::<StringArray>()
.downcast_ref::<LargeStringArray>()
.unwrap()
.clone();
assert_eq!(&assertion_array, &output_arrow_array);

View File

@@ -35,7 +35,7 @@ use crate::vectors::{self, Helper, MutableVector, Validity, Vector, VectorRef};
pub struct ListVector {
array: ListArray,
/// The datatype of the items in the list.
item_type: ConcreteDataType,
item_type: Arc<ConcreteDataType>,
}
impl ListVector {
@@ -50,7 +50,7 @@ impl ListVector {
&self.array
}
pub(crate) fn item_type(&self) -> ConcreteDataType {
pub(crate) fn item_type(&self) -> Arc<ConcreteDataType> {
self.item_type.clone()
}
}
@@ -145,10 +145,10 @@ impl Serializable for ListVector {
impl From<ListArray> for ListVector {
fn from(array: ListArray) -> Self {
let item_type = ConcreteDataType::from_arrow_type(match array.data_type() {
let item_type = Arc::new(ConcreteDataType::from_arrow_type(match array.data_type() {
ArrowDataType::List(field) => field.data_type(),
other => panic!("Try to create ListVector from an arrow array with type {other:?}"),
});
}));
Self { array, item_type }
}
}
@@ -217,7 +217,7 @@ impl ScalarVector for ListVector {
// See https://github.com/apache/arrow-rs/blob/94565bca99b5d9932a3e9a8e094aaf4e4384b1e5/arrow-array/src/builder/generic_list_builder.rs
/// [ListVector] builder.
pub struct ListVectorBuilder {
item_type: ConcreteDataType,
item_type: Arc<ConcreteDataType>,
offsets_builder: Int32BufferBuilder,
null_buffer_builder: NullBufferBuilder,
values_builder: Box<dyn MutableVector>,
@@ -226,7 +226,10 @@ pub struct ListVectorBuilder {
impl ListVectorBuilder {
/// Creates a new [`ListVectorBuilder`]. `item_type` is the data type of the list item, `capacity`
/// is the number of items to pre-allocate space for in this builder.
pub fn with_type_capacity(item_type: ConcreteDataType, capacity: usize) -> ListVectorBuilder {
pub fn with_type_capacity(
item_type: Arc<ConcreteDataType>,
capacity: usize,
) -> ListVectorBuilder {
let mut offsets_builder = Int32BufferBuilder::new(capacity + 1);
offsets_builder.append(0);
// The actual required capacity might be greater than the capacity of the `ListVector`
@@ -284,7 +287,7 @@ impl MutableVector for ListVectorBuilder {
}
fn try_push_value_ref(&mut self, value: &ValueRef) -> Result<()> {
if let Some(list_ref) = value.as_list()? {
if let Some(list_ref) = value.try_into_list()? {
match list_ref {
ListValueRef::Indexed { vector, idx } => match vector.get(idx).as_list()? {
Some(list_value) => self.push_list_value(list_value)?,
@@ -496,12 +499,12 @@ pub mod tests {
use crate::vectors::Int32Vector;
pub fn new_list_vector(data: &[Option<Vec<Option<i32>>>]) -> ListVector {
let mut builder =
ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 8);
let item_type = Arc::new(ConcreteDataType::int32_datatype());
let mut builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 8);
for vec_opt in data {
if let Some(vec) = vec_opt {
let values = vec.iter().map(|v| Value::from(*v)).collect();
let list_value = ListValue::new(values, ConcreteDataType::int32_datatype());
let list_value = ListValue::new(values, item_type.clone());
builder.push(Some(ListValueRef::Ref { val: &list_value }));
} else {
@@ -537,10 +540,11 @@ pub mod tests {
Some(vec![Some(4), None, Some(6)]),
];
let item_type = Arc::new(ConcreteDataType::int32_datatype());
let list_vector = new_list_vector(&data);
assert_eq!(
ConcreteDataType::List(ListType::new(ConcreteDataType::int32_datatype())),
ConcreteDataType::List(ListType::new(item_type.clone())),
list_vector.data_type()
);
assert_eq!("ListVector", list_vector.vector_type_name());
@@ -581,7 +585,7 @@ pub mod tests {
assert_eq!(
Value::List(ListValue::new(
vec![Value::Int32(1), Value::Int32(2), Value::Int32(3)],
ConcreteDataType::int32_datatype()
item_type.clone()
)),
list_vector.get(0)
);
@@ -600,7 +604,7 @@ pub mod tests {
assert_eq!(
Value::List(ListValue::new(
vec![Value::Int32(4), Value::Null, Value::Int32(6)],
ConcreteDataType::int32_datatype()
item_type.clone()
)),
list_vector.get(2)
);
@@ -636,10 +640,11 @@ pub mod tests {
Some(vec![Some(4), None, Some(6)]),
];
let item_type = Arc::new(ConcreteDataType::int32_datatype());
let list_vector = new_list_vector(&data);
assert_eq!(
ConcreteDataType::List(ListType::new(ConcreteDataType::int32_datatype())),
ConcreteDataType::List(ListType::new(item_type.clone())),
list_vector.data_type()
);
let mut iter = list_vector.values_iter();
@@ -672,12 +677,12 @@ pub mod tests {
#[test]
fn test_list_vector_builder() {
let mut builder =
ListType::new(ConcreteDataType::int32_datatype()).create_mutable_vector(3);
let item_type = Arc::new(ConcreteDataType::int32_datatype());
let mut builder = ListType::new(item_type.clone()).create_mutable_vector(3);
builder.push_value_ref(&ValueRef::List(ListValueRef::Ref {
val: &ListValue::new(
vec![Value::Int32(4), Value::Null, Value::Int32(6)],
ConcreteDataType::int32_datatype(),
item_type.clone(),
),
}));
assert!(builder.try_push_value_ref(&ValueRef::Int32(123)).is_err());
@@ -706,13 +711,13 @@ pub mod tests {
#[test]
fn test_list_vector_for_scalar() {
let mut builder =
ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 2);
let item_type = Arc::new(ConcreteDataType::int32_datatype());
let mut builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 2);
builder.push(None);
builder.push(Some(ListValueRef::Ref {
val: &ListValue::new(
vec![Value::Int32(4), Value::Null, Value::Int32(6)],
ConcreteDataType::int32_datatype(),
item_type.clone(),
),
}));
let vector = builder.finish();
@@ -757,13 +762,13 @@ pub mod tests {
#[test]
fn test_list_vector_builder_finish_cloned() {
let mut builder =
ListVectorBuilder::with_type_capacity(ConcreteDataType::int32_datatype(), 2);
let item_type = Arc::new(ConcreteDataType::int32_datatype());
let mut builder = ListVectorBuilder::with_type_capacity(item_type.clone(), 2);
builder.push(None);
builder.push(Some(ListValueRef::Ref {
val: &ListValue::new(
vec![Value::Int32(4), Value::Null, Value::Int32(6)],
ConcreteDataType::int32_datatype(),
item_type.clone(),
),
}));
let vector = builder.finish_cloned();

View File

@@ -18,7 +18,9 @@ use std::sync::Arc;
use arrow::array::{Array, ArrayBuilder, ArrayIter, ArrayRef};
use snafu::ResultExt;
use crate::arrow_array::{MutableStringArray, StringArray};
use crate::arrow_array::{
LargeStringArray, MutableLargeStringArray, MutableStringArray, StringArray,
};
use crate::data_type::ConcreteDataType;
use crate::error::{self, Result};
use crate::scalars::{ScalarVector, ScalarVectorBuilder};
@@ -26,69 +28,93 @@ use crate::serialize::Serializable;
use crate::value::{Value, ValueRef};
use crate::vectors::{self, MutableVector, Validity, Vector, VectorRef};
/// Internal representation for string arrays
#[derive(Debug, PartialEq)]
enum StringArrayData {
String(StringArray),
LargeString(LargeStringArray),
}
/// Vector of strings.
#[derive(Debug, PartialEq)]
pub struct StringVector {
array: StringArray,
array: StringArrayData,
}
impl StringVector {
pub(crate) fn as_arrow(&self) -> &dyn Array {
&self.array
match &self.array {
StringArrayData::String(array) => array,
StringArrayData::LargeString(array) => array,
}
}
/// Create a StringVector from a regular StringArray
pub fn from_string_array(array: StringArray) -> Self {
Self {
array: StringArrayData::String(array),
}
}
/// Create a StringVector from a LargeStringArray
pub fn from_large_string_array(array: LargeStringArray) -> Self {
Self {
array: StringArrayData::LargeString(array),
}
}
pub fn from_slice<T: AsRef<str>>(slice: &[T]) -> Self {
Self::from_string_array(StringArray::from_iter(
slice.iter().map(|s| Some(s.as_ref())),
))
}
}
impl From<StringArray> for StringVector {
fn from(array: StringArray) -> Self {
Self { array }
Self::from_string_array(array)
}
}
impl From<LargeStringArray> for StringVector {
fn from(array: LargeStringArray) -> Self {
Self::from_large_string_array(array)
}
}
impl From<Vec<Option<String>>> for StringVector {
fn from(data: Vec<Option<String>>) -> Self {
Self {
array: StringArray::from_iter(data),
}
Self::from_string_array(StringArray::from_iter(data))
}
}
impl From<Vec<Option<&str>>> for StringVector {
fn from(data: Vec<Option<&str>>) -> Self {
Self {
array: StringArray::from_iter(data),
}
Self::from_string_array(StringArray::from_iter(data))
}
}
impl From<&[Option<String>]> for StringVector {
fn from(data: &[Option<String>]) -> Self {
Self {
array: StringArray::from_iter(data),
}
Self::from_string_array(StringArray::from_iter(data))
}
}
impl From<&[Option<&str>]> for StringVector {
fn from(data: &[Option<&str>]) -> Self {
Self {
array: StringArray::from_iter(data),
}
Self::from_string_array(StringArray::from_iter(data))
}
}
impl From<Vec<String>> for StringVector {
fn from(data: Vec<String>) -> Self {
Self {
array: StringArray::from_iter(data.into_iter().map(Some)),
}
Self::from_string_array(StringArray::from_iter(data.into_iter().map(Some)))
}
}
impl From<Vec<&str>> for StringVector {
fn from(data: Vec<&str>) -> Self {
Self {
array: StringArray::from_iter(data.into_iter().map(Some)),
}
Self::from_string_array(StringArray::from_iter(data.into_iter().map(Some)))
}
}
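A minimal sketch of the dual-backed StringVector (paths assumed: `datatypes::vectors::{StringVector, Vector}` plus arrow's string array types); both constructors yield the same logical vector, differing only in offset width.

use arrow::array::{LargeStringArray, StringArray};
use datatypes::vectors::{StringVector, Vector};

fn build_both_backings() {
    let small = StringVector::from_string_array(StringArray::from(vec!["a", "b"]));
    let large = StringVector::from_large_string_array(LargeStringArray::from(vec!["a", "b"]));
    // Both report the same length and element values despite different arrow backings.
    assert_eq!(small.len(), large.len());
    assert_eq!(small.get(1), large.get(1));
}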
@@ -106,67 +132,177 @@ impl Vector for StringVector {
}
fn len(&self) -> usize {
self.array.len()
match &self.array {
StringArrayData::String(array) => array.len(),
StringArrayData::LargeString(array) => array.len(),
}
}
fn to_arrow_array(&self) -> ArrayRef {
Arc::new(self.array.clone())
match &self.array {
StringArrayData::String(array) => Arc::new(array.clone()),
StringArrayData::LargeString(array) => Arc::new(array.clone()),
}
}
fn to_boxed_arrow_array(&self) -> Box<dyn Array> {
Box::new(self.array.clone())
match &self.array {
StringArrayData::String(array) => Box::new(array.clone()),
StringArrayData::LargeString(array) => Box::new(array.clone()),
}
}
fn validity(&self) -> Validity {
vectors::impl_validity_for_vector!(self.array)
match &self.array {
StringArrayData::String(array) => vectors::impl_validity_for_vector!(array),
StringArrayData::LargeString(array) => vectors::impl_validity_for_vector!(array),
}
}
fn memory_size(&self) -> usize {
self.array.get_buffer_memory_size()
match &self.array {
StringArrayData::String(array) => array.get_buffer_memory_size(),
StringArrayData::LargeString(array) => array.get_buffer_memory_size(),
}
}
fn null_count(&self) -> usize {
self.array.null_count()
match &self.array {
StringArrayData::String(array) => array.null_count(),
StringArrayData::LargeString(array) => array.null_count(),
}
}
fn is_null(&self, row: usize) -> bool {
self.array.is_null(row)
match &self.array {
StringArrayData::String(array) => array.is_null(row),
StringArrayData::LargeString(array) => array.is_null(row),
}
}
fn slice(&self, offset: usize, length: usize) -> VectorRef {
Arc::new(Self::from(self.array.slice(offset, length)))
match &self.array {
StringArrayData::String(array) => {
Arc::new(Self::from_string_array(array.slice(offset, length)))
}
StringArrayData::LargeString(array) => {
Arc::new(Self::from_large_string_array(array.slice(offset, length)))
}
}
}
fn get(&self, index: usize) -> Value {
vectors::impl_get_for_vector!(self.array, index)
match &self.array {
StringArrayData::String(array) => vectors::impl_get_for_vector!(array, index),
StringArrayData::LargeString(array) => vectors::impl_get_for_vector!(array, index),
}
}
fn get_ref(&self, index: usize) -> ValueRef<'_> {
vectors::impl_get_ref_for_vector!(self.array, index)
match &self.array {
StringArrayData::String(array) => vectors::impl_get_ref_for_vector!(array, index),
StringArrayData::LargeString(array) => vectors::impl_get_ref_for_vector!(array, index),
}
}
}
pub enum StringIter<'a> {
String(ArrayIter<&'a StringArray>),
LargeString(ArrayIter<&'a LargeStringArray>),
}
impl<'a> Iterator for StringIter<'a> {
type Item = Option<&'a str>;
fn next(&mut self) -> Option<Self::Item> {
match self {
StringIter::String(iter) => iter.next(),
StringIter::LargeString(iter) => iter.next(),
}
}
}
impl ScalarVector for StringVector {
type OwnedItem = String;
type RefItem<'a> = &'a str;
type Iter<'a> = ArrayIter<&'a StringArray>;
type Iter<'a> = StringIter<'a>;
type Builder = StringVectorBuilder;
fn get_data(&self, idx: usize) -> Option<Self::RefItem<'_>> {
if self.array.is_valid(idx) {
Some(self.array.value(idx))
} else {
None
match &self.array {
StringArrayData::String(array) => {
if array.is_valid(idx) {
Some(array.value(idx))
} else {
None
}
}
StringArrayData::LargeString(array) => {
if array.is_valid(idx) {
Some(array.value(idx))
} else {
None
}
}
}
}
fn iter_data(&self) -> Self::Iter<'_> {
self.array.iter()
match &self.array {
StringArrayData::String(array) => StringIter::String(array.iter()),
StringArrayData::LargeString(array) => StringIter::LargeString(array.iter()),
}
}
}
/// Internal representation for mutable string arrays
enum MutableStringArrayData {
String(MutableStringArray),
LargeString(MutableLargeStringArray),
}
pub struct StringVectorBuilder {
pub mutable_array: MutableStringArray,
mutable_array: MutableStringArrayData,
}
impl Default for StringVectorBuilder {
fn default() -> Self {
Self::new()
}
}
impl StringVectorBuilder {
/// Create a builder for regular strings
pub fn new() -> Self {
Self {
mutable_array: MutableStringArrayData::String(MutableStringArray::new()),
}
}
/// Create a builder for large strings
pub fn new_large() -> Self {
Self {
mutable_array: MutableStringArrayData::LargeString(MutableLargeStringArray::new()),
}
}
/// Create a builder for regular strings with capacity
pub fn with_string_capacity(capacity: usize) -> Self {
Self {
mutable_array: MutableStringArrayData::String(MutableStringArray::with_capacity(
capacity, 0,
)),
}
}
/// Create a builder for large strings with capacity
pub fn with_large_capacity(capacity: usize) -> Self {
Self {
mutable_array: MutableStringArrayData::LargeString(
MutableLargeStringArray::with_capacity(capacity, 0),
),
}
}
}
impl MutableVector for StringVectorBuilder {
@@ -175,7 +311,10 @@ impl MutableVector for StringVectorBuilder {
}
fn len(&self) -> usize {
self.mutable_array.len()
match &self.mutable_array {
MutableStringArrayData::String(array) => array.len(),
MutableStringArrayData::LargeString(array) => array.len(),
}
}
fn as_any(&self) -> &dyn Any {
@@ -193,11 +332,16 @@ impl MutableVector for StringVectorBuilder {
fn to_vector_cloned(&self) -> VectorRef {
Arc::new(self.finish_cloned())
}
fn try_push_value_ref(&mut self, value: &ValueRef) -> Result<()> {
match value.as_string()? {
Some(v) => self.mutable_array.append_value(v),
None => self.mutable_array.append_null(),
match value.try_into_string()? {
Some(v) => match &mut self.mutable_array {
MutableStringArrayData::String(array) => array.append_value(v),
MutableStringArrayData::LargeString(array) => array.append_value(v),
},
None => match &mut self.mutable_array {
MutableStringArrayData::String(array) => array.append_null(),
MutableStringArrayData::LargeString(array) => array.append_null(),
},
}
Ok(())
}
@@ -207,7 +351,10 @@ impl MutableVector for StringVectorBuilder {
}
fn push_null(&mut self) {
self.mutable_array.append_null()
match &mut self.mutable_array {
MutableStringArrayData::String(array) => array.append_null(),
MutableStringArrayData::LargeString(array) => array.append_null(),
}
}
}
@@ -216,26 +363,44 @@ impl ScalarVectorBuilder for StringVectorBuilder {
fn with_capacity(capacity: usize) -> Self {
Self {
mutable_array: MutableStringArray::with_capacity(capacity, 0),
mutable_array: MutableStringArrayData::String(MutableStringArray::with_capacity(
capacity, 0,
)),
}
}
fn push(&mut self, value: Option<<Self::VectorType as ScalarVector>::RefItem<'_>>) {
match value {
Some(v) => self.mutable_array.append_value(v),
None => self.mutable_array.append_null(),
Some(v) => match &mut self.mutable_array {
MutableStringArrayData::String(array) => array.append_value(v),
MutableStringArrayData::LargeString(array) => array.append_value(v),
},
None => match &mut self.mutable_array {
MutableStringArrayData::String(array) => array.append_null(),
MutableStringArrayData::LargeString(array) => array.append_null(),
},
}
}
fn finish(&mut self) -> Self::VectorType {
StringVector {
array: self.mutable_array.finish(),
match &mut self.mutable_array {
MutableStringArrayData::String(array) => {
StringVector::from_string_array(array.finish())
}
MutableStringArrayData::LargeString(array) => {
StringVector::from_large_string_array(array.finish())
}
}
}
fn finish_cloned(&self) -> Self::VectorType {
StringVector {
array: self.mutable_array.finish_cloned(),
match &self.mutable_array {
MutableStringArrayData::String(array) => {
StringVector::from_string_array(array.finish_cloned())
}
MutableStringArrayData::LargeString(array) => {
StringVector::from_large_string_array(array.finish_cloned())
}
}
}
}
@@ -249,7 +414,26 @@ impl Serializable for StringVector {
}
}
vectors::impl_try_from_arrow_array_for_vector!(StringArray, StringVector);
impl StringVector {
pub fn try_from_arrow_array(
array: impl AsRef<dyn Array>,
) -> crate::error::Result<StringVector> {
let array = array.as_ref();
if let Some(string_array) = array.as_any().downcast_ref::<StringArray>() {
Ok(StringVector::from_string_array(string_array.clone()))
} else if let Some(large_string_array) = array.as_any().downcast_ref::<LargeStringArray>() {
Ok(StringVector::from_large_string_array(
large_string_array.clone(),
))
} else {
Err(crate::error::UnsupportedArrowTypeSnafu {
arrow_type: array.data_type().clone(),
}
.build())
}
}
}
#[cfg(test)]
mod tests {

View File

@@ -351,7 +351,7 @@ impl MutableVector for StructVectorBuilder {
}
fn try_push_value_ref(&mut self, value: &ValueRef) -> Result<()> {
if let Some(struct_ref) = value.as_struct()? {
if let Some(struct_ref) = value.try_into_struct()? {
match struct_ref {
StructValueRef::Indexed { vector, idx } => match vector.get(idx).as_struct()? {
Some(struct_value) => self.push_struct_value(struct_value)?,
@@ -438,6 +438,8 @@ impl ScalarVectorBuilder for StructVectorBuilder {
#[cfg(test)]
mod tests {
use super::*;
use crate::types::StructField;
use crate::value::ListValue;
use crate::value::tests::*;
#[test]
@@ -478,9 +480,46 @@ mod tests {
assert_eq!(items.next(), Some(&Value::String("tom".into())));
assert_eq!(items.next(), Some(&Value::UInt8(25)));
assert_eq!(items.next(), Some(&Value::String("94038".into())));
assert_eq!(items.next(), Some(&Value::List(build_list_value())));
assert_eq!(items.next(), None);
} else {
panic!("Expected a struct value");
}
}
#[test]
fn test_deep_nested_struct_list() {
// level 1: struct
let struct_type = ConcreteDataType::struct_datatype(build_struct_type());
let struct_value = build_struct_value();
// level 2: list
let struct_type_ref = Arc::new(struct_type);
let list_type = ConcreteDataType::list_datatype(struct_type_ref.clone());
let list_value = ListValue::new(
vec![
Value::Struct(struct_value.clone()),
Value::Struct(struct_value.clone()),
],
struct_type_ref.clone(),
);
// level 3: struct
let root_type = StructType::new(Arc::new(vec![StructField::new(
"items".to_string(),
list_type,
false,
)]));
let root_value = StructValue::new(vec![Value::List(list_value)], root_type.clone());
let mut builder = StructVectorBuilder::with_type_and_capacity(root_type.clone(), 20);
builder.push(Some(StructValueRef::Ref(&root_value)));
let vector = builder.finish();
assert_eq!(vector.len(), 1);
assert_eq!(vector.null_count(), 0);
assert_eq!(
vector.data_type(),
ConcreteDataType::struct_datatype(root_type)
);
assert_eq!(vector.get(0), Value::Struct(root_value));
}
}

View File

@@ -32,6 +32,7 @@ common-options.workspace = true
common-query.workspace = true
common-recordbatch.workspace = true
common-runtime.workspace = true
common-stat.workspace = true
common-telemetry.workspace = true
common-time.workspace = true
common-version.workspace = true
@@ -48,6 +49,7 @@ enum_dispatch = "0.3"
futures.workspace = true
get-size2 = "0.1.2"
greptime-proto.workspace = true
hostname.workspace = true
http.workspace = true
humantime-serde.workspace = true
itertools.workspace = true

View File

@@ -26,6 +26,7 @@ use common_error::ext::BoxedError;
use common_meta::key::TableMetadataManagerRef;
use common_options::memory::MemoryOptions;
use common_runtime::JoinHandle;
use common_stat::get_total_cpu_cores;
use common_telemetry::logging::{LoggingOptions, TracingOptions};
use common_telemetry::{debug, info, trace};
use datatypes::schema::ColumnSchema;
@@ -92,7 +93,7 @@ pub struct FlowConfig {
impl Default for FlowConfig {
fn default() -> Self {
Self {
num_workers: (common_config::utils::get_cpus() / 2).max(1),
num_workers: (get_total_cpu_cores() / 2).max(1),
batching_mode: BatchingModeOptions::default(),
}
}
@@ -141,7 +142,7 @@ impl Default for FlownodeOptions {
impl Configurable for FlownodeOptions {
fn validate_sanitize(&mut self) -> common_config::error::Result<()> {
if self.flow.num_workers == 0 {
self.flow.num_workers = (common_config::utils::get_cpus() / 2).max(1);
self.flow.num_workers = (get_total_cpu_cores() / 2).max(1);
}
Ok(())
}

View File

@@ -1146,7 +1146,7 @@ fn from_accums_to_offsetted_accum(new_accums: Vec<Vec<Value>>) -> Vec<Value> {
})
.map(Value::from)
.collect::<Vec<_>>();
let first = ListValue::new(offset, ConcreteDataType::uint64_datatype());
let first = ListValue::new(offset, Arc::new(ConcreteDataType::uint64_datatype()));
let first = Value::List(first);
// construct new_accums

View File

@@ -185,6 +185,10 @@ impl HeartbeatTask {
start_time_ms,
cpus,
memory_bytes,
hostname: hostname::get()
.unwrap_or_default()
.to_string_lossy()
.to_string(),
})
}

View File

@@ -46,6 +46,7 @@ datafusion-expr.workspace = true
datanode.workspace = true
datatypes.workspace = true
futures.workspace = true
hostname.workspace = true
humantime.workspace = true
humantime-serde.workspace = true
lazy_static.workspace = true
@@ -70,7 +71,6 @@ snafu.workspace = true
sql.workspace = true
sqlparser.workspace = true
store-api.workspace = true
substrait.workspace = true
table.workspace = true
tokio.workspace = true
tokio-util.workspace = true

View File

@@ -158,6 +158,10 @@ impl HeartbeatTask {
start_time_ms,
cpus,
memory_bytes,
hostname: hostname::get()
.unwrap_or_default()
.to_string_lossy()
.to_string(),
})
}

View File

@@ -229,6 +229,12 @@ impl Limiter {
.unwrap_or(0)
})
.sum(),
ValueData::JsonValue(inner) => inner
.as_ref()
.value_data
.as_ref()
.map(Self::size_of_value_data)
.unwrap_or(0),
}
}
}

View File

@@ -34,6 +34,7 @@ roaring = "0.10"
serde.workspace = true
serde_json.workspace = true
snafu.workspace = true
store-api.workspace = true
tantivy = { version = "0.24", features = ["zstd-compression"] }
tantivy-jieba = "0.16"
tokio.workspace = true

View File

@@ -75,3 +75,12 @@ impl Config {
Ok(Self::default())
}
}
impl Analyzer {
pub fn to_str(&self) -> &'static str {
match self {
Analyzer::English => "English",
Analyzer::Chinese => "Chinese",
}
}
}

View File

@@ -21,6 +21,7 @@ pub mod error;
pub mod external_provider;
pub mod fulltext_index;
pub mod inverted_index;
pub mod target;
pub type Bytes = Vec<u8>;
pub type BytesRef<'a> = &'a [u8];

107
src/index/src/target.rs Normal file
View File

@@ -0,0 +1,107 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::any::Any;
use std::fmt::{self, Display};
use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode;
use common_macro::stack_trace_debug;
use serde::{Deserialize, Serialize};
use snafu::{Snafu, ensure};
use store_api::storage::ColumnId;
/// Describes an index target. Column ids are the only supported variant for now.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum IndexTarget {
ColumnId(ColumnId),
}
impl Display for IndexTarget {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
IndexTarget::ColumnId(id) => write!(f, "{}", id),
}
}
}
impl IndexTarget {
/// Parse a target key string back into an index target description.
pub fn decode(key: &str) -> Result<Self, TargetKeyError> {
validate_column_key(key)?;
let id = key
.parse::<ColumnId>()
.map_err(|_| InvalidColumnIdSnafu { value: key }.build())?;
Ok(IndexTarget::ColumnId(id))
}
}
/// Errors that can occur when working with index target keys.
#[derive(Snafu, Clone, PartialEq, Eq)]
#[stack_trace_debug]
pub enum TargetKeyError {
#[snafu(display("target key cannot be empty"))]
Empty,
#[snafu(display("target key must contain digits only: {key}"))]
InvalidCharacters { key: String },
#[snafu(display("failed to parse column id from '{value}'"))]
InvalidColumnId { value: String },
}
impl ErrorExt for TargetKeyError {
fn status_code(&self) -> StatusCode {
StatusCode::InvalidArguments
}
fn as_any(&self) -> &dyn Any {
self
}
}
fn validate_column_key(key: &str) -> Result<(), TargetKeyError> {
ensure!(!key.is_empty(), EmptySnafu);
ensure!(
key.chars().all(|ch| ch.is_ascii_digit()),
InvalidCharactersSnafu { key }
);
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn encode_decode_column() {
let target = IndexTarget::ColumnId(42);
let key = format!("{}", target);
assert_eq!(key, "42");
let decoded = IndexTarget::decode(&key).unwrap();
assert_eq!(decoded, target);
}
#[test]
fn decode_rejects_empty() {
let err = IndexTarget::decode("").unwrap_err();
assert!(matches!(err, TargetKeyError::Empty));
}
#[test]
fn decode_rejects_invalid_digits() {
let err = IndexTarget::decode("1a2").unwrap_err();
assert!(matches!(err, TargetKeyError::InvalidCharacters { .. }));
}
}
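A short sketch of consuming the decode error through `ErrorExt` (the crate path `index::target` is an assumption based on the new file's location); every `TargetKeyError` surfaces as `InvalidArguments`.

use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode;
use index::target::IndexTarget;

fn invalid_key_maps_to_invalid_arguments() {
    let err = IndexTarget::decode("1a2").unwrap_err();
    // Every TargetKeyError reports InvalidArguments through ErrorExt.
    assert_eq!(err.status_code(), StatusCode::InvalidArguments);
}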

View File

@@ -18,5 +18,6 @@
pub mod error;
pub mod kafka;
pub mod metrics;
pub mod noop;
pub mod raft_engine;
pub mod test_util;

15
src/log-store/src/noop.rs Normal file
View File

@@ -0,0 +1,15 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub mod log_store;

View File

@@ -0,0 +1,116 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use futures::stream;
use store_api::logstore::entry::{Entry, NaiveEntry};
use store_api::logstore::provider::Provider;
use store_api::logstore::{AppendBatchResponse, EntryId, LogStore, SendableEntryStream, WalIndex};
use store_api::storage::RegionId;
use crate::error::{Error, Result};
#[derive(Debug, Clone, Copy)]
pub struct NoopLogStore;
#[async_trait::async_trait]
impl LogStore for NoopLogStore {
type Error = Error;
async fn stop(&self) -> Result<()> {
Ok(())
}
async fn append_batch(&self, entries: Vec<Entry>) -> Result<AppendBatchResponse> {
let last_entry_ids = entries
.iter()
.map(|entry| (entry.region_id(), 0))
.collect::<HashMap<RegionId, EntryId>>();
Ok(AppendBatchResponse { last_entry_ids })
}
async fn read(
&self,
_provider: &Provider,
_entry_id: EntryId,
_index: Option<WalIndex>,
) -> Result<SendableEntryStream<'static, Entry, Self::Error>> {
Ok(Box::pin(stream::empty()))
}
async fn create_namespace(&self, _ns: &Provider) -> Result<()> {
Ok(())
}
async fn delete_namespace(&self, _ns: &Provider) -> Result<()> {
Ok(())
}
async fn list_namespaces(&self) -> Result<Vec<Provider>> {
Ok(vec![])
}
fn entry(
&self,
data: Vec<u8>,
entry_id: EntryId,
region_id: RegionId,
provider: &Provider,
) -> Result<Entry> {
Ok(Entry::Naive(NaiveEntry {
provider: provider.clone(),
region_id,
entry_id,
data,
}))
}
async fn obsolete(
&self,
_provider: &Provider,
_region_id: RegionId,
_entry_id: EntryId,
) -> Result<()> {
Ok(())
}
fn latest_entry_id(&self, _provider: &Provider) -> Result<EntryId> {
Ok(0)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[tokio::test]
async fn test_append_batch() {
let log_store = NoopLogStore;
let entries = vec![Entry::Naive(NaiveEntry {
provider: Provider::noop_provider(),
region_id: RegionId::new(1, 1),
entry_id: 1,
data: vec![1],
})];
let last_entry_ids = log_store
.append_batch(entries)
.await
.unwrap()
.last_entry_ids;
assert_eq!(last_entry_ids.len(), 1);
assert_eq!(last_entry_ids[&(RegionId::new(1, 1))], 0);
}
}
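
A hedged sketch of how a caller might rely on the no-op semantics above: append_batch discards the payload but still maps every region to entry id 0, so WAL-less deployments can proceed as if the write happened. The function name flush_without_wal is illustrative only and assumes NoopLogStore, Entry, and this crate's Result are in scope.

// Illustrative only: append entries through the noop store and sanity-check the response.
async fn flush_without_wal(store: &NoopLogStore, entries: Vec<Entry>) -> Result<()> {
    let response = store.append_batch(entries).await?;
    // Every region is acknowledged with entry id 0 because nothing is persisted.
    for (region_id, entry_id) in response.last_entry_ids {
        debug_assert_eq!(entry_id, 0, "noop store never persists, region: {:?}", region_id);
    }
    Ok(())
}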

View File

@@ -353,6 +353,8 @@ impl ProcedureExecutor for MetaClient {
}
}
// TODO(zyy17): Allow deprecated fields for backward compatibility. Remove this when the deprecated fields are removed from the proto.
#[allow(deprecated)]
#[async_trait::async_trait]
impl ClusterInfo for MetaClient {
type Error = Error;
@@ -372,25 +374,61 @@ impl ClusterInfo for MetaClient {
let (leader, followers) = cluster_client.get_metasrv_peers().await?;
followers
.into_iter()
.map(|node| NodeInfo {
peer: node.peer.unwrap_or_default(),
last_activity_ts,
status: NodeStatus::Metasrv(MetasrvStatus { is_leader: false }),
version: node.version,
git_commit: node.git_commit,
start_time_ms: node.start_time_ms,
cpus: node.cpus,
memory_bytes: node.memory_bytes,
.map(|node| {
if let Some(node_info) = node.info {
NodeInfo {
peer: node.peer.unwrap_or_default(),
last_activity_ts,
status: NodeStatus::Metasrv(MetasrvStatus { is_leader: false }),
version: node_info.version,
git_commit: node_info.git_commit,
start_time_ms: node_info.start_time_ms,
cpus: node_info.cpus,
memory_bytes: node_info.memory_bytes,
hostname: node_info.hostname,
}
} else {
// TODO(zyy17): It's for backward compatibility. Remove this when the deprecated fields are removed from the proto.
NodeInfo {
peer: node.peer.unwrap_or_default(),
last_activity_ts,
status: NodeStatus::Metasrv(MetasrvStatus { is_leader: false }),
version: node.version,
git_commit: node.git_commit,
start_time_ms: node.start_time_ms,
cpus: node.cpus,
memory_bytes: node.memory_bytes,
hostname: "".to_string(),
}
}
})
.chain(leader.into_iter().map(|node| NodeInfo {
peer: node.peer.unwrap_or_default(),
last_activity_ts,
status: NodeStatus::Metasrv(MetasrvStatus { is_leader: true }),
version: node.version,
git_commit: node.git_commit,
start_time_ms: node.start_time_ms,
cpus: node.cpus,
memory_bytes: node.memory_bytes,
.chain(leader.into_iter().map(|node| {
if let Some(node_info) = node.info {
NodeInfo {
peer: node.peer.unwrap_or_default(),
last_activity_ts,
status: NodeStatus::Metasrv(MetasrvStatus { is_leader: true }),
version: node_info.version,
git_commit: node_info.git_commit,
start_time_ms: node_info.start_time_ms,
cpus: node_info.cpus,
memory_bytes: node_info.memory_bytes,
hostname: node_info.hostname,
}
} else {
// TODO(zyy17): It's for backward compatibility. Remove this when the deprecated fields are removed from the proto.
NodeInfo {
peer: node.peer.unwrap_or_default(),
last_activity_ts,
status: NodeStatus::Metasrv(MetasrvStatus { is_leader: true }),
version: node.version,
git_commit: node.git_commit,
start_time_ms: node.start_time_ms,
cpus: node.cpus,
memory_bytes: node.memory_bytes,
hostname: "".to_string(),
}
}
}))
.collect::<Vec<_>>()
} else {
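
The same fallback (prefer the nested info message, fall back to the deprecated top-level fields) is spelled out twice above, once for the followers and once for the leader. A hedged sketch of how it could read as a single helper; the function name is illustrative and the proto type is assumed to carry both the deprecated fields and the new optional info message, as in the hunks above.

// Illustrative only, not part of this diff.
fn to_node_info(
    node: api::v1::meta::MetasrvNodeInfo,
    last_activity_ts: i64,
    is_leader: bool,
) -> NodeInfo {
    let status = NodeStatus::Metasrv(MetasrvStatus { is_leader });
    let peer = node.peer.unwrap_or_default();
    match node.info {
        // Preferred source: the nested NodeInfo message.
        Some(info) => NodeInfo {
            peer,
            last_activity_ts,
            status,
            version: info.version,
            git_commit: info.git_commit,
            start_time_ms: info.start_time_ms,
            cpus: info.cpus,
            memory_bytes: info.memory_bytes,
            hostname: info.hostname,
        },
        // Backward compatibility: older metasrv builds only fill the deprecated fields.
        None => NodeInfo {
            peer,
            last_activity_ts,
            status,
            version: node.version,
            git_commit: node.git_commit,
            start_time_ms: node.start_time_ms,
            cpus: node.cpus,
            memory_bytes: node.memory_bytes,
            hostname: String::new(),
        },
    }
}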

View File

@@ -53,6 +53,7 @@ either.workspace = true
etcd-client.workspace = true
futures.workspace = true
h2 = "0.3"
hostname.workspace = true
http-body-util = "0.1"
humantime.workspace = true
humantime-serde.workspace = true

View File

@@ -245,6 +245,7 @@ mod tests {
start_time_ms: current_time_millis() as u64,
cpus: 0,
memory_bytes: 0,
hostname: "test_hostname".to_string(),
};
let key_prefix = NodeInfoKey::key_prefix_with_role(Role::Frontend);
@@ -270,6 +271,7 @@ mod tests {
start_time_ms: current_time_millis() as u64,
cpus: 0,
memory_bytes: 0,
hostname: "test_hostname".to_string(),
};
in_memory
@@ -307,6 +309,7 @@ mod tests {
start_time_ms: last_activity_ts as u64,
cpus: 0,
memory_bytes: 0,
hostname: "test_hostname".to_string(),
};
let key_prefix = NodeInfoKey::key_prefix_with_role(Role::Frontend);

View File

@@ -1163,6 +1163,7 @@ mod tests {
start_time_ms: 0,
cpus: 0,
memory_bytes: 0,
hostname: "test_hostname".to_string(),
};
mysql_election.register_candidate(&node_info).await.unwrap();
}

View File

@@ -1002,6 +1002,7 @@ mod tests {
start_time_ms: 0,
cpus: 0,
memory_bytes: 0,
hostname: "test_hostname".to_string(),
};
pg_election.register_candidate(&node_info).await.unwrap();
}

View File

@@ -54,6 +54,7 @@ impl HeartbeatHandler for CollectFrontendClusterInfoHandler {
start_time_ms: info.start_time_ms,
cpus: info.cpus,
memory_bytes: info.memory_bytes,
hostname: info.hostname,
};
put_into_memory_store(ctx, key, value).await?;
@@ -89,6 +90,7 @@ impl HeartbeatHandler for CollectFlownodeClusterInfoHandler {
start_time_ms: info.start_time_ms,
cpus: info.cpus,
memory_bytes: info.memory_bytes,
hostname: info.hostname,
};
put_into_memory_store(ctx, key, value).await?;
@@ -142,6 +144,7 @@ impl HeartbeatHandler for CollectDatanodeClusterInfoHandler {
start_time_ms: info.start_time_ms,
cpus: info.cpus,
memory_bytes: info.memory_bytes,
hostname: info.hostname,
};
put_into_memory_store(ctx, key, value).await?;

View File

@@ -375,11 +375,16 @@ pub struct MetasrvNodeInfo {
// The node cpus
#[serde(default)]
pub cpus: u32,
#[serde(default)]
// The node memory bytes
#[serde(default)]
pub memory_bytes: u64,
// The node hostname
#[serde(default)]
pub hostname: String,
}
// TODO(zyy17): Allow deprecated fields for backward compatibility. Remove this when the deprecated top-level fields are removed from the proto.
#[allow(deprecated)]
impl From<MetasrvNodeInfo> for api::v1::meta::MetasrvNodeInfo {
fn from(node_info: MetasrvNodeInfo) -> Self {
Self {
@@ -387,11 +392,22 @@ impl From<MetasrvNodeInfo> for api::v1::meta::MetasrvNodeInfo {
addr: node_info.addr,
..Default::default()
}),
version: node_info.version,
git_commit: node_info.git_commit,
// TODO(zyy17): The following top-level fields are deprecated. They are kept for backward compatibility and will be removed in a future version.
// New code should use the fields in `info.NodeInfo` instead.
version: node_info.version.clone(),
git_commit: node_info.git_commit.clone(),
start_time_ms: node_info.start_time_ms,
cpus: node_info.cpus,
memory_bytes: node_info.memory_bytes,
// The canonical location for node information.
info: Some(api::v1::meta::NodeInfo {
version: node_info.version,
git_commit: node_info.git_commit,
start_time_ms: node_info.start_time_ms,
cpus: node_info.cpus,
memory_bytes: node_info.memory_bytes,
hostname: node_info.hostname,
}),
}
}
}
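
Because hostname is annotated with #[serde(default)], node-info values written by older versions (without the field) still deserialize cleanly. A hedged sketch, assuming the remaining MetasrvNodeInfo fields (addr, version, git_commit, start_time_ms) are as shown above and that serde_json is available in the workspace:

// Illustrative only: a legacy payload without "hostname" falls back to the empty string.
#[test]
fn legacy_payload_defaults_hostname() {
    let legacy = r#"{"addr":"127.0.0.1:3002","version":"0.9.0","git_commit":"abc123","start_time_ms":0,"cpus":8,"memory_bytes":0}"#;
    let node: MetasrvNodeInfo = serde_json::from_str(legacy).unwrap();
    assert_eq!(node.hostname, "");
}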
@@ -696,6 +712,10 @@ impl Metasrv {
start_time_ms: self.start_time_ms(),
cpus: self.resource_spec().cpus as u32,
memory_bytes: self.resource_spec().memory.unwrap_or_default().as_bytes(),
hostname: hostname::get()
.unwrap_or_default()
.to_string_lossy()
.to_string(),
}
}
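
The hostname lookup added above uses the hostname crate (added to Cargo.toml earlier in this diff). A minimal sketch of the same expression as a standalone helper; local_hostname is a hypothetical name, not part of this change.

// Returns the OS hostname, or an empty string if the lookup fails; non-UTF-8 names are
// converted lossily. Mirrors the expression used in Metasrv::node_info above.
fn local_hostname() -> String {
    hostname::get()
        .unwrap_or_default()
        .to_string_lossy()
        .to_string()
}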

View File

@@ -80,12 +80,12 @@ impl CloseDowngradedRegion {
let RegionInfo { engine, .. } = datanode_table_value.region_info.clone();
Ok(Instruction::CloseRegion(RegionIdent {
Ok(Instruction::CloseRegions(vec![RegionIdent {
datanode_id: downgrade_leader_datanode_id,
table_id,
region_number,
engine,
}))
}]))
}
/// Closes the downgraded leader region.
@@ -121,7 +121,7 @@ impl CloseDowngradedRegion {
"Received close downgraded leade region reply: {:?}, region: {}",
reply, region_id
);
let InstructionReply::CloseRegion(SimpleReply { result, error }) = reply else {
let InstructionReply::CloseRegions(SimpleReply { result, error }) = reply else {
return error::UnexpectedInstructionReplySnafu {
mailbox_message: msg.to_string(),
reason: "expect close region reply",
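
With the switch from CloseRegion to CloseRegions, a single downgraded region now travels as a one-element batch and the reply is matched on the CloseRegions variant. A hedged sketch of the reply handling, assuming InstructionReply and a SimpleReply of the shape { result: bool, error: Option<String> } as used above:

// Illustrative only: interpret a close-regions reply from the datanode.
fn interpret_close_reply(reply: InstructionReply) -> Result<(), String> {
    match reply {
        InstructionReply::CloseRegions(SimpleReply { result: true, .. }) => Ok(()),
        InstructionReply::CloseRegions(SimpleReply { error, .. }) => {
            Err(error.unwrap_or_else(|| "close regions failed without detail".to_string()))
        }
        _ => Err("expect close regions reply".to_string()),
    }
}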

Some files were not shown because too many files have changed in this diff.