mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2025-12-22 22:20:02 +00:00
feat: implement compressed CSV/JSON export functionality (#7162)
* feat: implement compressed CSV/JSON export functionality - Add CompressedWriter for real-time compression during CSV/JSON export - Support GZIP, BZIP2, XZ, ZSTD compression formats - Remove LazyBufferedWriter dependency for simplified architecture - Implement Encoder -> Compressor -> FileWriter data flow - Add tests for compressed CSV/JSON export Signed-off-by: McKnight22 <tao.wang.22@outlook.com> * feat: implement compressed CSV/JSON export functionality - refactor and extend compressed_writer tests - add coverage for Bzip2 and Xz compression Signed-off-by: McKnight22 <tao.wang.22@outlook.com> * feat: implement compressed CSV/JSON export functionality - Switch to threshold-based chunked flushing - Avoid unnecessary writes on empty buffers - Replace direct write_all() calls with the new helper for consistency Signed-off-by: McKnight22 <tao.wang.22@outlook.com> * feat: implement compressed CSV/JSON import (COPY FROM) functionality - Add support for reading compressed CSV and JSON in COPY FROM - Support GZIP, BZIP2, XZ, ZSTD compression formats - Add tests for compressed CSV/JSON import Signed-off-by: McKnight22 <tao.wang.22@outlook.com> * feat: implement compressed CSV/JSON export/import functionality - Fix review comments Signed-off-by: McKnight22 <tao.wang.22@outlook.com> * feat: implement compressed CSV/JSON export/import functionality - Move temp_dir out of the loop Signed-off-by: McKnight22 <tao.wang.22@outlook.com> * feat: implement compressed CSV/JSON export/import functionality - Fix unreasonable locking logic Co-authored-by: jeremyhi <jiachun_feng@proton.me> Signed-off-by: McKnight22 <tao.wang.22@outlook.com> --------- Signed-off-by: McKnight22 <tao.wang.22@outlook.com> Co-authored-by: jeremyhi <jiachun_feng@proton.me>
This commit is contained in:
@@ -0,0 +1,233 @@
|
||||
-- Test compressed CSV import functionality
|
||||
-- First, create and export data with different compression types
|
||||
CREATE TABLE test_csv_export(
|
||||
`id` UINT32,
|
||||
`name` STRING,
|
||||
`value` DOUBLE,
|
||||
ts TIMESTAMP TIME INDEX
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
-- Insert test data
|
||||
INSERT INTO test_csv_export(`id`, `name`, `value`, ts) VALUES
|
||||
(1, 'Alice', 10.5, 1640995200000),
|
||||
(2, 'Bob', 20.3, 1640995260000),
|
||||
(3, 'Charlie', 30.7, 1640995320000),
|
||||
(4, 'David', 40.1, 1640995380000),
|
||||
(5, 'Eve', 50.9, 1640995440000);
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
-- Export with different compression types
|
||||
COPY test_csv_export TO '${SQLNESS_HOME}/import/test_csv_uncompressed.csv' WITH (format='csv');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
COPY test_csv_export TO '${SQLNESS_HOME}/import/test_csv_gzip.csv.gz' WITH (format='csv', compression_type='gzip');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
COPY test_csv_export TO '${SQLNESS_HOME}/import/test_csv_zstd.csv.zst' WITH (format='csv', compression_type='zstd');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
COPY test_csv_export TO '${SQLNESS_HOME}/import/test_csv_bzip2.csv.bz2' WITH (format='csv', compression_type='bzip2');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
COPY test_csv_export TO '${SQLNESS_HOME}/import/test_csv_xz.csv.xz' WITH (format='csv', compression_type='xz');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
-- Test importing uncompressed CSV
|
||||
CREATE TABLE test_csv_import_uncompressed(
|
||||
`id` UINT32,
|
||||
`name` STRING,
|
||||
`value` DOUBLE,
|
||||
ts TIMESTAMP TIME INDEX
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
COPY test_csv_import_uncompressed FROM '${SQLNESS_HOME}/import/test_csv_uncompressed.csv' WITH (format='csv');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
SELECT COUNT(*) as uncompressed_count FROM test_csv_import_uncompressed;
|
||||
|
||||
+--------------------+
|
||||
| uncompressed_count |
|
||||
+--------------------+
|
||||
| 5 |
|
||||
+--------------------+
|
||||
|
||||
-- Test importing GZIP compressed CSV
|
||||
CREATE TABLE test_csv_import_gzip(
|
||||
`id` UINT32,
|
||||
`name` STRING,
|
||||
`value` DOUBLE,
|
||||
ts TIMESTAMP TIME INDEX
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
COPY test_csv_import_gzip FROM '${SQLNESS_HOME}/import/test_csv_gzip.csv.gz' WITH (format='csv', compression_type='gzip');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
SELECT COUNT(*) as gzip_count FROM test_csv_import_gzip;
|
||||
|
||||
+------------+
|
||||
| gzip_count |
|
||||
+------------+
|
||||
| 5 |
|
||||
+------------+
|
||||
|
||||
SELECT `id`, `name`, `value` FROM test_csv_import_gzip WHERE `id` = 1;
|
||||
|
||||
+----+-------+-------+
|
||||
| id | name | value |
|
||||
+----+-------+-------+
|
||||
| 1 | Alice | 10.5 |
|
||||
+----+-------+-------+
|
||||
|
||||
-- Test importing ZSTD compressed CSV
|
||||
CREATE TABLE test_csv_import_zstd(
|
||||
`id` UINT32,
|
||||
`name` STRING,
|
||||
`value` DOUBLE,
|
||||
ts TIMESTAMP TIME INDEX
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
COPY test_csv_import_zstd FROM '${SQLNESS_HOME}/import/test_csv_zstd.csv.zst' WITH (format='csv', compression_type='zstd');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
SELECT COUNT(*) as zstd_count FROM test_csv_import_zstd;
|
||||
|
||||
+------------+
|
||||
| zstd_count |
|
||||
+------------+
|
||||
| 5 |
|
||||
+------------+
|
||||
|
||||
SELECT `id`, `name`, `value` FROM test_csv_import_zstd WHERE `id` = 2;
|
||||
|
||||
+----+------+-------+
|
||||
| id | name | value |
|
||||
+----+------+-------+
|
||||
| 2 | Bob | 20.3 |
|
||||
+----+------+-------+
|
||||
|
||||
-- Test importing BZIP2 compressed CSV
|
||||
CREATE TABLE test_csv_import_bzip2(
|
||||
`id` UINT32,
|
||||
`name` STRING,
|
||||
`value` DOUBLE,
|
||||
ts TIMESTAMP TIME INDEX
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
COPY test_csv_import_bzip2 FROM '${SQLNESS_HOME}/import/test_csv_bzip2.csv.bz2' WITH (format='csv', compression_type='bzip2');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
SELECT COUNT(*) as bzip2_count FROM test_csv_import_bzip2;
|
||||
|
||||
+-------------+
|
||||
| bzip2_count |
|
||||
+-------------+
|
||||
| 5 |
|
||||
+-------------+
|
||||
|
||||
SELECT `id`, `name`, `value` FROM test_csv_import_bzip2 WHERE `id` = 3;
|
||||
|
||||
+----+---------+-------+
|
||||
| id | name | value |
|
||||
+----+---------+-------+
|
||||
| 3 | Charlie | 30.7 |
|
||||
+----+---------+-------+
|
||||
|
||||
-- Test importing XZ compressed CSV
|
||||
CREATE TABLE test_csv_import_xz(
|
||||
`id` UINT32,
|
||||
`name` STRING,
|
||||
`value` DOUBLE,
|
||||
ts TIMESTAMP TIME INDEX
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
COPY test_csv_import_xz FROM '${SQLNESS_HOME}/import/test_csv_xz.csv.xz' WITH (format='csv', compression_type='xz');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
SELECT COUNT(*) as xz_count FROM test_csv_import_xz;
|
||||
|
||||
+----------+
|
||||
| xz_count |
|
||||
+----------+
|
||||
| 5 |
|
||||
+----------+
|
||||
|
||||
SELECT `id`, `name`, `value` FROM test_csv_import_xz WHERE `id` = 4;
|
||||
|
||||
+----+-------+-------+
|
||||
| id | name | value |
|
||||
+----+-------+-------+
|
||||
| 4 | David | 40.1 |
|
||||
+----+-------+-------+
|
||||
|
||||
-- Verify data integrity by comparing all imported tables
|
||||
SELECT source, count FROM (
|
||||
SELECT 'uncompressed' as source, COUNT(*) as count, 1 as order_key FROM test_csv_import_uncompressed
|
||||
UNION ALL
|
||||
SELECT 'gzip', COUNT(*) as count, 2 as order_key FROM test_csv_import_gzip
|
||||
UNION ALL
|
||||
SELECT 'zstd', COUNT(*) as count, 3 as order_key FROM test_csv_import_zstd
|
||||
UNION ALL
|
||||
SELECT 'bzip2', COUNT(*) as count, 4 as order_key FROM test_csv_import_bzip2
|
||||
UNION ALL
|
||||
SELECT 'xz', COUNT(*) as count, 5 as order_key FROM test_csv_import_xz
|
||||
) AS subquery
|
||||
ORDER BY order_key;
|
||||
|
||||
+--------------+-------+
|
||||
| source | count |
|
||||
+--------------+-------+
|
||||
| uncompressed | 5 |
|
||||
| gzip | 5 |
|
||||
| zstd | 5 |
|
||||
| bzip2 | 5 |
|
||||
| xz | 5 |
|
||||
+--------------+-------+
|
||||
|
||||
-- Clean up
|
||||
DROP TABLE test_csv_export;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
DROP TABLE test_csv_import_uncompressed;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
DROP TABLE test_csv_import_gzip;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
DROP TABLE test_csv_import_zstd;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
DROP TABLE test_csv_import_bzip2;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
DROP TABLE test_csv_import_xz;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
109
tests/cases/standalone/common/copy/copy_from_csv_compressed.sql
Normal file
109
tests/cases/standalone/common/copy/copy_from_csv_compressed.sql
Normal file
@@ -0,0 +1,109 @@
|
||||
-- Test compressed CSV import functionality
|
||||
-- First, create and export data with different compression types
|
||||
CREATE TABLE test_csv_export(
|
||||
`id` UINT32,
|
||||
`name` STRING,
|
||||
`value` DOUBLE,
|
||||
ts TIMESTAMP TIME INDEX
|
||||
);
|
||||
|
||||
-- Insert test data
|
||||
INSERT INTO test_csv_export(`id`, `name`, `value`, ts) VALUES
|
||||
(1, 'Alice', 10.5, 1640995200000),
|
||||
(2, 'Bob', 20.3, 1640995260000),
|
||||
(3, 'Charlie', 30.7, 1640995320000),
|
||||
(4, 'David', 40.1, 1640995380000),
|
||||
(5, 'Eve', 50.9, 1640995440000);
|
||||
|
||||
-- Export with different compression types
|
||||
COPY test_csv_export TO '${SQLNESS_HOME}/import/test_csv_uncompressed.csv' WITH (format='csv');
|
||||
COPY test_csv_export TO '${SQLNESS_HOME}/import/test_csv_gzip.csv.gz' WITH (format='csv', compression_type='gzip');
|
||||
COPY test_csv_export TO '${SQLNESS_HOME}/import/test_csv_zstd.csv.zst' WITH (format='csv', compression_type='zstd');
|
||||
COPY test_csv_export TO '${SQLNESS_HOME}/import/test_csv_bzip2.csv.bz2' WITH (format='csv', compression_type='bzip2');
|
||||
COPY test_csv_export TO '${SQLNESS_HOME}/import/test_csv_xz.csv.xz' WITH (format='csv', compression_type='xz');
|
||||
|
||||
-- Test importing uncompressed CSV
|
||||
CREATE TABLE test_csv_import_uncompressed(
|
||||
`id` UINT32,
|
||||
`name` STRING,
|
||||
`value` DOUBLE,
|
||||
ts TIMESTAMP TIME INDEX
|
||||
);
|
||||
|
||||
COPY test_csv_import_uncompressed FROM '${SQLNESS_HOME}/import/test_csv_uncompressed.csv' WITH (format='csv');
|
||||
|
||||
SELECT COUNT(*) as uncompressed_count FROM test_csv_import_uncompressed;
|
||||
|
||||
-- Test importing GZIP compressed CSV
|
||||
CREATE TABLE test_csv_import_gzip(
|
||||
`id` UINT32,
|
||||
`name` STRING,
|
||||
`value` DOUBLE,
|
||||
ts TIMESTAMP TIME INDEX
|
||||
);
|
||||
|
||||
COPY test_csv_import_gzip FROM '${SQLNESS_HOME}/import/test_csv_gzip.csv.gz' WITH (format='csv', compression_type='gzip');
|
||||
|
||||
SELECT COUNT(*) as gzip_count FROM test_csv_import_gzip;
|
||||
SELECT `id`, `name`, `value` FROM test_csv_import_gzip WHERE `id` = 1;
|
||||
|
||||
-- Test importing ZSTD compressed CSV
|
||||
CREATE TABLE test_csv_import_zstd(
|
||||
`id` UINT32,
|
||||
`name` STRING,
|
||||
`value` DOUBLE,
|
||||
ts TIMESTAMP TIME INDEX
|
||||
);
|
||||
|
||||
COPY test_csv_import_zstd FROM '${SQLNESS_HOME}/import/test_csv_zstd.csv.zst' WITH (format='csv', compression_type='zstd');
|
||||
|
||||
SELECT COUNT(*) as zstd_count FROM test_csv_import_zstd;
|
||||
SELECT `id`, `name`, `value` FROM test_csv_import_zstd WHERE `id` = 2;
|
||||
|
||||
-- Test importing BZIP2 compressed CSV
|
||||
CREATE TABLE test_csv_import_bzip2(
|
||||
`id` UINT32,
|
||||
`name` STRING,
|
||||
`value` DOUBLE,
|
||||
ts TIMESTAMP TIME INDEX
|
||||
);
|
||||
|
||||
COPY test_csv_import_bzip2 FROM '${SQLNESS_HOME}/import/test_csv_bzip2.csv.bz2' WITH (format='csv', compression_type='bzip2');
|
||||
|
||||
SELECT COUNT(*) as bzip2_count FROM test_csv_import_bzip2;
|
||||
SELECT `id`, `name`, `value` FROM test_csv_import_bzip2 WHERE `id` = 3;
|
||||
|
||||
-- Test importing XZ compressed CSV
|
||||
CREATE TABLE test_csv_import_xz(
|
||||
`id` UINT32,
|
||||
`name` STRING,
|
||||
`value` DOUBLE,
|
||||
ts TIMESTAMP TIME INDEX
|
||||
);
|
||||
|
||||
COPY test_csv_import_xz FROM '${SQLNESS_HOME}/import/test_csv_xz.csv.xz' WITH (format='csv', compression_type='xz');
|
||||
|
||||
SELECT COUNT(*) as xz_count FROM test_csv_import_xz;
|
||||
SELECT `id`, `name`, `value` FROM test_csv_import_xz WHERE `id` = 4;
|
||||
|
||||
-- Verify data integrity by comparing all imported tables
|
||||
SELECT source, count FROM (
|
||||
SELECT 'uncompressed' as source, COUNT(*) as count, 1 as order_key FROM test_csv_import_uncompressed
|
||||
UNION ALL
|
||||
SELECT 'gzip', COUNT(*) as count, 2 as order_key FROM test_csv_import_gzip
|
||||
UNION ALL
|
||||
SELECT 'zstd', COUNT(*) as count, 3 as order_key FROM test_csv_import_zstd
|
||||
UNION ALL
|
||||
SELECT 'bzip2', COUNT(*) as count, 4 as order_key FROM test_csv_import_bzip2
|
||||
UNION ALL
|
||||
SELECT 'xz', COUNT(*) as count, 5 as order_key FROM test_csv_import_xz
|
||||
) AS subquery
|
||||
ORDER BY order_key;
|
||||
|
||||
-- Clean up
|
||||
DROP TABLE test_csv_export;
|
||||
DROP TABLE test_csv_import_uncompressed;
|
||||
DROP TABLE test_csv_import_gzip;
|
||||
DROP TABLE test_csv_import_zstd;
|
||||
DROP TABLE test_csv_import_bzip2;
|
||||
DROP TABLE test_csv_import_xz;
|
||||
@@ -0,0 +1,233 @@
|
||||
-- Test compressed JSON import functionality
|
||||
-- First, create and export data with different compression types
|
||||
CREATE TABLE test_json_export(
|
||||
`id` UINT32,
|
||||
`name` STRING,
|
||||
`value` DOUBLE,
|
||||
ts TIMESTAMP TIME INDEX
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
-- Insert test data
|
||||
INSERT INTO test_json_export(`id`, `name`, `value`, ts) VALUES
|
||||
(1, 'Alice', 10.5, 1640995200000),
|
||||
(2, 'Bob', 20.3, 1640995260000),
|
||||
(3, 'Charlie', 30.7, 1640995320000),
|
||||
(4, 'David', 40.1, 1640995380000),
|
||||
(5, 'Eve', 50.9, 1640995440000);
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
-- Export with different compression types
|
||||
COPY test_json_export TO '${SQLNESS_HOME}/import/test_json_uncompressed.json' WITH (format='json');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
COPY test_json_export TO '${SQLNESS_HOME}/import/test_json_gzip.json.gz' WITH (format='json', compression_type='gzip');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
COPY test_json_export TO '${SQLNESS_HOME}/import/test_json_zstd.json.zst' WITH (format='json', compression_type='zstd');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
COPY test_json_export TO '${SQLNESS_HOME}/import/test_json_bzip2.json.bz2' WITH (format='json', compression_type='bzip2');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
COPY test_json_export TO '${SQLNESS_HOME}/import/test_json_xz.json.xz' WITH (format='json', compression_type='xz');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
-- Test importing uncompressed JSON
|
||||
CREATE TABLE test_json_import_uncompressed(
|
||||
`id` UINT32,
|
||||
`name` STRING,
|
||||
`value` DOUBLE,
|
||||
ts TIMESTAMP TIME INDEX
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
COPY test_json_import_uncompressed FROM '${SQLNESS_HOME}/import/test_json_uncompressed.json' WITH (format='json');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
SELECT COUNT(*) as uncompressed_count FROM test_json_import_uncompressed;
|
||||
|
||||
+--------------------+
|
||||
| uncompressed_count |
|
||||
+--------------------+
|
||||
| 5 |
|
||||
+--------------------+
|
||||
|
||||
-- Test importing GZIP compressed JSON
|
||||
CREATE TABLE test_json_import_gzip(
|
||||
`id` UINT32,
|
||||
`name` STRING,
|
||||
`value` DOUBLE,
|
||||
ts TIMESTAMP TIME INDEX
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
COPY test_json_import_gzip FROM '${SQLNESS_HOME}/import/test_json_gzip.json.gz' WITH (format='json', compression_type='gzip');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
SELECT COUNT(*) as gzip_count FROM test_json_import_gzip;
|
||||
|
||||
+------------+
|
||||
| gzip_count |
|
||||
+------------+
|
||||
| 5 |
|
||||
+------------+
|
||||
|
||||
SELECT `id`, `name`, `value` FROM test_json_import_gzip WHERE `id` = 1;
|
||||
|
||||
+----+-------+-------+
|
||||
| id | name | value |
|
||||
+----+-------+-------+
|
||||
| 1 | Alice | 10.5 |
|
||||
+----+-------+-------+
|
||||
|
||||
-- Test importing ZSTD compressed JSON
|
||||
CREATE TABLE test_json_import_zstd(
|
||||
`id` UINT32,
|
||||
`name` STRING,
|
||||
`value` DOUBLE,
|
||||
ts TIMESTAMP TIME INDEX
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
COPY test_json_import_zstd FROM '${SQLNESS_HOME}/import/test_json_zstd.json.zst' WITH (format='json', compression_type='zstd');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
SELECT COUNT(*) as zstd_count FROM test_json_import_zstd;
|
||||
|
||||
+------------+
|
||||
| zstd_count |
|
||||
+------------+
|
||||
| 5 |
|
||||
+------------+
|
||||
|
||||
SELECT `id`, `name`, `value` FROM test_json_import_zstd WHERE `id` = 2;
|
||||
|
||||
+----+------+-------+
|
||||
| id | name | value |
|
||||
+----+------+-------+
|
||||
| 2 | Bob | 20.3 |
|
||||
+----+------+-------+
|
||||
|
||||
-- Test importing BZIP2 compressed JSON
|
||||
CREATE TABLE test_json_import_bzip2(
|
||||
`id` UINT32,
|
||||
`name` STRING,
|
||||
`value` DOUBLE,
|
||||
ts TIMESTAMP TIME INDEX
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
COPY test_json_import_bzip2 FROM '${SQLNESS_HOME}/import/test_json_bzip2.json.bz2' WITH (format='json', compression_type='bzip2');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
SELECT COUNT(*) as bzip2_count FROM test_json_import_bzip2;
|
||||
|
||||
+-------------+
|
||||
| bzip2_count |
|
||||
+-------------+
|
||||
| 5 |
|
||||
+-------------+
|
||||
|
||||
SELECT `id`, `name`, `value` FROM test_json_import_bzip2 WHERE `id` = 3;
|
||||
|
||||
+----+---------+-------+
|
||||
| id | name | value |
|
||||
+----+---------+-------+
|
||||
| 3 | Charlie | 30.7 |
|
||||
+----+---------+-------+
|
||||
|
||||
-- Test importing XZ compressed JSON
|
||||
CREATE TABLE test_json_import_xz(
|
||||
`id` UINT32,
|
||||
`name` STRING,
|
||||
`value` DOUBLE,
|
||||
ts TIMESTAMP TIME INDEX
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
COPY test_json_import_xz FROM '${SQLNESS_HOME}/import/test_json_xz.json.xz' WITH (format='json', compression_type='xz');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
SELECT COUNT(*) as xz_count FROM test_json_import_xz;
|
||||
|
||||
+----------+
|
||||
| xz_count |
|
||||
+----------+
|
||||
| 5 |
|
||||
+----------+
|
||||
|
||||
SELECT `id`, `name`, `value` FROM test_json_import_xz WHERE `id` = 4;
|
||||
|
||||
+----+-------+-------+
|
||||
| id | name | value |
|
||||
+----+-------+-------+
|
||||
| 4 | David | 40.1 |
|
||||
+----+-------+-------+
|
||||
|
||||
-- Verify data integrity by comparing all imported tables
|
||||
SELECT source, count FROM (
|
||||
SELECT 'uncompressed' as source, COUNT(*) as count, 1 as order_key FROM test_json_import_uncompressed
|
||||
UNION ALL
|
||||
SELECT 'gzip', COUNT(*) as count, 2 as order_key FROM test_json_import_gzip
|
||||
UNION ALL
|
||||
SELECT 'zstd', COUNT(*) as count, 3 as order_key FROM test_json_import_zstd
|
||||
UNION ALL
|
||||
SELECT 'bzip2', COUNT(*) as count, 4 as order_key FROM test_json_import_bzip2
|
||||
UNION ALL
|
||||
SELECT 'xz', COUNT(*) as count, 5 as order_key FROM test_json_import_xz
|
||||
) AS subquery
|
||||
ORDER BY order_key;
|
||||
|
||||
+--------------+-------+
|
||||
| source | count |
|
||||
+--------------+-------+
|
||||
| uncompressed | 5 |
|
||||
| gzip | 5 |
|
||||
| zstd | 5 |
|
||||
| bzip2 | 5 |
|
||||
| xz | 5 |
|
||||
+--------------+-------+
|
||||
|
||||
-- Clean up
|
||||
DROP TABLE test_json_export;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
DROP TABLE test_json_import_uncompressed;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
DROP TABLE test_json_import_gzip;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
DROP TABLE test_json_import_zstd;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
DROP TABLE test_json_import_bzip2;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
DROP TABLE test_json_import_xz;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
109
tests/cases/standalone/common/copy/copy_from_json_compressed.sql
Normal file
109
tests/cases/standalone/common/copy/copy_from_json_compressed.sql
Normal file
@@ -0,0 +1,109 @@
|
||||
-- Test compressed JSON import functionality
|
||||
-- First, create and export data with different compression types
|
||||
CREATE TABLE test_json_export(
|
||||
`id` UINT32,
|
||||
`name` STRING,
|
||||
`value` DOUBLE,
|
||||
ts TIMESTAMP TIME INDEX
|
||||
);
|
||||
|
||||
-- Insert test data
|
||||
INSERT INTO test_json_export(`id`, `name`, `value`, ts) VALUES
|
||||
(1, 'Alice', 10.5, 1640995200000),
|
||||
(2, 'Bob', 20.3, 1640995260000),
|
||||
(3, 'Charlie', 30.7, 1640995320000),
|
||||
(4, 'David', 40.1, 1640995380000),
|
||||
(5, 'Eve', 50.9, 1640995440000);
|
||||
|
||||
-- Export with different compression types
|
||||
COPY test_json_export TO '${SQLNESS_HOME}/import/test_json_uncompressed.json' WITH (format='json');
|
||||
COPY test_json_export TO '${SQLNESS_HOME}/import/test_json_gzip.json.gz' WITH (format='json', compression_type='gzip');
|
||||
COPY test_json_export TO '${SQLNESS_HOME}/import/test_json_zstd.json.zst' WITH (format='json', compression_type='zstd');
|
||||
COPY test_json_export TO '${SQLNESS_HOME}/import/test_json_bzip2.json.bz2' WITH (format='json', compression_type='bzip2');
|
||||
COPY test_json_export TO '${SQLNESS_HOME}/import/test_json_xz.json.xz' WITH (format='json', compression_type='xz');
|
||||
|
||||
-- Test importing uncompressed JSON
|
||||
CREATE TABLE test_json_import_uncompressed(
|
||||
`id` UINT32,
|
||||
`name` STRING,
|
||||
`value` DOUBLE,
|
||||
ts TIMESTAMP TIME INDEX
|
||||
);
|
||||
|
||||
COPY test_json_import_uncompressed FROM '${SQLNESS_HOME}/import/test_json_uncompressed.json' WITH (format='json');
|
||||
|
||||
SELECT COUNT(*) as uncompressed_count FROM test_json_import_uncompressed;
|
||||
|
||||
-- Test importing GZIP compressed JSON
|
||||
CREATE TABLE test_json_import_gzip(
|
||||
`id` UINT32,
|
||||
`name` STRING,
|
||||
`value` DOUBLE,
|
||||
ts TIMESTAMP TIME INDEX
|
||||
);
|
||||
|
||||
COPY test_json_import_gzip FROM '${SQLNESS_HOME}/import/test_json_gzip.json.gz' WITH (format='json', compression_type='gzip');
|
||||
|
||||
SELECT COUNT(*) as gzip_count FROM test_json_import_gzip;
|
||||
SELECT `id`, `name`, `value` FROM test_json_import_gzip WHERE `id` = 1;
|
||||
|
||||
-- Test importing ZSTD compressed JSON
|
||||
CREATE TABLE test_json_import_zstd(
|
||||
`id` UINT32,
|
||||
`name` STRING,
|
||||
`value` DOUBLE,
|
||||
ts TIMESTAMP TIME INDEX
|
||||
);
|
||||
|
||||
COPY test_json_import_zstd FROM '${SQLNESS_HOME}/import/test_json_zstd.json.zst' WITH (format='json', compression_type='zstd');
|
||||
|
||||
SELECT COUNT(*) as zstd_count FROM test_json_import_zstd;
|
||||
SELECT `id`, `name`, `value` FROM test_json_import_zstd WHERE `id` = 2;
|
||||
|
||||
-- Test importing BZIP2 compressed JSON
|
||||
CREATE TABLE test_json_import_bzip2(
|
||||
`id` UINT32,
|
||||
`name` STRING,
|
||||
`value` DOUBLE,
|
||||
ts TIMESTAMP TIME INDEX
|
||||
);
|
||||
|
||||
COPY test_json_import_bzip2 FROM '${SQLNESS_HOME}/import/test_json_bzip2.json.bz2' WITH (format='json', compression_type='bzip2');
|
||||
|
||||
SELECT COUNT(*) as bzip2_count FROM test_json_import_bzip2;
|
||||
SELECT `id`, `name`, `value` FROM test_json_import_bzip2 WHERE `id` = 3;
|
||||
|
||||
-- Test importing XZ compressed JSON
|
||||
CREATE TABLE test_json_import_xz(
|
||||
`id` UINT32,
|
||||
`name` STRING,
|
||||
`value` DOUBLE,
|
||||
ts TIMESTAMP TIME INDEX
|
||||
);
|
||||
|
||||
COPY test_json_import_xz FROM '${SQLNESS_HOME}/import/test_json_xz.json.xz' WITH (format='json', compression_type='xz');
|
||||
|
||||
SELECT COUNT(*) as xz_count FROM test_json_import_xz;
|
||||
SELECT `id`, `name`, `value` FROM test_json_import_xz WHERE `id` = 4;
|
||||
|
||||
-- Verify data integrity by comparing all imported tables
|
||||
SELECT source, count FROM (
|
||||
SELECT 'uncompressed' as source, COUNT(*) as count, 1 as order_key FROM test_json_import_uncompressed
|
||||
UNION ALL
|
||||
SELECT 'gzip', COUNT(*) as count, 2 as order_key FROM test_json_import_gzip
|
||||
UNION ALL
|
||||
SELECT 'zstd', COUNT(*) as count, 3 as order_key FROM test_json_import_zstd
|
||||
UNION ALL
|
||||
SELECT 'bzip2', COUNT(*) as count, 4 as order_key FROM test_json_import_bzip2
|
||||
UNION ALL
|
||||
SELECT 'xz', COUNT(*) as count, 5 as order_key FROM test_json_import_xz
|
||||
) AS subquery
|
||||
ORDER BY order_key;
|
||||
|
||||
-- Clean up
|
||||
DROP TABLE test_json_export;
|
||||
DROP TABLE test_json_import_uncompressed;
|
||||
DROP TABLE test_json_import_gzip;
|
||||
DROP TABLE test_json_import_zstd;
|
||||
DROP TABLE test_json_import_bzip2;
|
||||
DROP TABLE test_json_import_xz;
|
||||
@@ -0,0 +1,65 @@
|
||||
-- Test compressed CSV export functionality
|
||||
CREATE TABLE test_csv_compression(
|
||||
`id` UINT32,
|
||||
`name` STRING,
|
||||
`value` DOUBLE,
|
||||
ts TIMESTAMP TIME INDEX
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
-- Insert test data
|
||||
INSERT INTO test_csv_compression(`id`, `name`, `value`, ts) VALUES
|
||||
(1, 'Alice', 10.5, 1640995200000),
|
||||
(2, 'Bob', 20.3, 1640995260000),
|
||||
(3, 'Charlie', 30.7, 1640995320000),
|
||||
(4, 'David', 40.1, 1640995380000),
|
||||
(5, 'Eve', 50.9, 1640995440000);
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
-- Test uncompressed CSV export
|
||||
COPY test_csv_compression TO '${SQLNESS_HOME}/export/test_csv_uncompressed.csv' WITH (format='csv');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
-- Test GZIP compressed CSV export
|
||||
COPY test_csv_compression TO '${SQLNESS_HOME}/export/test_csv_gzip.csv.gz' WITH (format='csv', compression_type='gzip');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
-- Test ZSTD compressed CSV export
|
||||
COPY test_csv_compression TO '${SQLNESS_HOME}/export/test_csv_zstd.csv.zst' WITH (format='csv', compression_type='zstd');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
-- Test BZIP2 compressed CSV export
|
||||
COPY test_csv_compression TO '${SQLNESS_HOME}/export/test_csv_bzip2.csv.bz2' WITH (format='csv', compression_type='bzip2');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
-- Test XZ compressed CSV export
|
||||
COPY test_csv_compression TO '${SQLNESS_HOME}/export/test_csv_xz.csv.xz' WITH (format='csv', compression_type='xz');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
-- Test compressed CSV with custom delimiter b'\t' (ASCII 9)
|
||||
COPY test_csv_compression TO '${SQLNESS_HOME}/export/test_csv_tab_separated.csv.gz' WITH (format='csv', compression_type='gzip', delimiter='9');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
-- Test compressed CSV with timestamp format
|
||||
COPY test_csv_compression TO '${SQLNESS_HOME}/export/test_csv_timestamp_format.csv.zst' WITH (format='csv', compression_type='zstd', timestamp_format='%Y-%m-%d %H:%M:%S');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
-- Test compressed CSV with date format
|
||||
COPY test_csv_compression TO '${SQLNESS_HOME}/export/test_csv_date_format.csv.gz' WITH (format='csv', compression_type='gzip', date_format='%Y/%m/%d');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
-- Clean up
|
||||
DROP TABLE test_csv_compression;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
-- Test compressed CSV export functionality
|
||||
CREATE TABLE test_csv_compression(
|
||||
`id` UINT32,
|
||||
`name` STRING,
|
||||
`value` DOUBLE,
|
||||
ts TIMESTAMP TIME INDEX
|
||||
);
|
||||
|
||||
-- Insert test data
|
||||
INSERT INTO test_csv_compression(`id`, `name`, `value`, ts) VALUES
|
||||
(1, 'Alice', 10.5, 1640995200000),
|
||||
(2, 'Bob', 20.3, 1640995260000),
|
||||
(3, 'Charlie', 30.7, 1640995320000),
|
||||
(4, 'David', 40.1, 1640995380000),
|
||||
(5, 'Eve', 50.9, 1640995440000);
|
||||
|
||||
-- Test uncompressed CSV export
|
||||
COPY test_csv_compression TO '${SQLNESS_HOME}/export/test_csv_uncompressed.csv' WITH (format='csv');
|
||||
|
||||
-- Test GZIP compressed CSV export
|
||||
COPY test_csv_compression TO '${SQLNESS_HOME}/export/test_csv_gzip.csv.gz' WITH (format='csv', compression_type='gzip');
|
||||
|
||||
-- Test ZSTD compressed CSV export
|
||||
COPY test_csv_compression TO '${SQLNESS_HOME}/export/test_csv_zstd.csv.zst' WITH (format='csv', compression_type='zstd');
|
||||
|
||||
-- Test BZIP2 compressed CSV export
|
||||
COPY test_csv_compression TO '${SQLNESS_HOME}/export/test_csv_bzip2.csv.bz2' WITH (format='csv', compression_type='bzip2');
|
||||
|
||||
-- Test XZ compressed CSV export
|
||||
COPY test_csv_compression TO '${SQLNESS_HOME}/export/test_csv_xz.csv.xz' WITH (format='csv', compression_type='xz');
|
||||
|
||||
-- Test compressed CSV with custom delimiter b'\t' (ASCII 9)
|
||||
COPY test_csv_compression TO '${SQLNESS_HOME}/export/test_csv_tab_separated.csv.gz' WITH (format='csv', compression_type='gzip', delimiter='9');
|
||||
|
||||
-- Test compressed CSV with timestamp format
|
||||
COPY test_csv_compression TO '${SQLNESS_HOME}/export/test_csv_timestamp_format.csv.zst' WITH (format='csv', compression_type='zstd', timestamp_format='%Y-%m-%d %H:%M:%S');
|
||||
|
||||
-- Test compressed CSV with date format
|
||||
COPY test_csv_compression TO '${SQLNESS_HOME}/export/test_csv_date_format.csv.gz' WITH (format='csv', compression_type='gzip', date_format='%Y/%m/%d');
|
||||
|
||||
-- Clean up
|
||||
DROP TABLE test_csv_compression;
|
||||
@@ -0,0 +1,55 @@
|
||||
-- Test compressed JSON export functionality
|
||||
CREATE TABLE test_json_compression(
|
||||
`id` UINT32,
|
||||
`name` STRING,
|
||||
`value` DOUBLE,
|
||||
ts TIMESTAMP TIME INDEX
|
||||
);
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
-- Insert test data
|
||||
INSERT INTO test_json_compression(`id`, `name`, `value`, ts) VALUES
|
||||
(1, 'Alice', 10.5, 1640995200000),
|
||||
(2, 'Bob', 20.3, 1640995260000),
|
||||
(3, 'Charlie', 30.7, 1640995320000),
|
||||
(4, 'David', 40.1, 1640995380000),
|
||||
(5, 'Eve', 50.9, 1640995440000);
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
-- Test uncompressed JSON export
|
||||
COPY test_json_compression TO '${SQLNESS_HOME}/export/test_json_uncompressed.json' WITH (format='json');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
-- Test GZIP compressed JSON export
|
||||
COPY test_json_compression TO '${SQLNESS_HOME}/export/test_json_gzip.json.gz' WITH (format='json', compression_type='gzip');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
-- Test ZSTD compressed JSON export
|
||||
COPY test_json_compression TO '${SQLNESS_HOME}/export/test_json_zstd.json.zst' WITH (format='json', compression_type='zstd');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
-- Test BZIP2 compressed JSON export
|
||||
COPY test_json_compression TO '${SQLNESS_HOME}/export/test_json_bzip2.json.bz2' WITH (format='json', compression_type='bzip2');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
-- Test XZ compressed JSON export
|
||||
COPY test_json_compression TO '${SQLNESS_HOME}/export/test_json_xz.json.xz' WITH (format='json', compression_type='xz');
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
-- Test compressed JSON with schema inference limit
|
||||
COPY test_json_compression TO '${SQLNESS_HOME}/export/test_json_schema_limit.json.gz' WITH (format='json', compression_type='gzip', schema_infer_max_record=100);
|
||||
|
||||
Affected Rows: 5
|
||||
|
||||
-- Clean up
|
||||
DROP TABLE test_json_compression;
|
||||
|
||||
Affected Rows: 0
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
-- Test compressed JSON export functionality
|
||||
CREATE TABLE test_json_compression(
|
||||
`id` UINT32,
|
||||
`name` STRING,
|
||||
`value` DOUBLE,
|
||||
ts TIMESTAMP TIME INDEX
|
||||
);
|
||||
|
||||
-- Insert test data
|
||||
INSERT INTO test_json_compression(`id`, `name`, `value`, ts) VALUES
|
||||
(1, 'Alice', 10.5, 1640995200000),
|
||||
(2, 'Bob', 20.3, 1640995260000),
|
||||
(3, 'Charlie', 30.7, 1640995320000),
|
||||
(4, 'David', 40.1, 1640995380000),
|
||||
(5, 'Eve', 50.9, 1640995440000);
|
||||
|
||||
-- Test uncompressed JSON export
|
||||
COPY test_json_compression TO '${SQLNESS_HOME}/export/test_json_uncompressed.json' WITH (format='json');
|
||||
|
||||
-- Test GZIP compressed JSON export
|
||||
COPY test_json_compression TO '${SQLNESS_HOME}/export/test_json_gzip.json.gz' WITH (format='json', compression_type='gzip');
|
||||
|
||||
-- Test ZSTD compressed JSON export
|
||||
COPY test_json_compression TO '${SQLNESS_HOME}/export/test_json_zstd.json.zst' WITH (format='json', compression_type='zstd');
|
||||
|
||||
-- Test BZIP2 compressed JSON export
|
||||
COPY test_json_compression TO '${SQLNESS_HOME}/export/test_json_bzip2.json.bz2' WITH (format='json', compression_type='bzip2');
|
||||
|
||||
-- Test XZ compressed JSON export
|
||||
COPY test_json_compression TO '${SQLNESS_HOME}/export/test_json_xz.json.xz' WITH (format='json', compression_type='xz');
|
||||
|
||||
-- Test compressed JSON with schema inference limit
|
||||
COPY test_json_compression TO '${SQLNESS_HOME}/export/test_json_schema_limit.json.gz' WITH (format='json', compression_type='gzip', schema_infer_max_record=100);
|
||||
|
||||
-- Clean up
|
||||
DROP TABLE test_json_compression;
|
||||
Reference in New Issue
Block a user