Add LSM3 implementation

This commit is contained in:
Konstantin Knizhnik
2025-03-14 08:42:13 +02:00
committed by Konstantin Knizhnik
parent b7c6738524
commit fedb42dcb6
11 changed files with 2447 additions and 0 deletions

View File

@@ -180,6 +180,11 @@ postgres-check-%: postgres-%
.PHONY: neon-pg-ext-%
neon-pg-ext-%: postgres-%
+@echo "Compiling lsm3 $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/lsm3-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/lsm3-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/lsm3/Makefile install
+@echo "Compiling neon $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \

34
pgxn/lsm3/META.json Normal file
View File

@@ -0,0 +1,34 @@
{
"name": "lsm3",
"abstract": "LSM tree implemented using standard Postgres B-Tree indexes",
"description": "LSM3 provides fast inserts thanks to background index updates and improved index access locality.",
"version": "0.1.3",
"maintainer": ["Konstantin Knizhnik <knizhnik@garret.ru>"],
"license": {
"PostgreSQL": "http://www.postgresql.org/about/licence"
},
"provides": {
"lsm3": {
"file": "lsm3--1.0.sql",
"docfile": "README.md",
"version": "0.1.3",
"abstract": "LSM tree implemented using standard Postgres B-Tree indexes"
}
},
"tags": ["LSM", "Log-structured merge-tree"],
"resources": {
"repository": {
"url": "https://github.com/postgrespro/lsm3.git",
"web": "https://github.com/postgrespro/lsm3",
"type": "git"
},
"bugtracker": {
"web": "https://github.com/postgrespro/lsm3/issues"
}
},
"generated_by": "Konstantin Knizhnik",
"meta-spec": {
"version": "1.0.0",
"url": "http://pgxn.org/meta/spec.txt"
}
}

14
pgxn/lsm3/Makefile Normal file
View File

@@ -0,0 +1,14 @@
# PGXS build description for the lsm3 extension: one shared library (lsm3.so)
# built from lsm3.o, one install script and one regression test.
MODULE_big = lsm3
OBJS = lsm3.o
# Human-readable description of the module; matches the abstract in META.json.
PGFILEDESC = "lsm3 - LSM tree implemented using standard Postgres B-Tree indexes"
EXTENSION = lsm3
DATA = lsm3--1.0.sql
REGRESS = test
# The regression run needs lsm3.conf (it preloads the library).
# NOTE(review): this path assumes the sources live under contrib/lsm3 of a
# PostgreSQL source tree; in this repository they are in pgxn/lsm3 - confirm
# the path resolves before relying on "make installcheck".
REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/lsm3/lsm3.conf
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)

42
pgxn/lsm3/README.md Normal file
View File

@@ -0,0 +1,42 @@
LSM tree implemented using standard Postgres B-Tree indexes.
Top index is used to perform fast inserts and on overflow it is merged
with base index. To perform merge operation concurrently
without blocking other operations with index, two top indexes are used:
active and merged. So totally there are three B-Tree indexes:
two top indexes and one base index.
When performing an index scan we have to merge the scans of all three of these indexes.
This extension needs to create a data structure in shared memory, which is why it must be loaded through the
`shared_preload_libraries` list. Once the extension is created, you can define indexes using the lsm3 access method:
```sql
create extension lsm3;
create table t(id integer, val text);
create index idx on t using lsm3(id);
```
`Lsm3` supports the same types and the same set of operations as the standard B-Tree.
Current restrictions of `Lsm3`:
- Parallel index scan is not supported.
- Array keys are not supported.
- `Lsm3` index can not be declared as unique.
`Lsm3` extension can be configured using the following parameters:
- `lsm3.max_indexes`: maximal number of Lsm3 indexes (default 1024).
- `lsm3.top_index_size`: size of the top index, in kilobytes (default 64MB).
It is also possible to specify size of top index in relation options - this value will override `lsm3.top_index_size` GUC.
Although a unique constraint cannot be enforced using an Lsm3 index, it is still possible to mark the index as unique to
optimize index search. If the index is marked as unique and the searched key is found in the active
top index, then the lookup in the other two indexes is not performed. Since applications most frequently
search for the most recently inserted data, this speeds up the search by performing just one index lookup instead of three.
Index can be marked as unique using index options:
```sql
create index idx on t using lsm3(id) with (unique=true);
```
Please note that Lsm3 creates a background worker (merge process) for each Lsm3 index,
so you may need to raise `max_worker_processes` in postgresql.conf to a sufficiently large value.

177
pgxn/lsm3/expected/test.out Normal file
View File

@@ -0,0 +1,177 @@
create extension lsm3;
create table t(k bigint, val bigint);
create index lsm3_index on t using lsm3(k);
set enable_seqscan=off;
insert into t values (1,10);
select lsm3_start_merge('lsm3_index');
lsm3_start_merge
------------------
(1 row)
select lsm3_wait_merge_completion('lsm3_index');
lsm3_wait_merge_completion
----------------------------
(1 row)
insert into t values (2,20);
select lsm3_start_merge('lsm3_index');
lsm3_start_merge
------------------
(1 row)
select lsm3_wait_merge_completion('lsm3_index');
lsm3_wait_merge_completion
----------------------------
(1 row)
insert into t values (3,30);
select lsm3_start_merge('lsm3_index');
lsm3_start_merge
------------------
(1 row)
select lsm3_wait_merge_completion('lsm3_index');
lsm3_wait_merge_completion
----------------------------
(1 row)
insert into t values (4,40);
select lsm3_start_merge('lsm3_index');
lsm3_start_merge
------------------
(1 row)
select lsm3_wait_merge_completion('lsm3_index');
lsm3_wait_merge_completion
----------------------------
(1 row)
insert into t values (5,50);
select lsm3_start_merge('lsm3_index');
lsm3_start_merge
------------------
(1 row)
select lsm3_wait_merge_completion('lsm3_index');
lsm3_wait_merge_completion
----------------------------
(1 row)
select lsm3_get_merge_count('lsm3_index');
lsm3_get_merge_count
----------------------
5
(1 row)
select * from t where k = 1;
k | val
---+-----
1 | 10
(1 row)
select * from t order by k;
k | val
---+-----
1 | 10
2 | 20
3 | 30
4 | 40
5 | 50
(5 rows)
select * from t order by k desc;
k | val
---+-----
5 | 50
4 | 40
3 | 30
2 | 20
1 | 10
(5 rows)
analyze t;
explain (COSTS OFF, TIMING OFF, SUMMARY OFF) select * from t order by k;
QUERY PLAN
----------------------------------
Index Scan using lsm3_index on t
(1 row)
insert into t values (generate_series(1,100000), 1);
insert into t values (generate_series(1000001,200000), 2);
insert into t values (generate_series(2000001,300000), 3);
insert into t values (generate_series(1,100000), 1);
insert into t values (generate_series(1000001,200000), 2);
insert into t values (generate_series(2000001,300000), 3);
select * from t where k = 1;
k | val
---+-----
1 | 10
1 | 1
1 | 1
(3 rows)
select * from t where k = 1000000;
k | val
---+-----
(0 rows)
select * from t where k = 2000000;
k | val
---+-----
(0 rows)
select * from t where k = 3000000;
k | val
---+-----
(0 rows)
analyze t;
explain (COSTS OFF, TIMING OFF, SUMMARY OFF) select * from t where k = 1;
QUERY PLAN
----------------------------------
Index Scan using lsm3_index on t
Index Cond: (k = 1)
(2 rows)
select lsm3_get_merge_count('lsm3_index') > 5;
?column?
----------
t
(1 row)
truncate table t;
insert into t values (generate_series(1,1000000), 1);
select * from t where k = 1;
k | val
---+-----
1 | 1
(1 row)
reindex table t;
select * from t where k = 1;
k | val
---+-----
1 | 1
(1 row)
drop table t;
create table lsm(k bigint);
insert into lsm values (generate_series(1, 1000000));
create index concurrently on lsm using lsm3(k);
select * from lsm where k = 1;
k
---
1
(1 row)
drop table lsm;

803
pgxn/lsm3/lsm3--1.0.sql Normal file
View File

@@ -0,0 +1,803 @@
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION lsm3" to load this file. \quit

-- Lsm3 access method and its operator classes.
--
-- The handler is created with plain CREATE FUNCTION (not CREATE OR REPLACE):
-- an install script must never silently take over an object that already
-- exists in the target schema.
CREATE FUNCTION lsm3_handler(internal)
RETURNS index_am_handler
AS 'MODULE_PATHNAME'
LANGUAGE C;

CREATE ACCESS METHOD lsm3 TYPE INDEX HANDLER lsm3_handler;
-- Integer operator family for lsm3 (int2, int4, int8).
-- Strategy numbers used throughout: 1 "<", 2 "<=", 3 "=", 4 ">=", 5 ">".
-- Support functions used throughout: 1 comparison, 2 sort support,
-- 3 in_range, 4 equalimage.
CREATE OPERATOR FAMILY integer_ops USING lsm3;

CREATE OPERATOR CLASS int2_ops
    DEFAULT FOR TYPE int2 USING lsm3 FAMILY integer_ops AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 btint2cmp(int2,int2);

CREATE OPERATOR CLASS int4_ops
    DEFAULT FOR TYPE int4 USING lsm3 FAMILY integer_ops AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 btint4cmp(int4,int4);

CREATE OPERATOR CLASS int8_ops
    DEFAULT FOR TYPE int8 USING lsm3 FAMILY integer_ops AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 btint8cmp(int8,int8);

-- Cross-type members of the family, grouped by (left,right) type pair.
ALTER OPERATOR FAMILY integer_ops USING lsm3 ADD
    -- int2 vs int4
    OPERATOR 1 <  (int2,int4),
    OPERATOR 2 <= (int2,int4),
    OPERATOR 3 =  (int2,int4),
    OPERATOR 4 >= (int2,int4),
    OPERATOR 5 >  (int2,int4),
    -- int2 vs int8
    OPERATOR 1 <  (int2,int8),
    OPERATOR 2 <= (int2,int8),
    OPERATOR 3 =  (int2,int8),
    OPERATOR 4 >= (int2,int8),
    OPERATOR 5 >  (int2,int8),
    -- int4 vs int2
    OPERATOR 1 <  (int4,int2),
    OPERATOR 2 <= (int4,int2),
    OPERATOR 3 =  (int4,int2),
    OPERATOR 4 >= (int4,int2),
    OPERATOR 5 >  (int4,int2),
    -- int4 vs int8
    OPERATOR 1 <  (int4,int8),
    OPERATOR 2 <= (int4,int8),
    OPERATOR 3 =  (int4,int8),
    OPERATOR 4 >= (int4,int8),
    OPERATOR 5 >  (int4,int8),
    -- int8 vs int2
    OPERATOR 1 <  (int8,int2),
    OPERATOR 2 <= (int8,int2),
    OPERATOR 3 =  (int8,int2),
    OPERATOR 4 >= (int8,int2),
    OPERATOR 5 >  (int8,int2),
    -- int8 vs int4
    OPERATOR 1 <  (int8,int4),
    OPERATOR 2 <= (int8,int4),
    OPERATOR 3 =  (int8,int4),
    OPERATOR 4 >= (int8,int4),
    OPERATOR 5 >  (int8,int4),
    -- cross-type comparison functions
    FUNCTION 1 (int2,int4) btint24cmp(int2,int4),
    FUNCTION 1 (int2,int8) btint28cmp(int2,int8),
    FUNCTION 1 (int4,int2) btint42cmp(int4,int2),
    FUNCTION 1 (int4,int8) btint48cmp(int4,int8),
    FUNCTION 1 (int8,int2) btint82cmp(int8,int2),
    FUNCTION 1 (int8,int4) btint84cmp(int8,int4),
    -- sort support
    FUNCTION 2 (int2,int2) btint2sortsupport(internal),
    FUNCTION 2 (int4,int4) btint4sortsupport(internal),
    FUNCTION 2 (int8,int8) btint8sortsupport(internal),
    -- in_range
    FUNCTION 3 (int2,int2) in_range(int2,int2,int2,bool,bool),
    FUNCTION 3 (int2,int4) in_range(int2,int2,int4,bool,bool),
    FUNCTION 3 (int2,int8) in_range(int2,int2,int8,bool,bool),
    FUNCTION 3 (int4,int2) in_range(int4,int4,int2,bool,bool),
    FUNCTION 3 (int4,int4) in_range(int4,int4,int4,bool,bool),
    FUNCTION 3 (int4,int8) in_range(int4,int4,int8,bool,bool),
    FUNCTION 3 (int8,int8) in_range(int8,int8,int8,bool,bool),
    -- equalimage
    FUNCTION 4 (int2,int2) btequalimage(oid),
    FUNCTION 4 (int4,int4) btequalimage(oid),
    FUNCTION 4 (int8,int8) btequalimage(oid);
-- Floating-point operator family for lsm3 (float4, float8).
-- No equalimage function (support function 4) is registered for this family.
CREATE OPERATOR FAMILY float_ops USING lsm3;

CREATE OPERATOR CLASS float4_ops
    DEFAULT FOR TYPE float4 USING lsm3 FAMILY float_ops AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 btfloat4cmp(float4,float4);

CREATE OPERATOR CLASS float8_ops
    DEFAULT FOR TYPE float8 USING lsm3 FAMILY float_ops AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 btfloat8cmp(float8,float8);

-- Cross-type members of the family, grouped by (left,right) type pair.
ALTER OPERATOR FAMILY float_ops USING lsm3 ADD
    -- float4 vs float8
    OPERATOR 1 <  (float4,float8),
    OPERATOR 2 <= (float4,float8),
    OPERATOR 3 =  (float4,float8),
    OPERATOR 4 >= (float4,float8),
    OPERATOR 5 >  (float4,float8),
    -- float8 vs float4
    OPERATOR 1 <  (float8,float4),
    OPERATOR 2 <= (float8,float4),
    OPERATOR 3 =  (float8,float4),
    OPERATOR 4 >= (float8,float4),
    OPERATOR 5 >  (float8,float4),
    -- cross-type comparison functions
    FUNCTION 1 (float4,float8) btfloat48cmp(float4,float8),
    FUNCTION 1 (float8,float4) btfloat84cmp(float8,float4),
    -- sort support
    FUNCTION 2 (float4,float4) btfloat4sortsupport(internal),
    FUNCTION 2 (float8,float8) btfloat8sortsupport(internal),
    -- in_range
    FUNCTION 3 (float4,float8) in_range(float4,float4,float8,bool,bool),
    FUNCTION 3 (float8,float8) in_range(float8,float8,float8,bool,bool);
-- Stand-alone lsm3 operator classes (no explicit family) for
-- bool, bpchar, bytea and "char".
CREATE OPERATOR CLASS bool_ops
    DEFAULT FOR TYPE bool USING lsm3 AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 btboolcmp(bool,bool),
        FUNCTION 4 btequalimage(oid);

CREATE OPERATOR CLASS bpchar_ops
    DEFAULT FOR TYPE bpchar USING lsm3 AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 bpcharcmp(bpchar,bpchar),
        FUNCTION 2 bpchar_sortsupport(internal),
        FUNCTION 4 btvarstrequalimage(oid);

CREATE OPERATOR CLASS bytea_ops
    DEFAULT FOR TYPE bytea USING lsm3 AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 byteacmp(bytea,bytea),
        FUNCTION 2 bytea_sortsupport(internal),
        FUNCTION 4 btequalimage(oid);

CREATE OPERATOR CLASS char_ops
    DEFAULT FOR TYPE "char" USING lsm3 AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 btcharcmp("char","char"),
        FUNCTION 4 btequalimage(oid);
-- Date/time operator family for lsm3 (date, timestamp, timestamptz).
CREATE OPERATOR FAMILY datetime_ops USING lsm3;

CREATE OPERATOR CLASS date_ops
    DEFAULT FOR TYPE date USING lsm3 FAMILY datetime_ops AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 date_cmp(date,date),
        FUNCTION 2 date_sortsupport(internal),
        FUNCTION 4 btequalimage(oid);

CREATE OPERATOR CLASS timestamp_ops
    DEFAULT FOR TYPE timestamp USING lsm3 FAMILY datetime_ops AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 timestamp_cmp(timestamp,timestamp),
        FUNCTION 2 timestamp_sortsupport(internal),
        FUNCTION 4 btequalimage(oid);

-- timestamptz registers timestamp_sortsupport as its sort support function.
CREATE OPERATOR CLASS timestamptz_ops
    DEFAULT FOR TYPE timestamptz USING lsm3 FAMILY datetime_ops AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 timestamptz_cmp(timestamptz,timestamptz),
        FUNCTION 2 timestamp_sortsupport(internal),
        FUNCTION 4 btequalimage(oid);

-- Cross-type members: for every (left,right) pair, the five comparison
-- operators followed by the matching comparison function.
ALTER OPERATOR FAMILY datetime_ops USING lsm3 ADD
    -- date vs timestamp
    OPERATOR 1 <  (date,timestamp),
    OPERATOR 2 <= (date,timestamp),
    OPERATOR 3 =  (date,timestamp),
    OPERATOR 4 >= (date,timestamp),
    OPERATOR 5 >  (date,timestamp),
    FUNCTION 1 (date,timestamp) date_cmp_timestamp(date,timestamp),
    -- date vs timestamptz
    OPERATOR 1 <  (date,timestamptz),
    OPERATOR 2 <= (date,timestamptz),
    OPERATOR 3 =  (date,timestamptz),
    OPERATOR 4 >= (date,timestamptz),
    OPERATOR 5 >  (date,timestamptz),
    FUNCTION 1 (date,timestamptz) date_cmp_timestamptz(date,timestamptz),
    -- timestamp vs date
    OPERATOR 1 <  (timestamp,date),
    OPERATOR 2 <= (timestamp,date),
    OPERATOR 3 =  (timestamp,date),
    OPERATOR 4 >= (timestamp,date),
    OPERATOR 5 >  (timestamp,date),
    FUNCTION 1 (timestamp,date) timestamp_cmp_date(timestamp,date),
    -- timestamptz vs date
    OPERATOR 1 <  (timestamptz,date),
    OPERATOR 2 <= (timestamptz,date),
    OPERATOR 3 =  (timestamptz,date),
    OPERATOR 4 >= (timestamptz,date),
    OPERATOR 5 >  (timestamptz,date),
    FUNCTION 1 (timestamptz,date) timestamptz_cmp_date(timestamptz,date),
    -- timestamp vs timestamptz
    OPERATOR 1 <  (timestamp,timestamptz),
    OPERATOR 2 <= (timestamp,timestamptz),
    OPERATOR 3 =  (timestamp,timestamptz),
    OPERATOR 4 >= (timestamp,timestamptz),
    OPERATOR 5 >  (timestamp,timestamptz),
    FUNCTION 1 (timestamp,timestamptz) timestamp_cmp_timestamptz(timestamp,timestamptz),
    -- timestamptz vs timestamp
    OPERATOR 1 <  (timestamptz,timestamp),
    OPERATOR 2 <= (timestamptz,timestamp),
    OPERATOR 3 =  (timestamptz,timestamp),
    OPERATOR 4 >= (timestamptz,timestamp),
    OPERATOR 5 >  (timestamptz,timestamp),
    FUNCTION 1 (timestamptz,timestamp) timestamptz_cmp_timestamp(timestamptz,timestamp),
    -- in_range with an interval offset
    FUNCTION 3 (date,interval) in_range(date,date,interval,bool,bool),
    FUNCTION 3 (timestamp,interval) in_range(timestamp,timestamp,interval,bool,bool),
    FUNCTION 3 (timestamptz,interval) in_range(timestamptz,timestamptz,interval,bool,bool);
-- lsm3 operator class for interval.
-- Deliberately no equalimage function (support function 4): interval values
-- that compare equal are not necessarily bitwise identical (for example
-- '1 day' and '24 hours'), so the index must not treat equal keys as
-- interchangeable images.
CREATE OPERATOR CLASS interval_ops
    DEFAULT FOR TYPE interval USING lsm3 AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 interval_cmp(interval,interval),
        FUNCTION 3 in_range(interval,interval,interval,bool,bool);
-- lsm3 operator class for macaddr.
CREATE OPERATOR CLASS macaddr_ops
    DEFAULT FOR TYPE macaddr USING lsm3 AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 macaddr_cmp(macaddr,macaddr),
        FUNCTION 2 macaddr_sortsupport(internal),
        FUNCTION 4 btequalimage(oid);
-- lsm3 operator class for numeric.
-- Deliberately no equalimage function (support function 4): numeric values
-- that compare equal can differ in display scale (1.0 vs 1.00), and that
-- scale has to be preserved, so equal keys are not interchangeable images.
CREATE OPERATOR CLASS numeric_ops
    DEFAULT FOR TYPE numeric USING lsm3 AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 numeric_cmp(numeric,numeric),
        FUNCTION 2 numeric_sortsupport(internal),
        FUNCTION 3 in_range(numeric,numeric,numeric,bool,bool);
-- lsm3 operator class for oid.
CREATE OPERATOR CLASS oid_ops
    DEFAULT FOR TYPE oid USING lsm3 AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 btoidcmp(oid,oid),
        FUNCTION 2 btoidsortsupport(internal),
        FUNCTION 4 btequalimage(oid);
-- Text operator family for lsm3 (text, name).
CREATE OPERATOR FAMILY text_ops USING lsm3;

CREATE OPERATOR CLASS text_ops
    DEFAULT FOR TYPE text USING lsm3 FAMILY text_ops AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 bttextcmp(text,text),
        FUNCTION 2 bttextsortsupport(internal),
        FUNCTION 4 btvarstrequalimage(oid);

CREATE OPERATOR CLASS name_ops
    DEFAULT FOR TYPE name USING lsm3 FAMILY text_ops AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 btnamecmp(name,name),
        FUNCTION 2 btnamesortsupport(internal),
        FUNCTION 4 btvarstrequalimage(oid);

-- Cross-type members: text vs name in both directions.
ALTER OPERATOR FAMILY text_ops USING lsm3 ADD
    -- text vs name
    OPERATOR 1 <  (text,name),
    OPERATOR 2 <= (text,name),
    OPERATOR 3 =  (text,name),
    OPERATOR 4 >= (text,name),
    OPERATOR 5 >  (text,name),
    FUNCTION 1 (text,name) bttextnamecmp(text,name),
    -- name vs text
    OPERATOR 1 <  (name,text),
    OPERATOR 2 <= (name,text),
    OPERATOR 3 =  (name,text),
    OPERATOR 4 >= (name,text),
    OPERATOR 5 >  (name,text),
    FUNCTION 1 (name,text) btnametextcmp(name,text);
-- Stand-alone lsm3 operator classes (no explicit family) for
-- time, timetz, money and uuid.
CREATE OPERATOR CLASS time_ops
    DEFAULT FOR TYPE time USING lsm3 AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 time_cmp(time,time),
        FUNCTION 3 in_range(time,time,interval,bool,bool),
        FUNCTION 4 btequalimage(oid);

CREATE OPERATOR CLASS timetz_ops
    DEFAULT FOR TYPE timetz USING lsm3 AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 timetz_cmp(timetz,timetz),
        FUNCTION 3 in_range(timetz,timetz,interval,bool,bool),
        FUNCTION 4 btequalimage(oid);

CREATE OPERATOR CLASS money_ops
    DEFAULT FOR TYPE money USING lsm3 AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 cash_cmp(money,money),
        FUNCTION 4 btequalimage(oid);

CREATE OPERATOR CLASS uuid_ops
    DEFAULT FOR TYPE uuid USING lsm3 AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 uuid_cmp(uuid,uuid),
        FUNCTION 2 uuid_sortsupport(internal),
        FUNCTION 4 btequalimage(oid);
-- lsm3_btree_wrapper access method and its operator classes.
-- This second access method exported by the module gets the same set of
-- operator classes as lsm3.
-- NOTE(review): presumably this is the AM used for the internal B-Tree
-- indexes of an Lsm3 index - confirm against lsm3.c.
--
-- Plain CREATE FUNCTION (not CREATE OR REPLACE): an install script must
-- never silently take over an object that already exists.
CREATE FUNCTION lsm3_btree_wrapper(internal)
RETURNS index_am_handler
AS 'MODULE_PATHNAME'
LANGUAGE C;

CREATE ACCESS METHOD lsm3_btree_wrapper TYPE INDEX HANDLER lsm3_btree_wrapper;
-- Integer operator family for lsm3_btree_wrapper (int2, int4, int8).
-- Strategy numbers: 1 "<", 2 "<=", 3 "=", 4 ">=", 5 ">".
-- Support functions: 1 comparison, 2 sort support, 3 in_range, 4 equalimage.
CREATE OPERATOR FAMILY integer_ops USING lsm3_btree_wrapper;

CREATE OPERATOR CLASS int2_ops
    DEFAULT FOR TYPE int2 USING lsm3_btree_wrapper FAMILY integer_ops AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 btint2cmp(int2,int2);

CREATE OPERATOR CLASS int4_ops
    DEFAULT FOR TYPE int4 USING lsm3_btree_wrapper FAMILY integer_ops AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 btint4cmp(int4,int4);

CREATE OPERATOR CLASS int8_ops
    DEFAULT FOR TYPE int8 USING lsm3_btree_wrapper FAMILY integer_ops AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 btint8cmp(int8,int8);

-- Cross-type members of the family, grouped by (left,right) type pair.
ALTER OPERATOR FAMILY integer_ops USING lsm3_btree_wrapper ADD
    -- int2 vs int4
    OPERATOR 1 <  (int2,int4),
    OPERATOR 2 <= (int2,int4),
    OPERATOR 3 =  (int2,int4),
    OPERATOR 4 >= (int2,int4),
    OPERATOR 5 >  (int2,int4),
    -- int2 vs int8
    OPERATOR 1 <  (int2,int8),
    OPERATOR 2 <= (int2,int8),
    OPERATOR 3 =  (int2,int8),
    OPERATOR 4 >= (int2,int8),
    OPERATOR 5 >  (int2,int8),
    -- int4 vs int2
    OPERATOR 1 <  (int4,int2),
    OPERATOR 2 <= (int4,int2),
    OPERATOR 3 =  (int4,int2),
    OPERATOR 4 >= (int4,int2),
    OPERATOR 5 >  (int4,int2),
    -- int4 vs int8
    OPERATOR 1 <  (int4,int8),
    OPERATOR 2 <= (int4,int8),
    OPERATOR 3 =  (int4,int8),
    OPERATOR 4 >= (int4,int8),
    OPERATOR 5 >  (int4,int8),
    -- int8 vs int2
    OPERATOR 1 <  (int8,int2),
    OPERATOR 2 <= (int8,int2),
    OPERATOR 3 =  (int8,int2),
    OPERATOR 4 >= (int8,int2),
    OPERATOR 5 >  (int8,int2),
    -- int8 vs int4
    OPERATOR 1 <  (int8,int4),
    OPERATOR 2 <= (int8,int4),
    OPERATOR 3 =  (int8,int4),
    OPERATOR 4 >= (int8,int4),
    OPERATOR 5 >  (int8,int4),
    -- cross-type comparison functions
    FUNCTION 1 (int2,int4) btint24cmp(int2,int4),
    FUNCTION 1 (int2,int8) btint28cmp(int2,int8),
    FUNCTION 1 (int4,int2) btint42cmp(int4,int2),
    FUNCTION 1 (int4,int8) btint48cmp(int4,int8),
    FUNCTION 1 (int8,int2) btint82cmp(int8,int2),
    FUNCTION 1 (int8,int4) btint84cmp(int8,int4),
    -- sort support
    FUNCTION 2 (int2,int2) btint2sortsupport(internal),
    FUNCTION 2 (int4,int4) btint4sortsupport(internal),
    FUNCTION 2 (int8,int8) btint8sortsupport(internal),
    -- in_range
    FUNCTION 3 (int2,int2) in_range(int2,int2,int2,bool,bool),
    FUNCTION 3 (int2,int4) in_range(int2,int2,int4,bool,bool),
    FUNCTION 3 (int2,int8) in_range(int2,int2,int8,bool,bool),
    FUNCTION 3 (int4,int2) in_range(int4,int4,int2,bool,bool),
    FUNCTION 3 (int4,int4) in_range(int4,int4,int4,bool,bool),
    FUNCTION 3 (int4,int8) in_range(int4,int4,int8,bool,bool),
    FUNCTION 3 (int8,int8) in_range(int8,int8,int8,bool,bool),
    -- equalimage
    FUNCTION 4 (int2,int2) btequalimage(oid),
    FUNCTION 4 (int4,int4) btequalimage(oid),
    FUNCTION 4 (int8,int8) btequalimage(oid);
-- Floating-point operator family for lsm3_btree_wrapper (float4, float8).
-- No equalimage function (support function 4) is registered for this family.
CREATE OPERATOR FAMILY float_ops USING lsm3_btree_wrapper;

CREATE OPERATOR CLASS float4_ops
    DEFAULT FOR TYPE float4 USING lsm3_btree_wrapper FAMILY float_ops AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 btfloat4cmp(float4,float4);

CREATE OPERATOR CLASS float8_ops
    DEFAULT FOR TYPE float8 USING lsm3_btree_wrapper FAMILY float_ops AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 btfloat8cmp(float8,float8);

-- Cross-type members of the family, grouped by (left,right) type pair.
ALTER OPERATOR FAMILY float_ops USING lsm3_btree_wrapper ADD
    -- float4 vs float8
    OPERATOR 1 <  (float4,float8),
    OPERATOR 2 <= (float4,float8),
    OPERATOR 3 =  (float4,float8),
    OPERATOR 4 >= (float4,float8),
    OPERATOR 5 >  (float4,float8),
    -- float8 vs float4
    OPERATOR 1 <  (float8,float4),
    OPERATOR 2 <= (float8,float4),
    OPERATOR 3 =  (float8,float4),
    OPERATOR 4 >= (float8,float4),
    OPERATOR 5 >  (float8,float4),
    -- cross-type comparison functions
    FUNCTION 1 (float4,float8) btfloat48cmp(float4,float8),
    FUNCTION 1 (float8,float4) btfloat84cmp(float8,float4),
    -- sort support
    FUNCTION 2 (float4,float4) btfloat4sortsupport(internal),
    FUNCTION 2 (float8,float8) btfloat8sortsupport(internal),
    -- in_range
    FUNCTION 3 (float4,float8) in_range(float4,float4,float8,bool,bool),
    FUNCTION 3 (float8,float8) in_range(float8,float8,float8,bool,bool);
-- Stand-alone lsm3_btree_wrapper operator classes (no explicit family) for
-- bool, bpchar, bytea and "char".
CREATE OPERATOR CLASS bool_ops
    DEFAULT FOR TYPE bool USING lsm3_btree_wrapper AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 btboolcmp(bool,bool),
        FUNCTION 4 btequalimage(oid);

CREATE OPERATOR CLASS bpchar_ops
    DEFAULT FOR TYPE bpchar USING lsm3_btree_wrapper AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 bpcharcmp(bpchar,bpchar),
        FUNCTION 2 bpchar_sortsupport(internal),
        FUNCTION 4 btvarstrequalimage(oid);

CREATE OPERATOR CLASS bytea_ops
    DEFAULT FOR TYPE bytea USING lsm3_btree_wrapper AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 byteacmp(bytea,bytea),
        FUNCTION 2 bytea_sortsupport(internal),
        FUNCTION 4 btequalimage(oid);

CREATE OPERATOR CLASS char_ops
    DEFAULT FOR TYPE "char" USING lsm3_btree_wrapper AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 btcharcmp("char","char"),
        FUNCTION 4 btequalimage(oid);
-- Date/time operator family for lsm3_btree_wrapper
-- (date, timestamp, timestamptz).
CREATE OPERATOR FAMILY datetime_ops USING lsm3_btree_wrapper;

CREATE OPERATOR CLASS date_ops
    DEFAULT FOR TYPE date USING lsm3_btree_wrapper FAMILY datetime_ops AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 date_cmp(date,date),
        FUNCTION 2 date_sortsupport(internal),
        FUNCTION 4 btequalimage(oid);

CREATE OPERATOR CLASS timestamp_ops
    DEFAULT FOR TYPE timestamp USING lsm3_btree_wrapper FAMILY datetime_ops AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 timestamp_cmp(timestamp,timestamp),
        FUNCTION 2 timestamp_sortsupport(internal),
        FUNCTION 4 btequalimage(oid);

-- timestamptz registers timestamp_sortsupport as its sort support function.
CREATE OPERATOR CLASS timestamptz_ops
    DEFAULT FOR TYPE timestamptz USING lsm3_btree_wrapper FAMILY datetime_ops AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 timestamptz_cmp(timestamptz,timestamptz),
        FUNCTION 2 timestamp_sortsupport(internal),
        FUNCTION 4 btequalimage(oid);

-- Cross-type members: for every (left,right) pair, the five comparison
-- operators followed by the matching comparison function.
ALTER OPERATOR FAMILY datetime_ops USING lsm3_btree_wrapper ADD
    -- date vs timestamp
    OPERATOR 1 <  (date,timestamp),
    OPERATOR 2 <= (date,timestamp),
    OPERATOR 3 =  (date,timestamp),
    OPERATOR 4 >= (date,timestamp),
    OPERATOR 5 >  (date,timestamp),
    FUNCTION 1 (date,timestamp) date_cmp_timestamp(date,timestamp),
    -- date vs timestamptz
    OPERATOR 1 <  (date,timestamptz),
    OPERATOR 2 <= (date,timestamptz),
    OPERATOR 3 =  (date,timestamptz),
    OPERATOR 4 >= (date,timestamptz),
    OPERATOR 5 >  (date,timestamptz),
    FUNCTION 1 (date,timestamptz) date_cmp_timestamptz(date,timestamptz),
    -- timestamp vs date
    OPERATOR 1 <  (timestamp,date),
    OPERATOR 2 <= (timestamp,date),
    OPERATOR 3 =  (timestamp,date),
    OPERATOR 4 >= (timestamp,date),
    OPERATOR 5 >  (timestamp,date),
    FUNCTION 1 (timestamp,date) timestamp_cmp_date(timestamp,date),
    -- timestamptz vs date
    OPERATOR 1 <  (timestamptz,date),
    OPERATOR 2 <= (timestamptz,date),
    OPERATOR 3 =  (timestamptz,date),
    OPERATOR 4 >= (timestamptz,date),
    OPERATOR 5 >  (timestamptz,date),
    FUNCTION 1 (timestamptz,date) timestamptz_cmp_date(timestamptz,date),
    -- timestamp vs timestamptz
    OPERATOR 1 <  (timestamp,timestamptz),
    OPERATOR 2 <= (timestamp,timestamptz),
    OPERATOR 3 =  (timestamp,timestamptz),
    OPERATOR 4 >= (timestamp,timestamptz),
    OPERATOR 5 >  (timestamp,timestamptz),
    FUNCTION 1 (timestamp,timestamptz) timestamp_cmp_timestamptz(timestamp,timestamptz),
    -- timestamptz vs timestamp
    OPERATOR 1 <  (timestamptz,timestamp),
    OPERATOR 2 <= (timestamptz,timestamp),
    OPERATOR 3 =  (timestamptz,timestamp),
    OPERATOR 4 >= (timestamptz,timestamp),
    OPERATOR 5 >  (timestamptz,timestamp),
    FUNCTION 1 (timestamptz,timestamp) timestamptz_cmp_timestamp(timestamptz,timestamp),
    -- in_range with an interval offset
    FUNCTION 3 (date,interval) in_range(date,date,interval,bool,bool),
    FUNCTION 3 (timestamp,interval) in_range(timestamp,timestamp,interval,bool,bool),
    FUNCTION 3 (timestamptz,interval) in_range(timestamptz,timestamptz,interval,bool,bool);
-- lsm3_btree_wrapper operator class for interval.
-- Deliberately no equalimage function (support function 4): interval values
-- that compare equal are not necessarily bitwise identical (for example
-- '1 day' and '24 hours'), so the index must not treat equal keys as
-- interchangeable images.
CREATE OPERATOR CLASS interval_ops
    DEFAULT FOR TYPE interval USING lsm3_btree_wrapper AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 interval_cmp(interval,interval),
        FUNCTION 3 in_range(interval,interval,interval,bool,bool);
-- lsm3_btree_wrapper operator class for macaddr.
CREATE OPERATOR CLASS macaddr_ops
    DEFAULT FOR TYPE macaddr USING lsm3_btree_wrapper AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 macaddr_cmp(macaddr,macaddr),
        FUNCTION 2 macaddr_sortsupport(internal),
        FUNCTION 4 btequalimage(oid);
-- lsm3_btree_wrapper operator class for numeric.
-- Deliberately no equalimage function (support function 4): numeric values
-- that compare equal can differ in display scale (1.0 vs 1.00), and that
-- scale has to be preserved, so equal keys are not interchangeable images.
CREATE OPERATOR CLASS numeric_ops
    DEFAULT FOR TYPE numeric USING lsm3_btree_wrapper AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 numeric_cmp(numeric,numeric),
        FUNCTION 2 numeric_sortsupport(internal),
        FUNCTION 3 in_range(numeric,numeric,numeric,bool,bool);
-- lsm3_btree_wrapper operator class for oid.
CREATE OPERATOR CLASS oid_ops
    DEFAULT FOR TYPE oid USING lsm3_btree_wrapper AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 btoidcmp(oid,oid),
        FUNCTION 2 btoidsortsupport(internal),
        FUNCTION 4 btequalimage(oid);
-- Text operator family for lsm3_btree_wrapper (text, name).
CREATE OPERATOR FAMILY text_ops USING lsm3_btree_wrapper;

CREATE OPERATOR CLASS text_ops
    DEFAULT FOR TYPE text USING lsm3_btree_wrapper FAMILY text_ops AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 bttextcmp(text,text),
        FUNCTION 2 bttextsortsupport(internal),
        FUNCTION 4 btvarstrequalimage(oid);

CREATE OPERATOR CLASS name_ops
    DEFAULT FOR TYPE name USING lsm3_btree_wrapper FAMILY text_ops AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 btnamecmp(name,name),
        FUNCTION 2 btnamesortsupport(internal),
        FUNCTION 4 btvarstrequalimage(oid);

-- Cross-type members: text vs name in both directions.
ALTER OPERATOR FAMILY text_ops USING lsm3_btree_wrapper ADD
    -- text vs name
    OPERATOR 1 <  (text,name),
    OPERATOR 2 <= (text,name),
    OPERATOR 3 =  (text,name),
    OPERATOR 4 >= (text,name),
    OPERATOR 5 >  (text,name),
    FUNCTION 1 (text,name) bttextnamecmp(text,name),
    -- name vs text
    OPERATOR 1 <  (name,text),
    OPERATOR 2 <= (name,text),
    OPERATOR 3 =  (name,text),
    OPERATOR 4 >= (name,text),
    OPERATOR 5 >  (name,text),
    FUNCTION 1 (name,text) btnametextcmp(name,text);
-- Stand-alone lsm3_btree_wrapper operator classes (no explicit family) for
-- time, timetz, money and uuid.
CREATE OPERATOR CLASS time_ops
    DEFAULT FOR TYPE time USING lsm3_btree_wrapper AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 time_cmp(time,time),
        FUNCTION 3 in_range(time,time,interval,bool,bool),
        FUNCTION 4 btequalimage(oid);

CREATE OPERATOR CLASS timetz_ops
    DEFAULT FOR TYPE timetz USING lsm3_btree_wrapper AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 timetz_cmp(timetz,timetz),
        FUNCTION 3 in_range(timetz,timetz,interval,bool,bool),
        FUNCTION 4 btequalimage(oid);

CREATE OPERATOR CLASS money_ops
    DEFAULT FOR TYPE money USING lsm3_btree_wrapper AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 cash_cmp(money,money),
        FUNCTION 4 btequalimage(oid);

CREATE OPERATOR CLASS uuid_ops
    DEFAULT FOR TYPE uuid USING lsm3_btree_wrapper AS
        OPERATOR 1 <,
        OPERATOR 2 <=,
        OPERATOR 3 =,
        OPERATOR 4 >=,
        OPERATOR 5 >,
        FUNCTION 1 uuid_cmp(uuid,uuid),
        FUNCTION 2 uuid_sortsupport(internal),
        FUNCTION 4 btequalimage(oid);
-- SQL-callable helpers implemented in the lsm3 shared library.
-- Each takes the Lsm3 index as a regclass and is declared STRICT and
-- PARALLEL RESTRICTED.

-- Returns the number of merges performed on the index since server start.
CREATE FUNCTION lsm3_get_merge_count(index regclass)
RETURNS bigint
AS 'MODULE_PATHNAME'
LANGUAGE C
STRICT
PARALLEL RESTRICTED;

-- Forces a merge of the top index.
CREATE FUNCTION lsm3_start_merge(index regclass)
RETURNS void
AS 'MODULE_PATHNAME'
LANGUAGE C
STRICT
PARALLEL RESTRICTED;

-- Waits for merge completion.
CREATE FUNCTION lsm3_wait_merge_completion(index regclass)
RETURNS void
AS 'MODULE_PATHNAME'
LANGUAGE C
STRICT
PARALLEL RESTRICTED;

-- Returns the size of the active top index.
CREATE FUNCTION lsm3_top_index_size(index regclass)
RETURNS bigint
AS 'MODULE_PATHNAME'
LANGUAGE C
STRICT
PARALLEL RESTRICTED;

1254
pgxn/lsm3/lsm3.c Normal file

File diff suppressed because it is too large Load Diff

2
pgxn/lsm3/lsm3.conf Normal file
View File

@@ -0,0 +1,2 @@
# Extra server settings for the regression run; the extension Makefile passes
# this file to pg_regress through --temp-config.
# lsm3 must be preloaded: per the README it creates a data structure in shared memory.
shared_preload_libraries = 'lsm3'
# Overrides the default top index size.
# NOTE(review): presumably kept small so that the test data triggers merges - confirm.
lsm3.top_index_size=1MB

4
pgxn/lsm3/lsm3.control Normal file
View File

@@ -0,0 +1,4 @@
# lsm3 extension control file
comment = 'Lsm3 index'
# Matches the version suffix of the install script (lsm3--1.0.sql)
default_version = '1.0'
# Value substituted for MODULE_PATHNAME in the install script
module_pathname = '$libdir/lsm3'
relocatable = true

53
pgxn/lsm3/lsm3.h Normal file
View File

@@ -0,0 +1,53 @@
/*
* It is too expensive to check the index size at each insert, because that requires traversing all index file segments and calling lseek for each.
* But we do not need a precise size, so it is enough to check it at every N-th insert. The largest B-Tree key size is about 2kb,
* so with N=64K the worst-case error is less than 128Mb, and for a 32-bit key just 1Mb.
*/
#define LSM3_CHECK_TOP_INDEX_SIZE_PERIOD (64*1024) /* should be power of two */
/*
* Control structure for an Lsm3 index, located in shared memory
*/
typedef struct
{
Oid base; /* Oid of base index */
Oid heap; /* Oid of indexed relation */
Oid top[2]; /* Oids of two top indexes */
int access_count[2]; /* Access counter for top indexes */
int active_index; /* Index used for insert */
uint64 n_merges; /* Number of performed merges since database open */
uint64 n_inserts; /* Number of performed inserts since database open */
volatile bool start_merge; /* Start merging of top index with base index */
volatile bool merge_in_progress; /* Overflow of top index initiates merge process */
PGPROC* merger; /* Merger background worker */
Oid db_id; /* database Id (for background worker) */
Oid user_id; /* user Id (for background worker) */
Oid am_id; /* Lsm3 AM Oid */
int top_index_size; /* Size of top index */
slock_t spinlock; /* Spinlock to synchronize access */
} Lsm3DictEntry;
/*
* Opaque part of the index scan descriptor: per-scan state covering the
* two top indexes and the base index
*/
typedef struct
{
Lsm3DictEntry* entry; /* Lsm3 control structure */
Relation top_index[2]; /* Opened top index relations */
SortSupport sortKeys; /* Context for comparing index tuples */
IndexScanDesc scan[3]; /* Scan descriptors for two top indexes and base index */
bool eof[3]; /* Indicators that end of the corresponding index was reached */
bool unique; /* Whether index is "unique" and we can stop scan after locating first occurrence */
int curr_index; /* Index from which last tuple was selected (or -1 if none) */
} Lsm3ScanOpaque;
/* Lsm3 index options (relation options) */
typedef struct
{
BTOptions nbt_opts; /* Standard B-Tree options */
int top_index_size; /* Size of top index (overrides lsm3.top_index_size GUC) */
bool unique; /* Index may not contain duplicates. We prohibit unique constraint for Lsm3 index
* because it can not be enforced. But presence of this index option allows to optimize
* index lookup: if key is found in active top index, do not search other two indexes.
*/
} Lsm3Options;

59
pgxn/lsm3/sql/test.sql Normal file
View File

@@ -0,0 +1,59 @@
create extension lsm3;
create table t(k bigint, val bigint);
create index lsm3_index on t using lsm3(k);
set enable_seqscan=off;
insert into t values (1,10);
select lsm3_start_merge('lsm3_index');
select lsm3_wait_merge_completion('lsm3_index');
insert into t values (2,20);
select lsm3_start_merge('lsm3_index');
select lsm3_wait_merge_completion('lsm3_index');
insert into t values (3,30);
select lsm3_start_merge('lsm3_index');
select lsm3_wait_merge_completion('lsm3_index');
insert into t values (4,40);
select lsm3_start_merge('lsm3_index');
select lsm3_wait_merge_completion('lsm3_index');
insert into t values (5,50);
select lsm3_start_merge('lsm3_index');
select lsm3_wait_merge_completion('lsm3_index');
select lsm3_get_merge_count('lsm3_index');
select * from t where k = 1;
select * from t order by k;
select * from t order by k desc;
analyze t;
explain (COSTS OFF, TIMING OFF, SUMMARY OFF) select * from t order by k;
insert into t values (generate_series(1,100000), 1);
insert into t values (generate_series(1000001,200000), 2);
insert into t values (generate_series(2000001,300000), 3);
insert into t values (generate_series(1,100000), 1);
insert into t values (generate_series(1000001,200000), 2);
insert into t values (generate_series(2000001,300000), 3);
select * from t where k = 1;
select * from t where k = 1000000;
select * from t where k = 2000000;
select * from t where k = 3000000;
analyze t;
explain (COSTS OFF, TIMING OFF, SUMMARY OFF) select * from t where k = 1;
select lsm3_get_merge_count('lsm3_index') > 5;
truncate table t;
insert into t values (generate_series(1,1000000), 1);
select * from t where k = 1;
reindex table t;
select * from t where k = 1;
drop table t;
create table lsm(k bigint);
insert into lsm values (generate_series(1, 1000000));
create index concurrently on lsm using lsm3(k);
select * from lsm where k = 1;
drop table lsm;