diff --git a/docker-compose/compute_wrapper/shell/compute.sh b/docker-compose/compute_wrapper/shell/compute.sh index 9dbdcce69f..418aaf876d 100755 --- a/docker-compose/compute_wrapper/shell/compute.sh +++ b/docker-compose/compute_wrapper/shell/compute.sh @@ -67,6 +67,14 @@ else fi fi +if [[ ${PG_VERSION} -ge 17 ]]; then + ulid_extension=pgx_ulid +else + ulid_extension=ulid +fi +echo "Adding pgx_ulid" +shared_libraries=$(jq -r '.cluster.settings[] | select(.name=="shared_preload_libraries").value' ${SPEC_FILE}) +sed -i "s/${shared_libraries}/${shared_libraries},${ulid_extension}/" ${SPEC_FILE} echo "Overwrite tenant id and timeline id in spec file" sed -i "s/TENANT_ID/${tenant_id}/" ${SPEC_FILE} sed -i "s/TIMELINE_ID/${timeline_id}/" ${SPEC_FILE} diff --git a/docker-compose/docker_compose_test.sh b/docker-compose/docker_compose_test.sh index 0f03d600a3..9d867d97f6 100755 --- a/docker-compose/docker_compose_test.sh +++ b/docker-compose/docker_compose_test.sh @@ -69,7 +69,7 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do cat ../compute/patches/contrib_pg${pg_version}.patch | docker exec -i $TEST_CONTAINER_NAME bash -c "(cd /postgres && patch -p1)" # We are running tests now rm -f testout.txt testout_contrib.txt - docker exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,postgis-src,pgx_ulid-src,pg_tiktoken-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src \ + docker exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,postgis-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src,rag_jina_reranker_v1_tiny_en-src,rag_bge_small_en_v15-src \ $TEST_CONTAINER_NAME /run-tests.sh /ext-src | tee testout.txt && EXT_SUCCESS=1 || EXT_SUCCESS=0 docker exec -e SKIP=start-scripts,postgres_fdw,ltree_plpython,jsonb_plpython,jsonb_plperl,hstore_plpython,hstore_plperl,dblink,bool_plperl \ $TEST_CONTAINER_NAME /run-tests.sh /postgres/contrib | tee testout_contrib.txt && CONTRIB_SUCCESS=1 || CONTRIB_SUCCESS=0 diff --git a/docker-compose/ext-src/pg_tiktoken-src/Makefile b/docker-compose/ext-src/pg_tiktoken-src/Makefile new file mode 100644 index 0000000000..e23166554a --- /dev/null +++ b/docker-compose/ext-src/pg_tiktoken-src/Makefile @@ -0,0 +1,8 @@ +PG_CONFIG ?= pg_config +PG_REGRESS = $(shell dirname $$($(PG_CONFIG) --pgxs))/../../src/test/regress/pg_regress +REGRESS = pg_tiktoken + +installcheck: regression-test + +regression-test: + $(PG_REGRESS) --inputdir=. --outputdir=. --dbname=contrib_regression $(REGRESS) \ No newline at end of file diff --git a/docker-compose/ext-src/pg_tiktoken-src/expected/pg_tiktoken.out b/docker-compose/ext-src/pg_tiktoken-src/expected/pg_tiktoken.out new file mode 100644 index 0000000000..0bdcdc60c2 --- /dev/null +++ b/docker-compose/ext-src/pg_tiktoken-src/expected/pg_tiktoken.out @@ -0,0 +1,53 @@ +-- Load the extension +CREATE EXTENSION IF NOT EXISTS pg_tiktoken; +-- Test encoding function +SELECT tiktoken_encode('cl100k_base', 'Hello world!'); + tiktoken_encode +----------------- + {9906,1917,0} +(1 row) + +-- Test token count function +SELECT tiktoken_count('cl100k_base', 'Hello world!'); + tiktoken_count +---------------- + 3 +(1 row) + +-- Test encoding function with a different model +SELECT tiktoken_encode('r50k_base', 'PostgreSQL is amazing!'); + tiktoken_encode +------------------------- + {6307,47701,318,4998,0} +(1 row) + +-- Test token count function with the same model +SELECT tiktoken_count('r50k_base', 'PostgreSQL is amazing!'); + tiktoken_count +---------------- + 5 +(1 row) + +-- Edge cases: Empty string +SELECT tiktoken_encode('cl100k_base', ''); + tiktoken_encode +----------------- + {} +(1 row) + +SELECT tiktoken_count('cl100k_base', ''); + tiktoken_count +---------------- + 0 +(1 row) + +-- Edge cases: Long text +SELECT tiktoken_count('cl100k_base', repeat('word ', 100)); + tiktoken_count +---------------- + 101 +(1 row) + +-- Edge case: Invalid encoding +SELECT tiktoken_encode('invalid_model', 'Test') AS should_fail; +ERROR: 'invalid_model': unknown model or encoder diff --git a/docker-compose/ext-src/pg_tiktoken-src/sql/pg_tiktoken.sql b/docker-compose/ext-src/pg_tiktoken-src/sql/pg_tiktoken.sql new file mode 100644 index 0000000000..626226c82e --- /dev/null +++ b/docker-compose/ext-src/pg_tiktoken-src/sql/pg_tiktoken.sql @@ -0,0 +1,24 @@ +-- Load the extension +CREATE EXTENSION IF NOT EXISTS pg_tiktoken; + +-- Test encoding function +SELECT tiktoken_encode('cl100k_base', 'Hello world!'); + +-- Test token count function +SELECT tiktoken_count('cl100k_base', 'Hello world!'); + +-- Test encoding function with a different model +SELECT tiktoken_encode('r50k_base', 'PostgreSQL is amazing!'); + +-- Test token count function with the same model +SELECT tiktoken_count('r50k_base', 'PostgreSQL is amazing!'); + +-- Edge cases: Empty string +SELECT tiktoken_encode('cl100k_base', ''); +SELECT tiktoken_count('cl100k_base', ''); + +-- Edge cases: Long text +SELECT tiktoken_count('cl100k_base', repeat('word ', 100)); + +-- Edge case: Invalid encoding +SELECT tiktoken_encode('invalid_model', 'Test') AS should_fail; \ No newline at end of file diff --git a/docker-compose/ext-src/pgrag-src/Makefile b/docker-compose/ext-src/pgrag-src/Makefile new file mode 100644 index 0000000000..dbf91cf501 --- /dev/null +++ b/docker-compose/ext-src/pgrag-src/Makefile @@ -0,0 +1,10 @@ +EXTENSION = rag +MODULE_big = rag +OBJS = $(patsubst %.rs,%.o,$(wildcard src/*.rs)) + +REGRESS = basic_functions text_processing api_keys chunking_functions document_processing embedding_api_functions voyageai_functions +REGRESS_OPTS = --load-extension=vector --load-extension=rag + +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) diff --git a/docker-compose/ext-src/pgrag-src/expected/api_keys.out b/docker-compose/ext-src/pgrag-src/expected/api_keys.out new file mode 100644 index 0000000000..3da3786f9b --- /dev/null +++ b/docker-compose/ext-src/pgrag-src/expected/api_keys.out @@ -0,0 +1,49 @@ +-- API key function tests +SELECT rag.anthropic_set_api_key('test_key'); + anthropic_set_api_key +----------------------- + +(1 row) + +SELECT rag.anthropic_get_api_key(); + anthropic_get_api_key +----------------------- + test_key +(1 row) + +SELECT rag.openai_set_api_key('test_key'); + openai_set_api_key +-------------------- + +(1 row) + +SELECT rag.openai_get_api_key(); + openai_get_api_key +-------------------- + test_key +(1 row) + +SELECT rag.fireworks_set_api_key('test_key'); + fireworks_set_api_key +----------------------- + +(1 row) + +SELECT rag.fireworks_get_api_key(); + fireworks_get_api_key +----------------------- + test_key +(1 row) + +SELECT rag.voyageai_set_api_key('test_key'); + voyageai_set_api_key +---------------------- + +(1 row) + +SELECT rag.voyageai_get_api_key(); + voyageai_get_api_key +---------------------- + test_key +(1 row) + diff --git a/docker-compose/ext-src/pgrag-src/expected/basic_functions.out b/docker-compose/ext-src/pgrag-src/expected/basic_functions.out new file mode 100644 index 0000000000..1e5414686b --- /dev/null +++ b/docker-compose/ext-src/pgrag-src/expected/basic_functions.out @@ -0,0 +1,13 @@ +-- Basic function tests +SELECT rag.markdown_from_html('
Hello
'); + markdown_from_html +-------------------- + Hello +(1 row) + +SELECT array_length(rag.chunks_by_character_count('the cat sat on the mat', 10, 5), 1); + array_length +-------------- + 3 +(1 row) + diff --git a/docker-compose/ext-src/pgrag-src/expected/chunking_functions.out b/docker-compose/ext-src/pgrag-src/expected/chunking_functions.out new file mode 100644 index 0000000000..c0546a1a8e --- /dev/null +++ b/docker-compose/ext-src/pgrag-src/expected/chunking_functions.out @@ -0,0 +1,31 @@ +-- Chunking function tests +SELECT rag.chunks_by_character_count('the cat sat on the mat', 10, 5); + chunks_by_character_count +--------------------------------------- + {"the cat","cat sat on","on the mat"} +(1 row) + +SELECT rag.chunks_by_character_count('Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.', 20, 10); + chunks_by_character_count +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"Lorem ipsum dolor","dolor sit amet,","amet, consectetur","adipiscing elit.","Sed do eiusmod","do eiusmod tempor","tempor incididunt ut","ut labore et dolore","et dolore magna","magna aliqua."} +(1 row) + +SELECT (rag.chunks_by_character_count('the cat', 10, 0))[1]; + chunks_by_character_count +--------------------------- + the cat +(1 row) + +SELECT rag.chunks_by_character_count('', 10, 5); + chunks_by_character_count +--------------------------- + {} +(1 row) + +SELECT rag.chunks_by_character_count('a b c d e f g h i j k l m n o p', 5, 2); + chunks_by_character_count +----------------------------------------------------------------- + {"a b c","c d e","e f g","g h i","i j k","k l m","m n o","o p"} +(1 row) + diff --git a/docker-compose/ext-src/pgrag-src/expected/document_processing.out b/docker-compose/ext-src/pgrag-src/expected/document_processing.out new file mode 100644 index 0000000000..befb6b3f23 --- /dev/null +++ b/docker-compose/ext-src/pgrag-src/expected/document_processing.out @@ -0,0 +1,56 @@ +-- HTML to Markdown conversion tests +SELECT rag.markdown_from_html('Hello
'); + markdown_from_html +-------------------- + Hello +(1 row) + +SELECT rag.markdown_from_html('Hello world
'); + markdown_from_html +-------------------- + Hello _world_ +(1 row) + +SELECT rag.markdown_from_html('Paragraph
'); + markdown_from_html +-------------------- + # Title + + + + Paragraph +(1 row) + +SELECT rag.markdown_from_html('Hello world
'); + markdown_from_html +-------------------- + Hello _world_ +(1 row) + +SELECT rag.chunks_by_character_count('the cat sat on the mat', 10, 5); + chunks_by_character_count +--------------------------------------- + {"the cat","cat sat on","on the mat"} +(1 row) + diff --git a/docker-compose/ext-src/pgrag-src/expected/voyageai_functions.out b/docker-compose/ext-src/pgrag-src/expected/voyageai_functions.out new file mode 100644 index 0000000000..01f347d610 --- /dev/null +++ b/docker-compose/ext-src/pgrag-src/expected/voyageai_functions.out @@ -0,0 +1,141 @@ +-- Test VoyageAI API key functions +SELECT 'voyageai_api_key_test' AS test_name, + (SELECT rag.voyageai_set_api_key('test_key') IS NULL) AS result; + test_name | result +-----------------------+-------- + voyageai_api_key_test | t +(1 row) + +SELECT 'voyageai_get_api_key_test' AS test_name, + (SELECT rag.voyageai_get_api_key() = 'test_key') AS result; + test_name | result +---------------------------+-------- + voyageai_get_api_key_test | t +(1 row) + +-- Test VoyageAI embedding functions exist +SELECT 'voyageai_embedding_exists' AS test_name, + count(*) > 0 AS result +FROM pg_proc +WHERE proname = 'voyageai_embedding' + AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag'); + test_name | result +---------------------------+-------- + voyageai_embedding_exists | t +(1 row) + +SELECT 'voyageai_embedding_3_exists' AS test_name, + count(*) > 0 AS result +FROM pg_proc +WHERE proname = 'voyageai_embedding_3' + AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag'); + test_name | result +-----------------------------+-------- + voyageai_embedding_3_exists | t +(1 row) + +SELECT 'voyageai_embedding_3_lite_exists' AS test_name, + count(*) > 0 AS result +FROM pg_proc +WHERE proname = 'voyageai_embedding_3_lite' + AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag'); + test_name | result +----------------------------------+-------- + voyageai_embedding_3_lite_exists | t +(1 row) + +SELECT 'voyageai_embedding_code_2_exists' AS test_name, + count(*) > 0 AS result +FROM pg_proc +WHERE proname = 'voyageai_embedding_code_2' + AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag'); + test_name | result +----------------------------------+-------- + voyageai_embedding_code_2_exists | t +(1 row) + +SELECT 'voyageai_embedding_finance_2_exists' AS test_name, + count(*) > 0 AS result +FROM pg_proc +WHERE proname = 'voyageai_embedding_finance_2' + AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag'); + test_name | result +-------------------------------------+-------- + voyageai_embedding_finance_2_exists | t +(1 row) + +SELECT 'voyageai_embedding_law_2_exists' AS test_name, + count(*) > 0 AS result +FROM pg_proc +WHERE proname = 'voyageai_embedding_law_2' + AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag'); + test_name | result +---------------------------------+-------- + voyageai_embedding_law_2_exists | t +(1 row) + +SELECT 'voyageai_embedding_multilingual_2_exists' AS test_name, + count(*) > 0 AS result +FROM pg_proc +WHERE proname = 'voyageai_embedding_multilingual_2' + AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag'); + test_name | result +------------------------------------------+-------- + voyageai_embedding_multilingual_2_exists | t +(1 row) + +-- Test VoyageAI reranking functions exist +SELECT 'voyageai_rerank_distance_exists' AS test_name, + count(*) > 0 AS result +FROM pg_proc +WHERE proname = 'voyageai_rerank_distance' + AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag'); + test_name | result +---------------------------------+-------- + voyageai_rerank_distance_exists | t +(1 row) + +SELECT 'voyageai_rerank_score_exists' AS test_name, + count(*) > 0 AS result +FROM pg_proc +WHERE proname = 'voyageai_rerank_score' + AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag'); + test_name | result +------------------------------+-------- + voyageai_rerank_score_exists | t +(1 row) + +-- Test VoyageAI function signatures +SELECT 'voyageai_embedding_signature' AS test_name, + count(*) > 0 AS result +FROM pg_proc +WHERE proname = 'voyageai_embedding' + AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag') + AND pronargs = 3; + test_name | result +------------------------------+-------- + voyageai_embedding_signature | t +(1 row) + +SELECT 'voyageai_rerank_distance_signature' AS test_name, + count(*) > 0 AS result +FROM pg_proc +WHERE proname = 'voyageai_rerank_distance' + AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag') + AND pronargs IN (3, 4); + test_name | result +------------------------------------+-------- + voyageai_rerank_distance_signature | t +(1 row) + +SELECT 'voyageai_rerank_score_signature' AS test_name, + count(*) > 0 AS result +FROM pg_proc +WHERE proname = 'voyageai_rerank_score' + AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag') + AND pronargs IN (3, 4); + test_name | result +---------------------------------+-------- + voyageai_rerank_score_signature | t +(1 row) + diff --git a/docker-compose/ext-src/pgrag-src/sql/api_keys.sql b/docker-compose/ext-src/pgrag-src/sql/api_keys.sql new file mode 100644 index 0000000000..36b928bccc --- /dev/null +++ b/docker-compose/ext-src/pgrag-src/sql/api_keys.sql @@ -0,0 +1,16 @@ +-- API key function tests +SELECT rag.anthropic_set_api_key('test_key'); + +SELECT rag.anthropic_get_api_key(); + +SELECT rag.openai_set_api_key('test_key'); + +SELECT rag.openai_get_api_key(); + +SELECT rag.fireworks_set_api_key('test_key'); + +SELECT rag.fireworks_get_api_key(); + +SELECT rag.voyageai_set_api_key('test_key'); + +SELECT rag.voyageai_get_api_key(); diff --git a/docker-compose/ext-src/pgrag-src/sql/basic_functions.sql b/docker-compose/ext-src/pgrag-src/sql/basic_functions.sql new file mode 100644 index 0000000000..5e73bc1639 --- /dev/null +++ b/docker-compose/ext-src/pgrag-src/sql/basic_functions.sql @@ -0,0 +1,4 @@ +-- Basic function tests +SELECT rag.markdown_from_html('Hello
'); + +SELECT array_length(rag.chunks_by_character_count('the cat sat on the mat', 10, 5), 1); diff --git a/docker-compose/ext-src/pgrag-src/sql/chunking_functions.sql b/docker-compose/ext-src/pgrag-src/sql/chunking_functions.sql new file mode 100644 index 0000000000..1a6cea1706 --- /dev/null +++ b/docker-compose/ext-src/pgrag-src/sql/chunking_functions.sql @@ -0,0 +1,11 @@ +-- Chunking function tests +SELECT rag.chunks_by_character_count('the cat sat on the mat', 10, 5); + +SELECT rag.chunks_by_character_count('Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.', 20, 10); + +SELECT (rag.chunks_by_character_count('the cat', 10, 0))[1]; + +SELECT rag.chunks_by_character_count('', 10, 5); + +SELECT rag.chunks_by_character_count('a b c d e f g h i j k l m n o p', 5, 2); + diff --git a/docker-compose/ext-src/pgrag-src/sql/document_processing.sql b/docker-compose/ext-src/pgrag-src/sql/document_processing.sql new file mode 100644 index 0000000000..ed94dd0e1a --- /dev/null +++ b/docker-compose/ext-src/pgrag-src/sql/document_processing.sql @@ -0,0 +1,24 @@ +-- HTML to Markdown conversion tests +SELECT rag.markdown_from_html('Hello
'); + +SELECT rag.markdown_from_html('Hello world
'); + +SELECT rag.markdown_from_html('Paragraph
'); + +SELECT rag.markdown_from_html('Hello world
'); + +SELECT rag.chunks_by_character_count('the cat sat on the mat', 10, 5); diff --git a/docker-compose/ext-src/pgrag-src/sql/voyageai_functions.sql b/docker-compose/ext-src/pgrag-src/sql/voyageai_functions.sql new file mode 100644 index 0000000000..73d4241519 --- /dev/null +++ b/docker-compose/ext-src/pgrag-src/sql/voyageai_functions.sql @@ -0,0 +1,84 @@ +-- Test VoyageAI API key functions +SELECT 'voyageai_api_key_test' AS test_name, + (SELECT rag.voyageai_set_api_key('test_key') IS NULL) AS result; + +SELECT 'voyageai_get_api_key_test' AS test_name, + (SELECT rag.voyageai_get_api_key() = 'test_key') AS result; + +-- Test VoyageAI embedding functions exist +SELECT 'voyageai_embedding_exists' AS test_name, + count(*) > 0 AS result +FROM pg_proc +WHERE proname = 'voyageai_embedding' + AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag'); + +SELECT 'voyageai_embedding_3_exists' AS test_name, + count(*) > 0 AS result +FROM pg_proc +WHERE proname = 'voyageai_embedding_3' + AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag'); + +SELECT 'voyageai_embedding_3_lite_exists' AS test_name, + count(*) > 0 AS result +FROM pg_proc +WHERE proname = 'voyageai_embedding_3_lite' + AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag'); + +SELECT 'voyageai_embedding_code_2_exists' AS test_name, + count(*) > 0 AS result +FROM pg_proc +WHERE proname = 'voyageai_embedding_code_2' + AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag'); + +SELECT 'voyageai_embedding_finance_2_exists' AS test_name, + count(*) > 0 AS result +FROM pg_proc +WHERE proname = 'voyageai_embedding_finance_2' + AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag'); + +SELECT 'voyageai_embedding_law_2_exists' AS test_name, + count(*) > 0 AS result +FROM pg_proc +WHERE proname = 'voyageai_embedding_law_2' + AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag'); + +SELECT 'voyageai_embedding_multilingual_2_exists' AS test_name, + count(*) > 0 AS result +FROM pg_proc +WHERE proname = 'voyageai_embedding_multilingual_2' + AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag'); + +-- Test VoyageAI reranking functions exist +SELECT 'voyageai_rerank_distance_exists' AS test_name, + count(*) > 0 AS result +FROM pg_proc +WHERE proname = 'voyageai_rerank_distance' + AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag'); + +SELECT 'voyageai_rerank_score_exists' AS test_name, + count(*) > 0 AS result +FROM pg_proc +WHERE proname = 'voyageai_rerank_score' + AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag'); + +-- Test VoyageAI function signatures +SELECT 'voyageai_embedding_signature' AS test_name, + count(*) > 0 AS result +FROM pg_proc +WHERE proname = 'voyageai_embedding' + AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag') + AND pronargs = 3; + +SELECT 'voyageai_rerank_distance_signature' AS test_name, + count(*) > 0 AS result +FROM pg_proc +WHERE proname = 'voyageai_rerank_distance' + AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag') + AND pronargs IN (3, 4); + +SELECT 'voyageai_rerank_score_signature' AS test_name, + count(*) > 0 AS result +FROM pg_proc +WHERE proname = 'voyageai_rerank_score' + AND pronamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'rag') + AND pronargs IN (3, 4); diff --git a/docker-compose/ext-src/pgx_ulid-src/Makefile b/docker-compose/ext-src/pgx_ulid-src/Makefile new file mode 100644 index 0000000000..91aceef906 --- /dev/null +++ b/docker-compose/ext-src/pgx_ulid-src/Makefile @@ -0,0 +1,16 @@ +EXTENSION = pgx_ulid + +PGFILEDESC = "pgx_ulid - ULID type for PostgreSQL" + +PG_CONFIG ?= pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +PG_MAJOR_VERSION := $(word 2, $(subst ., , $(shell $(PG_CONFIG) --version))) +ifeq ($(shell test $(PG_MAJOR_VERSION) -lt 17; echo $$?),0) + REGRESS_OPTS = --load-extension=ulid + REGRESS = 00_ulid_generation 01_ulid_conversions 03_ulid_errors +else + REGRESS_OPTS = --load-extension=pgx_ulid + REGRESS = 00_ulid_generation 01_ulid_conversions 02_ulid_conversions 03_ulid_errors +endif + +include $(PGXS) diff --git a/docker-compose/ext-src/pgx_ulid-src/expected/00_ulid_generation.out b/docker-compose/ext-src/pgx_ulid-src/expected/00_ulid_generation.out new file mode 100644 index 0000000000..a30b620150 --- /dev/null +++ b/docker-compose/ext-src/pgx_ulid-src/expected/00_ulid_generation.out @@ -0,0 +1,60 @@ +-- Test basic ULID generation +-- Test gen_ulid() function +SELECT 'gen_ulid() returns a non-null value' as test_name, + gen_ulid() IS NOT NULL as result; + test_name | result +-------------------------------------+-------- + gen_ulid() returns a non-null value | t +(1 row) + +-- Test that multiple calls to gen_ulid() return different values +SELECT 'gen_ulid() returns unique values' as test_name, + gen_ulid() != gen_ulid() as result; + test_name | result +----------------------------------+-------- + gen_ulid() returns unique values | t +(1 row) + +-- Test that gen_ulid() returns a value with the correct format +SELECT 'gen_ulid() returns correctly formatted value' as test_name, + length(gen_ulid()::text) = 26 as result; + test_name | result +----------------------------------------------+-------- + gen_ulid() returns correctly formatted value | t +(1 row) + +-- Test monotonic ULID generation +SELECT 'gen_monotonic_ulid() returns a non-null value' as test_name, + gen_monotonic_ulid() IS NOT NULL as result; + test_name | result +-----------------------------------------------+-------- + gen_monotonic_ulid() returns a non-null value | t +(1 row) + +-- Test that multiple calls to gen_monotonic_ulid() return different values +SELECT 'gen_monotonic_ulid() returns unique values' as test_name, + gen_monotonic_ulid() != gen_monotonic_ulid() as result; + test_name | result +--------------------------------------------+-------- + gen_monotonic_ulid() returns unique values | t +(1 row) + +-- Test that gen_monotonic_ulid() returns a value with the correct format +SELECT 'gen_monotonic_ulid() returns correctly formatted value' as test_name, + length(gen_monotonic_ulid()::text) = 26 as result; + test_name | result +--------------------------------------------------------+-------- + gen_monotonic_ulid() returns correctly formatted value | t +(1 row) + +-- Test that monotonic ULIDs are ordered correctly +SELECT 'gen_monotonic_ulid() returns ordered values' as test_name, + u1 < u2 as result +FROM ( + SELECT gen_monotonic_ulid() as u1, gen_monotonic_ulid() as u2 +) subq; + test_name | result +---------------------------------------------+-------- + gen_monotonic_ulid() returns ordered values | t +(1 row) + diff --git a/docker-compose/ext-src/pgx_ulid-src/expected/01_ulid_conversions.out b/docker-compose/ext-src/pgx_ulid-src/expected/01_ulid_conversions.out new file mode 100644 index 0000000000..19474ccca1 --- /dev/null +++ b/docker-compose/ext-src/pgx_ulid-src/expected/01_ulid_conversions.out @@ -0,0 +1,55 @@ +-- Create a test ULID value +CREATE TEMP TABLE test_ulids AS +SELECT '01GV5PA9EQG7D82Q3Y4PKBZSYV'::ulid as test_ulid; +-- Test conversion to text +SELECT 'ulid to text conversion' as test_name, + test_ulid::text = '01GV5PA9EQG7D82Q3Y4PKBZSYV' as result +FROM test_ulids; + test_name | result +-------------------------+-------- + ulid to text conversion | t +(1 row) + +-- Test conversion to UUID +SELECT 'ulid to UUID conversion' as test_name, + test_ulid::uuid::text = '0186cb65-25d7-81da-815c-7e25a6bfe7db' as result +FROM test_ulids; + test_name | result +-------------------------+-------- + ulid to UUID conversion | t +(1 row) + +-- Test conversion to bytea +SELECT 'ulid to bytea conversion' as test_name, + length(test_ulid::bytea) = 16 as result +FROM test_ulids; + test_name | result +--------------------------+-------- + ulid to bytea conversion | t +(1 row) + +-- Test conversion to timestamp +SELECT 'ulid to timestamp conversion' as test_name, + to_char(test_ulid::timestamp, 'YYYY-MM-DD HH24:MI:SS.MS') = '2023-03-10 04:00:49.111' as result +FROM test_ulids; + test_name | result +------------------------------+-------- + ulid to timestamp conversion | t +(1 row) + +-- Test conversion from UUID +SELECT 'UUID to ulid conversion' as test_name, + '0186cb65-25d7-81da-815c-7e25a6bfe7db'::uuid::ulid::text = '01GV5PA9EQG7D82Q3Y4PKBZSYV' as result; + test_name | result +-------------------------+-------- + UUID to ulid conversion | t +(1 row) + +-- Test conversion from timestamp +SELECT 'timestamp to ulid conversion' as test_name, + '2023-03-10 12:00:49.111'::timestamp::ulid::text = '01GV5PA9EQ0000000000000000' as result; + test_name | result +------------------------------+-------- + timestamp to ulid conversion | t +(1 row) + diff --git a/docker-compose/ext-src/pgx_ulid-src/expected/02_ulid_conversions.out b/docker-compose/ext-src/pgx_ulid-src/expected/02_ulid_conversions.out new file mode 100644 index 0000000000..d1480f207c --- /dev/null +++ b/docker-compose/ext-src/pgx_ulid-src/expected/02_ulid_conversions.out @@ -0,0 +1,8 @@ +-- Test conversion from timestamptz +SELECT 'timestamptz to ulid conversion' as test_name, + '2023-03-10 04:00:49.111'::timestamptz::ulid::text = '01GV5PA9EQ0000000000000000' as result; + test_name | result +--------------------------------+-------- + timestamptz to ulid conversion | t +(1 row) + diff --git a/docker-compose/ext-src/pgx_ulid-src/expected/03_ulid_errors.out b/docker-compose/ext-src/pgx_ulid-src/expected/03_ulid_errors.out new file mode 100644 index 0000000000..6d5dd99298 --- /dev/null +++ b/docker-compose/ext-src/pgx_ulid-src/expected/03_ulid_errors.out @@ -0,0 +1,19 @@ +-- Test ULID error handling +-- Test invalid ULID string (too short) +SELECT '01GV5PA9EQG7D82Q3Y4PKBZSY'::ulid; +ERROR: invalid input syntax for type ulid: "01GV5PA9EQG7D82Q3Y4PKBZSY": invalid length +LINE 1: SELECT '01GV5PA9EQG7D82Q3Y4PKBZSY'::ulid; + ^ +-- Test invalid ULID string (invalid character) +SELECT '01GV5PA9EQG7D82Q3Y4PKBZSYU'::ulid; +ERROR: invalid input syntax for type ulid: "01GV5PA9EQG7D82Q3Y4PKBZSYU": invalid character +LINE 1: SELECT '01GV5PA9EQG7D82Q3Y4PKBZSYU'::ulid; + ^ +-- Test NULL handling +SELECT 'NULL to ulid conversion returns NULL' as test_name, + NULL::ulid IS NULL as result; + test_name | result +--------------------------------------+-------- + NULL to ulid conversion returns NULL | t +(1 row) + diff --git a/docker-compose/ext-src/pgx_ulid-src/sql/00_ulid_generation.sql b/docker-compose/ext-src/pgx_ulid-src/sql/00_ulid_generation.sql new file mode 100644 index 0000000000..8b110b1cf0 --- /dev/null +++ b/docker-compose/ext-src/pgx_ulid-src/sql/00_ulid_generation.sql @@ -0,0 +1,32 @@ +-- Test basic ULID generation + +-- Test gen_ulid() function +SELECT 'gen_ulid() returns a non-null value' as test_name, + gen_ulid() IS NOT NULL as result; + +-- Test that multiple calls to gen_ulid() return different values +SELECT 'gen_ulid() returns unique values' as test_name, + gen_ulid() != gen_ulid() as result; + +-- Test that gen_ulid() returns a value with the correct format +SELECT 'gen_ulid() returns correctly formatted value' as test_name, + length(gen_ulid()::text) = 26 as result; + +-- Test monotonic ULID generation +SELECT 'gen_monotonic_ulid() returns a non-null value' as test_name, + gen_monotonic_ulid() IS NOT NULL as result; + +-- Test that multiple calls to gen_monotonic_ulid() return different values +SELECT 'gen_monotonic_ulid() returns unique values' as test_name, + gen_monotonic_ulid() != gen_monotonic_ulid() as result; + +-- Test that gen_monotonic_ulid() returns a value with the correct format +SELECT 'gen_monotonic_ulid() returns correctly formatted value' as test_name, + length(gen_monotonic_ulid()::text) = 26 as result; + +-- Test that monotonic ULIDs are ordered correctly +SELECT 'gen_monotonic_ulid() returns ordered values' as test_name, + u1 < u2 as result +FROM ( + SELECT gen_monotonic_ulid() as u1, gen_monotonic_ulid() as u2 +) subq; diff --git a/docker-compose/ext-src/pgx_ulid-src/sql/01_ulid_conversions.sql b/docker-compose/ext-src/pgx_ulid-src/sql/01_ulid_conversions.sql new file mode 100644 index 0000000000..1ff2d60372 --- /dev/null +++ b/docker-compose/ext-src/pgx_ulid-src/sql/01_ulid_conversions.sql @@ -0,0 +1,32 @@ +-- Create a test ULID value +CREATE TEMP TABLE test_ulids AS +SELECT '01GV5PA9EQG7D82Q3Y4PKBZSYV'::ulid as test_ulid; + +-- Test conversion to text +SELECT 'ulid to text conversion' as test_name, + test_ulid::text = '01GV5PA9EQG7D82Q3Y4PKBZSYV' as result +FROM test_ulids; + +-- Test conversion to UUID +SELECT 'ulid to UUID conversion' as test_name, + test_ulid::uuid::text = '0186cb65-25d7-81da-815c-7e25a6bfe7db' as result +FROM test_ulids; + +-- Test conversion to bytea +SELECT 'ulid to bytea conversion' as test_name, + length(test_ulid::bytea) = 16 as result +FROM test_ulids; + +-- Test conversion to timestamp +SELECT 'ulid to timestamp conversion' as test_name, + to_char(test_ulid::timestamp, 'YYYY-MM-DD HH24:MI:SS.MS') = '2023-03-10 04:00:49.111' as result +FROM test_ulids; + +-- Test conversion from UUID +SELECT 'UUID to ulid conversion' as test_name, + '0186cb65-25d7-81da-815c-7e25a6bfe7db'::uuid::ulid::text = '01GV5PA9EQG7D82Q3Y4PKBZSYV' as result; + +-- Test conversion from timestamp +SELECT 'timestamp to ulid conversion' as test_name, + '2023-03-10 12:00:49.111'::timestamp::ulid::text = '01GV5PA9EQ0000000000000000' as result; + diff --git a/docker-compose/ext-src/pgx_ulid-src/sql/02_ulid_conversions.sql b/docker-compose/ext-src/pgx_ulid-src/sql/02_ulid_conversions.sql new file mode 100644 index 0000000000..2038512753 --- /dev/null +++ b/docker-compose/ext-src/pgx_ulid-src/sql/02_ulid_conversions.sql @@ -0,0 +1,3 @@ +-- Test conversion from timestamptz +SELECT 'timestamptz to ulid conversion' as test_name, + '2023-03-10 04:00:49.111'::timestamptz::ulid::text = '01GV5PA9EQ0000000000000000' as result; diff --git a/docker-compose/ext-src/pgx_ulid-src/sql/03_ulid_errors.sql b/docker-compose/ext-src/pgx_ulid-src/sql/03_ulid_errors.sql new file mode 100644 index 0000000000..44dc07d309 --- /dev/null +++ b/docker-compose/ext-src/pgx_ulid-src/sql/03_ulid_errors.sql @@ -0,0 +1,12 @@ +-- Test ULID error handling + +-- Test invalid ULID string (too short) +SELECT '01GV5PA9EQG7D82Q3Y4PKBZSY'::ulid; + +-- Test invalid ULID string (invalid character) +SELECT '01GV5PA9EQG7D82Q3Y4PKBZSYU'::ulid; + +-- Test NULL handling +SELECT 'NULL to ulid conversion returns NULL' as test_name, + NULL::ulid IS NULL as result; + diff --git a/docker-compose/ext-src/rag_bge_small_en_v15-src/Makefile b/docker-compose/ext-src/rag_bge_small_en_v15-src/Makefile new file mode 100644 index 0000000000..de39cdc367 --- /dev/null +++ b/docker-compose/ext-src/rag_bge_small_en_v15-src/Makefile @@ -0,0 +1,10 @@ +EXTENSION = rag_bge_small_en_v15 +MODULE_big = rag_bge_small_en_v15 +OBJS = $(patsubst %.rs,%.o,$(wildcard src/*.rs)) + +REGRESS = basic_functions embedding_functions basic_functions_enhanced embedding_functions_enhanced +REGRESS_OPTS = --load-extension=vector --load-extension=rag_bge_small_en_v15 + +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) diff --git a/docker-compose/ext-src/rag_bge_small_en_v15-src/expected/basic_functions.out b/docker-compose/ext-src/rag_bge_small_en_v15-src/expected/basic_functions.out new file mode 100644 index 0000000000..17194b79a5 --- /dev/null +++ b/docker-compose/ext-src/rag_bge_small_en_v15-src/expected/basic_functions.out @@ -0,0 +1,7 @@ +-- Basic function tests +SELECT rag_bge_small_en_v15.chunks_by_token_count('the cat sat on the mat', 3, 2); + chunks_by_token_count +-------------------------------------------------------- + {"the cat sat","cat sat on","sat on the","on the mat"} +(1 row) + diff --git a/docker-compose/ext-src/rag_bge_small_en_v15-src/expected/basic_functions_enhanced.out b/docker-compose/ext-src/rag_bge_small_en_v15-src/expected/basic_functions_enhanced.out new file mode 100644 index 0000000000..f191aad5db --- /dev/null +++ b/docker-compose/ext-src/rag_bge_small_en_v15-src/expected/basic_functions_enhanced.out @@ -0,0 +1,31 @@ +-- Basic function tests for chunks_by_token_count +SELECT rag_bge_small_en_v15.chunks_by_token_count('the cat sat on the mat', 3, 2); + chunks_by_token_count +-------------------------------------------------------- + {"the cat sat","cat sat on","sat on the","on the mat"} +(1 row) + +SELECT rag_bge_small_en_v15.chunks_by_token_count('Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.', 5, 2); + chunks_by_token_count +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + {"Lorem ipsum","ipsum dolor sit","sit amet,",consectetur,"adipiscing elit",elit.,"Sed do","do eiusmod",tempor,"incididunt ut","ut labore et","et dolore magna","magna aliqua."} +(1 row) + +SELECT (rag_bge_small_en_v15.chunks_by_token_count('the cat', 5, 0))[1]; + chunks_by_token_count +----------------------- + the cat +(1 row) + +SELECT rag_bge_small_en_v15.chunks_by_token_count('', 5, 2); + chunks_by_token_count +----------------------- + {} +(1 row) + +SELECT rag_bge_small_en_v15.chunks_by_token_count('a b c d e f g h i j k l m n o p', 3, 1); + chunks_by_token_count +----------------------------------------------------------------- + {"a b c","c d e","e f g","g h i","i j k","k l m","m n o","o p"} +(1 row) + diff --git a/docker-compose/ext-src/rag_bge_small_en_v15-src/expected/embedding_functions.out b/docker-compose/ext-src/rag_bge_small_en_v15-src/expected/embedding_functions.out new file mode 100644 index 0000000000..034e41bd47 --- /dev/null +++ b/docker-compose/ext-src/rag_bge_small_en_v15-src/expected/embedding_functions.out @@ -0,0 +1,15 @@ +-- Embedding function tests +SELECT 'embedding_for_passage_test' AS test_name, + vector_dims(rag_bge_small_en_v15.embedding_for_passage('the cat sat on the mat')) > 0 AS result; + test_name | result +----------------------------+-------- + embedding_for_passage_test | t +(1 row) + +SELECT 'embedding_for_query_test' AS test_name, + vector_dims(rag_bge_small_en_v15.embedding_for_query('the cat sat on the mat')) > 0 AS result; + test_name | result +--------------------------+-------- + embedding_for_query_test | t +(1 row) + diff --git a/docker-compose/ext-src/rag_bge_small_en_v15-src/expected/embedding_functions_enhanced.out b/docker-compose/ext-src/rag_bge_small_en_v15-src/expected/embedding_functions_enhanced.out new file mode 100644 index 0000000000..1fdcdf4e42 --- /dev/null +++ b/docker-compose/ext-src/rag_bge_small_en_v15-src/expected/embedding_functions_enhanced.out @@ -0,0 +1,52 @@ +-- Embedding function tests +SELECT 'embedding_for_passage_test_1' AS test_name, + vector_dims(rag_bge_small_en_v15.embedding_for_passage('the cat sat on the mat')) > 0 AS result; + test_name | result +------------------------------+-------- + embedding_for_passage_test_1 | t +(1 row) + +SELECT 'embedding_for_passage_test_2' AS test_name, + vector_dims(rag_bge_small_en_v15.embedding_for_passage('Lorem ipsum dolor sit amet')) > 0 AS result; + test_name | result +------------------------------+-------- + embedding_for_passage_test_2 | t +(1 row) + +SELECT 'embedding_for_passage_test_3' AS test_name, + vector_dims(rag_bge_small_en_v15.embedding_for_passage('')) > 0 AS result; + test_name | result +------------------------------+-------- + embedding_for_passage_test_3 | t +(1 row) + +SELECT 'embedding_for_query_test_1' AS test_name, + vector_dims(rag_bge_small_en_v15.embedding_for_query('the cat sat on the mat')) > 0 AS result; + test_name | result +----------------------------+-------- + embedding_for_query_test_1 | t +(1 row) + +SELECT 'embedding_for_query_test_2' AS test_name, + vector_dims(rag_bge_small_en_v15.embedding_for_query('Lorem ipsum dolor sit amet')) > 0 AS result; + test_name | result +----------------------------+-------- + embedding_for_query_test_2 | t +(1 row) + +SELECT 'embedding_for_query_test_3' AS test_name, + vector_dims(rag_bge_small_en_v15.embedding_for_query('')) > 0 AS result; + test_name | result +----------------------------+-------- + embedding_for_query_test_3 | t +(1 row) + +-- Test that passage and query embeddings have the same dimensions +SELECT 'embedding_dimensions_match' AS test_name, + vector_dims(rag_bge_small_en_v15.embedding_for_passage('test')) = + vector_dims(rag_bge_small_en_v15.embedding_for_query('test')) AS result; + test_name | result +----------------------------+-------- + embedding_dimensions_match | t +(1 row) + diff --git a/docker-compose/ext-src/rag_bge_small_en_v15-src/sql/basic_functions.sql b/docker-compose/ext-src/rag_bge_small_en_v15-src/sql/basic_functions.sql new file mode 100644 index 0000000000..f60207e074 --- /dev/null +++ b/docker-compose/ext-src/rag_bge_small_en_v15-src/sql/basic_functions.sql @@ -0,0 +1,2 @@ +-- Basic function tests +SELECT rag_bge_small_en_v15.chunks_by_token_count('the cat sat on the mat', 3, 2); diff --git a/docker-compose/ext-src/rag_bge_small_en_v15-src/sql/basic_functions_enhanced.sql b/docker-compose/ext-src/rag_bge_small_en_v15-src/sql/basic_functions_enhanced.sql new file mode 100644 index 0000000000..f2089cecec --- /dev/null +++ b/docker-compose/ext-src/rag_bge_small_en_v15-src/sql/basic_functions_enhanced.sql @@ -0,0 +1,10 @@ +-- Basic function tests for chunks_by_token_count +SELECT rag_bge_small_en_v15.chunks_by_token_count('the cat sat on the mat', 3, 2); + +SELECT rag_bge_small_en_v15.chunks_by_token_count('Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.', 5, 2); + +SELECT (rag_bge_small_en_v15.chunks_by_token_count('the cat', 5, 0))[1]; + +SELECT rag_bge_small_en_v15.chunks_by_token_count('', 5, 2); + +SELECT rag_bge_small_en_v15.chunks_by_token_count('a b c d e f g h i j k l m n o p', 3, 1); diff --git a/docker-compose/ext-src/rag_bge_small_en_v15-src/sql/embedding_functions.sql b/docker-compose/ext-src/rag_bge_small_en_v15-src/sql/embedding_functions.sql new file mode 100644 index 0000000000..ef9dedd9d7 --- /dev/null +++ b/docker-compose/ext-src/rag_bge_small_en_v15-src/sql/embedding_functions.sql @@ -0,0 +1,6 @@ +-- Embedding function tests +SELECT 'embedding_for_passage_test' AS test_name, + vector_dims(rag_bge_small_en_v15.embedding_for_passage('the cat sat on the mat')) > 0 AS result; + +SELECT 'embedding_for_query_test' AS test_name, + vector_dims(rag_bge_small_en_v15.embedding_for_query('the cat sat on the mat')) > 0 AS result; diff --git a/docker-compose/ext-src/rag_bge_small_en_v15-src/sql/embedding_functions_enhanced.sql b/docker-compose/ext-src/rag_bge_small_en_v15-src/sql/embedding_functions_enhanced.sql new file mode 100644 index 0000000000..0ca5d28111 --- /dev/null +++ b/docker-compose/ext-src/rag_bge_small_en_v15-src/sql/embedding_functions_enhanced.sql @@ -0,0 +1,23 @@ +-- Embedding function tests +SELECT 'embedding_for_passage_test_1' AS test_name, + vector_dims(rag_bge_small_en_v15.embedding_for_passage('the cat sat on the mat')) > 0 AS result; + +SELECT 'embedding_for_passage_test_2' AS test_name, + vector_dims(rag_bge_small_en_v15.embedding_for_passage('Lorem ipsum dolor sit amet')) > 0 AS result; + +SELECT 'embedding_for_passage_test_3' AS test_name, + vector_dims(rag_bge_small_en_v15.embedding_for_passage('')) > 0 AS result; + +SELECT 'embedding_for_query_test_1' AS test_name, + vector_dims(rag_bge_small_en_v15.embedding_for_query('the cat sat on the mat')) > 0 AS result; + +SELECT 'embedding_for_query_test_2' AS test_name, + vector_dims(rag_bge_small_en_v15.embedding_for_query('Lorem ipsum dolor sit amet')) > 0 AS result; + +SELECT 'embedding_for_query_test_3' AS test_name, + vector_dims(rag_bge_small_en_v15.embedding_for_query('')) > 0 AS result; + +-- Test that passage and query embeddings have the same dimensions +SELECT 'embedding_dimensions_match' AS test_name, + vector_dims(rag_bge_small_en_v15.embedding_for_passage('test')) = + vector_dims(rag_bge_small_en_v15.embedding_for_query('test')) AS result; diff --git a/docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/Makefile b/docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/Makefile new file mode 100644 index 0000000000..6067debf56 --- /dev/null +++ b/docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/Makefile @@ -0,0 +1,10 @@ +EXTENSION = rag_jina_reranker_v1_tiny_en +MODULE_big = rag_jina_reranker_v1_tiny_en +OBJS = $(patsubst %.rs,%.o,$(wildcard src/*.rs)) + +REGRESS = reranking_functions reranking_functions_enhanced +REGRESS_OPTS = --load-extension=vector --load-extension=rag_jina_reranker_v1_tiny_en + +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) diff --git a/docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/expected/reranking_functions.out b/docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/expected/reranking_functions.out new file mode 100644 index 0000000000..475718ea99 --- /dev/null +++ b/docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/expected/reranking_functions.out @@ -0,0 +1,25 @@ +-- Reranking function tests +SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon'); + rerank_distance +----------------- + 0.8989152 +(1 row) + +SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']); + rerank_distance +----------------------- + {0.8989152,1.3018152} +(1 row) + +SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon'); + rerank_score +-------------- + -0.8989152 +(1 row) + +SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']); + rerank_score +------------------------- + {-0.8989152,-1.3018152} +(1 row) + diff --git a/docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/expected/reranking_functions_enhanced.out b/docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/expected/reranking_functions_enhanced.out new file mode 100644 index 0000000000..b610896fa2 --- /dev/null +++ b/docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/expected/reranking_functions_enhanced.out @@ -0,0 +1,92 @@ +-- Reranking function tests - single passage +SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon'); + rerank_distance +----------------- + 0.8989152 +(1 row) + +SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the tanks fired at the buildings'); + rerank_distance +----------------- + 1.3018152 +(1 row) + +SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('query about cats', 'information about felines'); + rerank_distance +----------------- + 1.3133051 +(1 row) + +SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('', 'empty query test'); + rerank_distance +----------------- + 0.7075559 +(1 row) + +-- Reranking function tests - array of passages +SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', + ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']); + rerank_distance +----------------------- + {0.8989152,1.3018152} +(1 row) + +SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('query about programming', + ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases']); + rerank_distance +------------------------------------ + {0.16591403,0.33475375,0.10132827} +(1 row) + +SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('empty array test', ARRAY[]::text[]); + rerank_distance +----------------- + {} +(1 row) + +-- Reranking score function tests - single passage +SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon'); + rerank_score +-------------- + -0.8989152 +(1 row) + +SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the tanks fired at the buildings'); + rerank_score +-------------- + -1.3018152 +(1 row) + +SELECT rag_jina_reranker_v1_tiny_en.rerank_score('query about cats', 'information about felines'); + rerank_score +-------------- + -1.3133051 +(1 row) + +SELECT rag_jina_reranker_v1_tiny_en.rerank_score('', 'empty query test'); + rerank_score +-------------- + -0.7075559 +(1 row) + +-- Reranking score function tests - array of passages +SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', + ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']); + rerank_score +------------------------- + {-0.8989152,-1.3018152} +(1 row) + +SELECT rag_jina_reranker_v1_tiny_en.rerank_score('query about programming', + ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases']); + rerank_score +--------------------------------------- + {-0.16591403,-0.33475375,-0.10132827} +(1 row) + +SELECT rag_jina_reranker_v1_tiny_en.rerank_score('empty array test', ARRAY[]::text[]); + rerank_score +-------------- + {} +(1 row) + diff --git a/docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/sql/reranking_functions.sql b/docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/sql/reranking_functions.sql new file mode 100644 index 0000000000..0837b18ffd --- /dev/null +++ b/docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/sql/reranking_functions.sql @@ -0,0 +1,8 @@ +-- Reranking function tests +SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon'); + +SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']); + +SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon'); + +SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']); \ No newline at end of file diff --git a/docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/sql/reranking_functions_enhanced.sql b/docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/sql/reranking_functions_enhanced.sql new file mode 100644 index 0000000000..b967d9e98e --- /dev/null +++ b/docker-compose/ext-src/rag_jina_reranker_v1_tiny_en-src/sql/reranking_functions_enhanced.sql @@ -0,0 +1,35 @@ +-- Reranking function tests - single passage +SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon'); + +SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the tanks fired at the buildings'); + +SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('query about cats', 'information about felines'); + +SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('', 'empty query test'); + +-- Reranking function tests - array of passages +SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', + ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']); + +SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('query about programming', + ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases']); + +SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('empty array test', ARRAY[]::text[]); + +-- Reranking score function tests - single passage +SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon'); + +SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the tanks fired at the buildings'); + +SELECT rag_jina_reranker_v1_tiny_en.rerank_score('query about cats', 'information about felines'); + +SELECT rag_jina_reranker_v1_tiny_en.rerank_score('', 'empty query test'); + +-- Reranking score function tests - array of passages +SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', + ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']); + +SELECT rag_jina_reranker_v1_tiny_en.rerank_score('query about programming', + ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases']); + +SELECT rag_jina_reranker_v1_tiny_en.rerank_score('empty array test', ARRAY[]::text[]);