mirror of
https://github.com/neondatabase/neon.git
synced 2025-12-22 21:59:59 +00:00
Add -fsigned-char for cross platform signed chars (#10852)
## Problem In multi-character keys, the GIN index creates a CRC Hash of the first 3 bytes of the key. The hash can have the first bit to be set or unset, needing to have a consistent representation of `char` across architectures for consistent results. GIN stores these keys by their hashes which determines the order in which the keys are obtained from the GIN index. By default, chars are signed in x86 and unsigned in arm, leading to inconsistent behavior across different platform architectures. Adding the `-fsigned-char` flag to the GCC compiler forces chars to be treated as signed across platforms, ensuring the ordering in which the keys are obtained consistent. ## Summary of changes Added `-fsigned-char` to the `CFLAGS` to force GCC to use signed chars across platforms. Added a test to check this across platforms. Fixes: https://github.com/neondatabase/cloud/issues/23199
This commit is contained in:
7
Makefile
7
Makefile
@@ -11,15 +11,16 @@ ICU_PREFIX_DIR := /usr/local/icu
|
||||
#
|
||||
BUILD_TYPE ?= debug
|
||||
WITH_SANITIZERS ?= no
|
||||
PG_CFLAGS = -fsigned-char
|
||||
ifeq ($(BUILD_TYPE),release)
|
||||
PG_CONFIGURE_OPTS = --enable-debug --with-openssl
|
||||
PG_CFLAGS = -O2 -g3 $(CFLAGS)
|
||||
PG_CFLAGS += -O2 -g3 $(CFLAGS)
|
||||
PG_LDFLAGS = $(LDFLAGS)
|
||||
# Unfortunately, `--profile=...` is a nightly feature
|
||||
CARGO_BUILD_FLAGS += --release
|
||||
else ifeq ($(BUILD_TYPE),debug)
|
||||
PG_CONFIGURE_OPTS = --enable-debug --with-openssl --enable-cassert --enable-depend
|
||||
PG_CFLAGS = -O0 -g3 $(CFLAGS)
|
||||
PG_CFLAGS += -O0 -g3 $(CFLAGS)
|
||||
PG_LDFLAGS = $(LDFLAGS)
|
||||
else
|
||||
$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
|
||||
@@ -159,6 +160,8 @@ postgres-%: postgres-configure-% \
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_visibility install
|
||||
+@echo "Compiling pageinspect $*"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect install
|
||||
+@echo "Compiling pg_trgm $*"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_trgm install
|
||||
+@echo "Compiling amcheck $*"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/amcheck install
|
||||
+@echo "Compiling test_decoding $*"
|
||||
|
||||
@@ -162,7 +162,7 @@ FROM build-deps AS pg-build
|
||||
ARG PG_VERSION
|
||||
COPY vendor/postgres-${PG_VERSION:?} postgres
|
||||
RUN cd postgres && \
|
||||
export CONFIGURE_CMD="./configure CFLAGS='-O2 -g3' --enable-debug --with-openssl --with-uuid=ossp \
|
||||
export CONFIGURE_CMD="./configure CFLAGS='-O2 -g3 -fsigned-char' --enable-debug --with-openssl --with-uuid=ossp \
|
||||
--with-icu --with-libxml --with-libxslt --with-lz4" && \
|
||||
if [ "${PG_VERSION:?}" != "v14" ]; then \
|
||||
# zstd is available only from PG15
|
||||
|
||||
1
test_runner/regress/data/test_signed_char.out
Normal file
1
test_runner/regress/data/test_signed_char.out
Normal file
File diff suppressed because one or more lines are too long
64
test_runner/regress/test_signed_char.py
Normal file
64
test_runner/regress/test_signed_char.py
Normal file
@@ -0,0 +1,64 @@
|
||||
from pathlib import Path
|
||||
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
|
||||
SIGNED_CHAR_EXTRACT = """
|
||||
WITH
|
||||
-- Generates an intermediate table with block numbers of the index
|
||||
pagenumbers AS (
|
||||
SELECT num FROM generate_series(0, (pg_relation_size('test_payload_idx') / 8192) - 1) it(num)
|
||||
)
|
||||
SELECT num,
|
||||
-- Gets the data of the page, skipping the first 8 bytes which is the LSN
|
||||
substr(page, 9, 8192-8),
|
||||
-- Returns information about the GIN index opaque area
|
||||
(gin_page_opaque_info(page)).*
|
||||
FROM pagenumbers,
|
||||
-- Gets a page from the respective blocks of the table
|
||||
LATERAL (SELECT get_raw_page('test_payload_idx', num)) AS p(page)
|
||||
-- Filters to only return leaf pages from the GIN Index
|
||||
WHERE ARRAY['leaf'] = ((gin_page_opaque_info(page)).flags);
|
||||
"""
|
||||
|
||||
|
||||
def test_signed_char(neon_simple_env: NeonEnv):
|
||||
"""
|
||||
Test that postgres was compiled with -fsigned-char.
|
||||
---
|
||||
In multi-character keys, the GIN index creates a CRC Hash of the first 3 bytes of the key.
|
||||
The hash can have the first bit to be set or unset, needing to have a consistent representation
|
||||
of char across architectures for consistent results. GIN stores these keys by their hashes
|
||||
which determines the order in which the keys are obtained from the GIN index.
|
||||
Using -fsigned-char enforces this order across platforms making this consistent.
|
||||
The following query gets all the data present in the leaf page of a GIN index,
|
||||
which is ordered by the CRC hash and is consistent across platforms.
|
||||
"""
|
||||
env = neon_simple_env
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
with endpoint.connect().cursor() as ses1:
|
||||
# Add the required extensions
|
||||
ses1.execute("CREATE EXTENSION pg_trgm;")
|
||||
ses1.execute("CREATE EXTENSION pageinspect;")
|
||||
# Create a test table
|
||||
ses1.execute("CREATE TABLE test (payload text);")
|
||||
# Create a GIN based index
|
||||
ses1.execute(
|
||||
"CREATE INDEX test_payload_idx ON test USING gin (payload gin_trgm_ops) WITH (gin_pending_list_limit = 64);"
|
||||
)
|
||||
# insert a multibyte character to trigger order-dependent hashing
|
||||
ses1.execute(
|
||||
"INSERT INTO test SELECT '123456789BV' || CHR(127153) /* ace of spades, a multibyte character */ || i::text from generate_series(1, 40) as i(i);"
|
||||
)
|
||||
ses1.execute("INSERT INTO test SELECT 'Bóbr';")
|
||||
# Clean pending list to flush data to pages
|
||||
ses1.execute("select gin_clean_pending_list('test_payload_idx'::regclass);")
|
||||
ses1.execute(SIGNED_CHAR_EXTRACT)
|
||||
pages = ses1.fetchall()
|
||||
# Compare expected output
|
||||
page1 = pages[0]
|
||||
data = bytes(page1[1]).hex()
|
||||
with open(Path(__file__).parent / "data" / "test_signed_char.out", encoding="utf-8") as f:
|
||||
expected = f.read().rstrip()
|
||||
|
||||
assert data == expected
|
||||
Reference in New Issue
Block a user