fix: correct inverted_indexed_column_ids behavior (#5586)

* fix: correct `inverted_indexed_column_ids`

* fix: fix unit tests
This commit is contained in:
Weny Xu
2025-02-23 16:17:38 +09:00
committed by GitHub
parent 4f988b5ba9
commit 286f225e50
3 changed files with 20 additions and 34 deletions

View File

@@ -394,27 +394,27 @@ mod tests {
use crate::config::{FulltextIndexConfig, Mode};
struct MetaConfig {
with_tag: bool,
with_inverted: bool,
with_fulltext: bool,
with_skipping_bloom: bool,
}
fn mock_region_metadata(
MetaConfig {
with_tag,
with_inverted,
with_fulltext,
with_skipping_bloom,
}: MetaConfig,
) -> RegionMetadataRef {
let mut builder = RegionMetadataBuilder::new(RegionId::new(1, 2));
let mut column_schema = ColumnSchema::new("a", ConcreteDataType::int64_datatype(), false);
if with_inverted {
column_schema = column_schema.with_inverted_index(true);
}
builder
.push_column_metadata(ColumnMetadata {
column_schema: ColumnSchema::new("a", ConcreteDataType::int64_datatype(), false),
semantic_type: if with_tag {
SemanticType::Tag
} else {
SemanticType::Field
},
column_schema,
semantic_type: SemanticType::Field,
column_id: 1,
})
.push_column_metadata(ColumnMetadata {
@@ -432,10 +432,6 @@ mod tests {
column_id: 3,
});
if with_tag {
builder.primary_key(vec![1]);
}
if with_fulltext {
let column_schema =
ColumnSchema::new("text", ConcreteDataType::string_datatype(), true)
@@ -502,7 +498,7 @@ mod tests {
let intm_manager = mock_intm_mgr(dir.path().to_string_lossy()).await;
let metadata = mock_region_metadata(MetaConfig {
with_tag: true,
with_inverted: true,
with_fulltext: true,
with_skipping_bloom: true,
});
@@ -532,7 +528,7 @@ mod tests {
let intm_manager = mock_intm_mgr(dir.path().to_string_lossy()).await;
let metadata = mock_region_metadata(MetaConfig {
with_tag: true,
with_inverted: true,
with_fulltext: true,
with_skipping_bloom: true,
});
@@ -607,7 +603,7 @@ mod tests {
let intm_manager = mock_intm_mgr(dir.path().to_string_lossy()).await;
let metadata = mock_region_metadata(MetaConfig {
with_tag: false,
with_inverted: false,
with_fulltext: true,
with_skipping_bloom: true,
});
@@ -630,7 +626,7 @@ mod tests {
assert!(indexer.bloom_filter_indexer.is_some());
let metadata = mock_region_metadata(MetaConfig {
with_tag: true,
with_inverted: true,
with_fulltext: false,
with_skipping_bloom: true,
});
@@ -653,7 +649,7 @@ mod tests {
assert!(indexer.bloom_filter_indexer.is_some());
let metadata = mock_region_metadata(MetaConfig {
with_tag: true,
with_inverted: true,
with_fulltext: true,
with_skipping_bloom: false,
});
@@ -683,7 +679,7 @@ mod tests {
let intm_manager = mock_intm_mgr(dir.path().to_string_lossy()).await;
let metadata = mock_region_metadata(MetaConfig {
with_tag: true,
with_inverted: true,
with_fulltext: true,
with_skipping_bloom: true,
});

View File

@@ -47,7 +47,8 @@ pub fn sst_region_metadata() -> RegionMetadata {
"tag_0".to_string(),
ConcreteDataType::string_datatype(),
true,
),
)
.with_inverted_index(true),
semantic_type: SemanticType::Tag,
column_id: 0,
})

View File

@@ -340,27 +340,16 @@ impl RegionMetadata {
}
/// Gets the column ids to be indexed by inverted index.
///
/// If there is no column with inverted index key, it will use primary key columns.
pub fn inverted_indexed_column_ids<'a>(
&self,
ignore_column_ids: impl Iterator<Item = &'a ColumnId>,
) -> HashSet<ColumnId> {
// Default to use primary key columns as inverted index columns.
let pk_as_inverted_index = !self
let mut inverted_index = self
.column_metadatas
.iter()
.any(|c| c.column_schema.has_inverted_index_key());
let mut inverted_index: HashSet<_> = if pk_as_inverted_index {
self.primary_key_columns().map(|c| c.column_id).collect()
} else {
self.column_metadatas
.iter()
.filter(|column| column.column_schema.is_inverted_indexed())
.map(|column| column.column_id)
.collect()
};
.filter(|column| column.column_schema.is_inverted_indexed())
.map(|column| column.column_id)
.collect::<HashSet<_>>();
for ignored in ignore_column_ids {
inverted_index.remove(ignored);