mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-01-04 20:32:56 +00:00
feat: add backend field to fulltext options (#5806)
* feat: add backend field to fulltext options
  Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
* update proto
  Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
* fix option conv
  Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
* fix display
  Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
* polish
  Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
---------
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
This commit is contained in:
12
Cargo.lock
generated
12
Cargo.lock
generated
@@ -1053,7 +1053,7 @@ dependencies = [
|
||||
"bitflags 2.9.0",
|
||||
"cexpr",
|
||||
"clang-sys",
|
||||
"itertools 0.13.0",
|
||||
"itertools 0.11.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"regex",
|
||||
@@ -4689,7 +4689,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "greptime-proto"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=103948cbce833e1a17ee7083f5ba79564d08d6ec#103948cbce833e1a17ee7083f5ba79564d08d6ec"
|
||||
source = "git+https://github.com/GreptimeTeam/greptime-proto.git?branch=zhongzc%2Falter-fulltext-backend#b794184a4ce71e7fb7e1dfe17821c5a472a79588"
|
||||
dependencies = [
|
||||
"prost 0.13.3",
|
||||
"serde",
|
||||
@@ -6252,7 +6252,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"windows-targets 0.52.6",
|
||||
"windows-targets 0.48.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -8885,7 +8885,7 @@ checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"heck 0.5.0",
|
||||
"itertools 0.13.0",
|
||||
"itertools 0.11.0",
|
||||
"log",
|
||||
"multimap",
|
||||
"once_cell",
|
||||
@@ -8931,7 +8931,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"itertools 0.13.0",
|
||||
"itertools 0.11.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.96",
|
||||
@@ -13554,7 +13554,7 @@ version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
|
||||
dependencies = [
|
||||
"windows-sys 0.59.0",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
@@ -130,7 +130,8 @@ etcd-client = "0.14"
|
||||
fst = "0.4.7"
|
||||
futures = "0.3"
|
||||
futures-util = "0.3"
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "103948cbce833e1a17ee7083f5ba79564d08d6ec" }
|
||||
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", branch = "zhongzc/alter-fulltext-backend" }
|
||||
# greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "103948cbce833e1a17ee7083f5ba79564d08d6ec" }
|
||||
hex = "0.4"
|
||||
http = "1"
|
||||
humantime = "2.1"
|
||||
|
||||
@@ -15,10 +15,13 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use datatypes::schema::{
|
||||
ColumnDefaultConstraint, ColumnSchema, FulltextAnalyzer, FulltextOptions, SkippingIndexOptions,
|
||||
SkippingIndexType, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY, SKIPPING_INDEX_KEY,
|
||||
ColumnDefaultConstraint, ColumnSchema, FulltextAnalyzer, FulltextBackend, FulltextOptions,
|
||||
SkippingIndexOptions, SkippingIndexType, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY,
|
||||
SKIPPING_INDEX_KEY,
|
||||
};
|
||||
use greptime_proto::v1::{
|
||||
Analyzer, FulltextBackend as PbFulltextBackend, SkippingIndexType as PbSkippingIndexType,
|
||||
};
|
||||
use greptime_proto::v1::{Analyzer, SkippingIndexType as PbSkippingIndexType};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::error::{self, Result};
|
||||
@@ -142,13 +145,21 @@ pub fn options_from_inverted() -> ColumnOptions {
|
||||
}
|
||||
|
||||
/// Converts the protobuf `Analyzer` into the corresponding `FulltextAnalyzer`.
|
||||
pub fn as_fulltext_option(analyzer: Analyzer) -> FulltextAnalyzer {
|
||||
pub fn as_fulltext_option_analyzer(analyzer: Analyzer) -> FulltextAnalyzer {
|
||||
match analyzer {
|
||||
Analyzer::English => FulltextAnalyzer::English,
|
||||
Analyzer::Chinese => FulltextAnalyzer::Chinese,
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts the protobuf `FulltextBackend` into the corresponding schema `FulltextBackend`.
|
||||
pub fn as_fulltext_option_backend(backend: PbFulltextBackend) -> FulltextBackend {
|
||||
match backend {
|
||||
PbFulltextBackend::Bloom => FulltextBackend::Bloom,
|
||||
PbFulltextBackend::Tantivy => FulltextBackend::Tantivy,
|
||||
}
|
||||
}
|
||||
|
||||
/// Tries to construct a `SkippingIndexType` from the given skipping index type.
|
||||
pub fn as_skipping_index_type(skipping_index_type: PbSkippingIndexType) -> SkippingIndexType {
|
||||
match skipping_index_type {
|
||||
@@ -160,7 +171,7 @@ pub fn as_skipping_index_type(skipping_index_type: PbSkippingIndexType) -> Skipp
|
||||
mod tests {
|
||||
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::schema::FulltextAnalyzer;
|
||||
use datatypes::schema::{FulltextAnalyzer, FulltextBackend};
|
||||
|
||||
use super::*;
|
||||
use crate::v1::ColumnDataType;
|
||||
@@ -219,13 +230,14 @@ mod tests {
|
||||
enable: true,
|
||||
analyzer: FulltextAnalyzer::English,
|
||||
case_sensitive: false,
|
||||
backend: FulltextBackend::Bloom,
|
||||
})
|
||||
.unwrap();
|
||||
schema.set_inverted_index(true);
|
||||
let options = options_from_column_schema(&schema).unwrap();
|
||||
assert_eq!(
|
||||
options.options.get(FULLTEXT_GRPC_KEY).unwrap(),
|
||||
"{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false}"
|
||||
"{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\"}"
|
||||
);
|
||||
assert_eq!(
|
||||
options.options.get(INVERTED_INDEX_GRPC_KEY).unwrap(),
|
||||
@@ -239,11 +251,12 @@ mod tests {
|
||||
enable: true,
|
||||
analyzer: FulltextAnalyzer::English,
|
||||
case_sensitive: false,
|
||||
backend: FulltextBackend::Bloom,
|
||||
};
|
||||
let options = options_from_fulltext(&fulltext).unwrap().unwrap();
|
||||
assert_eq!(
|
||||
options.options.get(FULLTEXT_GRPC_KEY).unwrap(),
|
||||
"{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false}"
|
||||
"{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\"}"
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -15,11 +15,13 @@
|
||||
use api::helper::ColumnDataTypeWrapper;
|
||||
use api::v1::add_column_location::LocationType;
|
||||
use api::v1::alter_table_expr::Kind;
|
||||
use api::v1::column_def::{as_fulltext_option, as_skipping_index_type};
|
||||
use api::v1::column_def::{
|
||||
as_fulltext_option_analyzer, as_fulltext_option_backend, as_skipping_index_type,
|
||||
};
|
||||
use api::v1::{
|
||||
column_def, AddColumnLocation as Location, AlterTableExpr, Analyzer, CreateTableExpr,
|
||||
DropColumns, ModifyColumnTypes, RenameTable, SemanticType,
|
||||
SkippingIndexType as PbSkippingIndexType,
|
||||
DropColumns, FulltextBackend as PbFulltextBackend, ModifyColumnTypes, RenameTable,
|
||||
SemanticType, SkippingIndexType as PbSkippingIndexType,
|
||||
};
|
||||
use common_query::AddColumnLocation;
|
||||
use datatypes::schema::{ColumnSchema, FulltextOptions, RawSchema, SkippingIndexOptions};
|
||||
@@ -126,11 +128,15 @@ pub fn alter_expr_to_request(table_id: TableId, expr: AlterTableExpr) -> Result<
|
||||
column_name: f.column_name.clone(),
|
||||
options: FulltextOptions {
|
||||
enable: f.enable,
|
||||
analyzer: as_fulltext_option(
|
||||
analyzer: as_fulltext_option_analyzer(
|
||||
Analyzer::try_from(f.analyzer)
|
||||
.context(InvalidSetFulltextOptionRequestSnafu)?,
|
||||
),
|
||||
case_sensitive: f.case_sensitive,
|
||||
backend: as_fulltext_option_backend(
|
||||
PbFulltextBackend::try_from(f.backend)
|
||||
.context(InvalidSetFulltextOptionRequestSnafu)?,
|
||||
),
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
@@ -28,8 +28,9 @@ use snafu::{ensure, ResultExt};
|
||||
use crate::error::{self, DuplicateColumnSnafu, Error, ProjectArrowSchemaSnafu, Result};
|
||||
use crate::prelude::ConcreteDataType;
|
||||
pub use crate::schema::column_schema::{
|
||||
ColumnSchema, FulltextAnalyzer, FulltextOptions, Metadata, SkippingIndexOptions,
|
||||
SkippingIndexType, COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE, COLUMN_FULLTEXT_OPT_KEY_ANALYZER,
|
||||
ColumnSchema, FulltextAnalyzer, FulltextBackend, FulltextOptions, Metadata,
|
||||
SkippingIndexOptions, SkippingIndexType, COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE,
|
||||
COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_BACKEND,
|
||||
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY,
|
||||
COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY,
|
||||
SKIPPING_INDEX_KEY, TIME_INDEX_KEY,
|
||||
|
||||
@@ -46,6 +46,7 @@ pub const SKIPPING_INDEX_KEY: &str = "greptime:skipping_index";
|
||||
pub const COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE: &str = "enable";
|
||||
pub const COLUMN_FULLTEXT_OPT_KEY_ANALYZER: &str = "analyzer";
|
||||
pub const COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE: &str = "case_sensitive";
|
||||
pub const COLUMN_FULLTEXT_OPT_KEY_BACKEND: &str = "backend";
|
||||
|
||||
/// Keys used in SKIPPING index options
|
||||
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY: &str = "granularity";
|
||||
@@ -514,6 +515,9 @@ pub struct FulltextOptions {
|
||||
/// Whether the fulltext index is case-sensitive.
|
||||
#[serde(default)]
|
||||
pub case_sensitive: bool,
|
||||
/// The fulltext backend to use.
|
||||
#[serde(default)]
|
||||
pub backend: FulltextBackend,
|
||||
}
|
||||
|
||||
impl fmt::Display for FulltextOptions {
|
||||
@@ -522,11 +526,30 @@ impl fmt::Display for FulltextOptions {
|
||||
if self.enable {
|
||||
write!(f, ", analyzer={}", self.analyzer)?;
|
||||
write!(f, ", case_sensitive={}", self.case_sensitive)?;
|
||||
write!(f, ", backend={}", self.backend)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// The backend of the fulltext index.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub enum FulltextBackend {
|
||||
#[default]
|
||||
Tantivy,
|
||||
Bloom, // TODO(zhongzc): when bloom is ready, use it as default
|
||||
}
|
||||
|
||||
impl fmt::Display for FulltextBackend {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
FulltextBackend::Tantivy => write!(f, "tantivy"),
|
||||
FulltextBackend::Bloom => write!(f, "bloom"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<HashMap<String, String>> for FulltextOptions {
|
||||
type Error = Error;
|
||||
|
||||
@@ -575,6 +598,19 @@ impl TryFrom<HashMap<String, String>> for FulltextOptions {
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(backend) = options.get(COLUMN_FULLTEXT_OPT_KEY_BACKEND) {
|
||||
match backend.to_ascii_lowercase().as_str() {
|
||||
"bloom" => fulltext_options.backend = FulltextBackend::Bloom,
|
||||
"tantivy" => fulltext_options.backend = FulltextBackend::Tantivy,
|
||||
_ => {
|
||||
return InvalidFulltextOptionSnafu {
|
||||
msg: format!("{backend}, expected: 'bloom' | 'tantivy'"),
|
||||
}
|
||||
.fail();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(fulltext_options)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -26,16 +27,20 @@ use crate::external_provider::ExternalTempFileProvider;
|
||||
use crate::fulltext_index::create::FulltextIndexCreator;
|
||||
use crate::fulltext_index::error::{
|
||||
AbortedSnafu, BiErrorsSnafu, BloomFilterFinishSnafu, ExternalSnafu, PuffinAddBlobSnafu, Result,
|
||||
SerializeToJsonSnafu,
|
||||
};
|
||||
use crate::fulltext_index::tokenizer::{Analyzer, ChineseTokenizer, EnglishTokenizer};
|
||||
use crate::fulltext_index::Config;
|
||||
|
||||
const PIPE_BUFFER_SIZE_FOR_SENDING_BLOB: usize = 8192;
|
||||
|
||||
pub const KEY_FULLTEXT_CONFIG: &str = "fulltext_config";
|
||||
|
||||
/// `BloomFilterFulltextIndexCreator` is for creating a fulltext index using a bloom filter.
|
||||
pub struct BloomFilterFulltextIndexCreator {
|
||||
inner: Option<BloomFilterCreator>,
|
||||
analyzer: Analyzer,
|
||||
config: Config,
|
||||
}
|
||||
|
||||
impl BloomFilterFulltextIndexCreator {
|
||||
@@ -61,6 +66,7 @@ impl BloomFilterFulltextIndexCreator {
|
||||
Self {
|
||||
inner: Some(inner),
|
||||
analyzer,
|
||||
config,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -89,10 +95,17 @@ impl FulltextIndexCreator for BloomFilterFulltextIndexCreator {
|
||||
|
||||
let (tx, rx) = tokio::io::duplex(PIPE_BUFFER_SIZE_FOR_SENDING_BLOB);
|
||||
|
||||
let property_key = KEY_FULLTEXT_CONFIG.to_string();
|
||||
let property_value = serde_json::to_string(&self.config).context(SerializeToJsonSnafu)?;
|
||||
|
||||
let (index_finish, puffin_add_blob) = futures::join!(
|
||||
creator.finish(tx.compat_write()),
|
||||
// TODO(zhongzc): add fulltext config properties
|
||||
puffin_writer.put_blob(blob_key, rx.compat(), put_options, Default::default())
|
||||
puffin_writer.put_blob(
|
||||
blob_key,
|
||||
rx.compat(),
|
||||
put_options,
|
||||
HashMap::from([(property_key, property_value)]),
|
||||
)
|
||||
);
|
||||
|
||||
match (
|
||||
|
||||
@@ -104,6 +104,22 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to serialize to json"))]
|
||||
SerializeToJson {
|
||||
#[snafu(source)]
|
||||
error: serde_json::error::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to deserialize from json"))]
|
||||
DeserializeFromJson {
|
||||
#[snafu(source)]
|
||||
error: serde_json::error::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
}
|
||||
|
||||
impl ErrorExt for Error {
|
||||
@@ -122,6 +138,8 @@ impl ErrorExt for Error {
|
||||
PuffinAddBlob { source, .. } => source.status_code(),
|
||||
|
||||
External { source, .. } => source.status_code(),
|
||||
|
||||
SerializeToJson { .. } | DeserializeFromJson { .. } => StatusCode::Internal,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ use common_error::ext::ErrorExt;
|
||||
use common_error::status_code::StatusCode;
|
||||
use common_recordbatch::RecordBatches;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::schema::{ColumnSchema, FulltextAnalyzer, FulltextOptions};
|
||||
use datatypes::schema::{ColumnSchema, FulltextAnalyzer, FulltextBackend, FulltextOptions};
|
||||
use store_api::metadata::ColumnMetadata;
|
||||
use store_api::region_engine::{RegionEngine, RegionRole};
|
||||
use store_api::region_request::{
|
||||
@@ -90,6 +90,7 @@ fn alter_column_fulltext_options() -> RegionAlterRequest {
|
||||
enable: true,
|
||||
analyzer: FulltextAnalyzer::English,
|
||||
case_sensitive: false,
|
||||
backend: FulltextBackend::Bloom,
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -557,6 +558,7 @@ async fn test_alter_column_fulltext_options() {
|
||||
enable: true,
|
||||
analyzer: FulltextAnalyzer::English,
|
||||
case_sensitive: false,
|
||||
backend: FulltextBackend::Bloom,
|
||||
};
|
||||
let check_fulltext_options = |engine: &MitoEngine, expected: &FulltextOptions| {
|
||||
let current_fulltext_options = engine
|
||||
|
||||
@@ -46,6 +46,8 @@ pub(crate) const TYPE_INVERTED_INDEX: &str = "inverted_index";
|
||||
pub(crate) const TYPE_FULLTEXT_INDEX: &str = "fulltext_index";
|
||||
pub(crate) const TYPE_BLOOM_FILTER_INDEX: &str = "bloom_filter_index";
|
||||
|
||||
const DEFAULT_FULLTEXT_BLOOM_ROW_GRANULARITY: usize = 8096;
|
||||
|
||||
/// Output of the index creation.
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct IndexOutput {
|
||||
@@ -292,6 +294,7 @@ impl IndexerBuilderImpl {
|
||||
&self.intermediate_manager,
|
||||
&self.metadata,
|
||||
self.fulltext_index_config.compress,
|
||||
DEFAULT_FULLTEXT_BLOOM_ROW_GRANULARITY,
|
||||
mem_limit,
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -13,9 +13,11 @@
|
||||
// limitations under the License.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
use std::sync::Arc;
|
||||
|
||||
use common_telemetry::warn;
|
||||
use datatypes::schema::FulltextAnalyzer;
|
||||
use datatypes::schema::{FulltextAnalyzer, FulltextBackend};
|
||||
use index::fulltext_index::create::{
|
||||
BloomFilterFulltextIndexCreator, FulltextIndexCreator, TantivyFulltextIndexCreator,
|
||||
};
|
||||
@@ -33,7 +35,9 @@ use crate::error::{
|
||||
use crate::read::Batch;
|
||||
use crate::sst::file::FileId;
|
||||
use crate::sst::index::fulltext_index::{INDEX_BLOB_TYPE_BLOOM, INDEX_BLOB_TYPE_TANTIVY};
|
||||
use crate::sst::index::intermediate::IntermediateManager;
|
||||
use crate::sst::index::intermediate::{
|
||||
IntermediateLocation, IntermediateManager, TempFileProvider,
|
||||
};
|
||||
use crate::sst::index::puffin_manager::SstPuffinWriter;
|
||||
use crate::sst::index::statistics::{ByteCount, RowCount, Statistics};
|
||||
use crate::sst::index::TYPE_FULLTEXT_INDEX;
|
||||
@@ -56,6 +60,7 @@ impl FulltextIndexer {
|
||||
intermediate_manager: &IntermediateManager,
|
||||
metadata: &RegionMetadataRef,
|
||||
compress: bool,
|
||||
bloom_row_granularity: usize,
|
||||
mem_limit: usize,
|
||||
) -> Result<Option<Self>> {
|
||||
let mut creators = HashMap::new();
|
||||
@@ -86,11 +91,29 @@ impl FulltextIndexer {
|
||||
case_sensitive: options.case_sensitive,
|
||||
};
|
||||
|
||||
// TODO(zhongzc): according to fulltext options, choose in the Tantivy flavor or Bloom Filter flavor.
|
||||
let creator = TantivyFulltextIndexCreator::new(&intm_path, config, mem_limit)
|
||||
.await
|
||||
.context(CreateFulltextCreatorSnafu)?;
|
||||
let inner = AltFulltextCreator::Tantivy(creator);
|
||||
let inner = match options.backend {
|
||||
FulltextBackend::Tantivy => {
|
||||
let creator = TantivyFulltextIndexCreator::new(&intm_path, config, mem_limit)
|
||||
.await
|
||||
.context(CreateFulltextCreatorSnafu)?;
|
||||
AltFulltextCreator::Tantivy(creator)
|
||||
}
|
||||
FulltextBackend::Bloom => {
|
||||
let temp_file_provider = Arc::new(TempFileProvider::new(
|
||||
IntermediateLocation::new(&metadata.region_id, sst_file_id),
|
||||
intermediate_manager.clone(),
|
||||
));
|
||||
let global_memory_usage = Arc::new(AtomicUsize::new(0));
|
||||
let creator = BloomFilterFulltextIndexCreator::new(
|
||||
config,
|
||||
bloom_row_granularity,
|
||||
temp_file_provider,
|
||||
global_memory_usage,
|
||||
Some(mem_limit),
|
||||
);
|
||||
AltFulltextCreator::Bloom(creator)
|
||||
}
|
||||
};
|
||||
|
||||
creators.insert(
|
||||
column_id,
|
||||
@@ -377,6 +400,7 @@ mod tests {
|
||||
enable: true,
|
||||
analyzer: FulltextAnalyzer::English,
|
||||
case_sensitive: true,
|
||||
backend: FulltextBackend::Tantivy,
|
||||
})
|
||||
.unwrap(),
|
||||
semantic_type: SemanticType::Field,
|
||||
@@ -392,6 +416,7 @@ mod tests {
|
||||
enable: true,
|
||||
analyzer: FulltextAnalyzer::English,
|
||||
case_sensitive: false,
|
||||
backend: FulltextBackend::Tantivy,
|
||||
})
|
||||
.unwrap(),
|
||||
semantic_type: SemanticType::Field,
|
||||
@@ -407,6 +432,7 @@ mod tests {
|
||||
enable: true,
|
||||
analyzer: FulltextAnalyzer::Chinese,
|
||||
case_sensitive: false,
|
||||
backend: FulltextBackend::Tantivy,
|
||||
})
|
||||
.unwrap(),
|
||||
semantic_type: SemanticType::Field,
|
||||
@@ -504,6 +530,7 @@ mod tests {
|
||||
&intm_mgr,
|
||||
&region_metadata,
|
||||
true,
|
||||
8096,
|
||||
1024,
|
||||
)
|
||||
.await
|
||||
|
||||
@@ -21,16 +21,19 @@ use api::v1::column_def::options_from_column_schema;
|
||||
use api::v1::{
|
||||
set_index, unset_index, AddColumn, AddColumns, AlterDatabaseExpr, AlterTableExpr, Analyzer,
|
||||
ColumnDataType, ColumnDataTypeExtension, CreateFlowExpr, CreateTableExpr, CreateViewExpr,
|
||||
DropColumn, DropColumns, ExpireAfter, ModifyColumnType, ModifyColumnTypes, RenameTable,
|
||||
SemanticType, SetDatabaseOptions, SetFulltext, SetIndex, SetInverted, SetSkipping,
|
||||
SetTableOptions, SkippingIndexType as PbSkippingIndexType, TableName, UnsetDatabaseOptions,
|
||||
UnsetFulltext, UnsetIndex, UnsetInverted, UnsetSkipping, UnsetTableOptions,
|
||||
DropColumn, DropColumns, ExpireAfter, FulltextBackend as PbFulltextBackend, ModifyColumnType,
|
||||
ModifyColumnTypes, RenameTable, SemanticType, SetDatabaseOptions, SetFulltext, SetIndex,
|
||||
SetInverted, SetSkipping, SetTableOptions, SkippingIndexType as PbSkippingIndexType, TableName,
|
||||
UnsetDatabaseOptions, UnsetFulltext, UnsetIndex, UnsetInverted, UnsetSkipping,
|
||||
UnsetTableOptions,
|
||||
};
|
||||
use common_error::ext::BoxedError;
|
||||
use common_grpc_expr::util::ColumnExpr;
|
||||
use common_time::Timezone;
|
||||
use datafusion::sql::planner::object_name_to_table_reference;
|
||||
use datatypes::schema::{ColumnSchema, FulltextAnalyzer, Schema, SkippingIndexType, COMMENT_KEY};
|
||||
use datatypes::schema::{
|
||||
ColumnSchema, FulltextAnalyzer, FulltextBackend, Schema, SkippingIndexType, COMMENT_KEY,
|
||||
};
|
||||
use file_engine::FileOptions;
|
||||
use query::sql::{
|
||||
check_file_to_table_schema_compatibility, file_column_schemas_to_table,
|
||||
@@ -581,6 +584,10 @@ pub(crate) fn to_alter_table_expr(
|
||||
FulltextAnalyzer::Chinese => Analyzer::Chinese.into(),
|
||||
},
|
||||
case_sensitive: options.case_sensitive,
|
||||
backend: match options.backend {
|
||||
FulltextBackend::Bloom => PbFulltextBackend::Bloom.into(),
|
||||
FulltextBackend::Tantivy => PbFulltextBackend::Tantivy.into(),
|
||||
},
|
||||
})),
|
||||
},
|
||||
sql::statements::alter::SetIndexOperation::Inverted { column_name } => SetIndex {
|
||||
|
||||
@@ -444,7 +444,7 @@ mod tests {
|
||||
use std::assert_matches::assert_matches;
|
||||
|
||||
use common_error::ext::ErrorExt;
|
||||
use datatypes::schema::{FulltextAnalyzer, FulltextOptions};
|
||||
use datatypes::schema::{FulltextAnalyzer, FulltextBackend, FulltextOptions};
|
||||
use sqlparser::ast::{ColumnDef, ColumnOption, ColumnOptionDef, DataType};
|
||||
|
||||
use super::*;
|
||||
@@ -958,7 +958,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_parse_alter_column_fulltext() {
|
||||
let sql = "ALTER TABLE test_table MODIFY COLUMN a SET FULLTEXT INDEX WITH(analyzer='English',case_sensitive='false')";
|
||||
let sql = "ALTER TABLE test_table MODIFY COLUMN a SET FULLTEXT INDEX WITH(analyzer='English',case_sensitive='false',backend='bloom')";
|
||||
let mut result =
|
||||
ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default())
|
||||
.unwrap();
|
||||
@@ -984,7 +984,8 @@ mod tests {
|
||||
FulltextOptions {
|
||||
enable: true,
|
||||
analyzer: FulltextAnalyzer::English,
|
||||
case_sensitive: false
|
||||
case_sensitive: false,
|
||||
backend: FulltextBackend::Bloom,
|
||||
},
|
||||
*options
|
||||
);
|
||||
|
||||
@@ -28,8 +28,9 @@ use datafusion_sql::planner::{ContextProvider, SqlToRel};
|
||||
use datafusion_sql::TableReference;
|
||||
use datatypes::arrow::datatypes::DataType;
|
||||
use datatypes::schema::{
|
||||
COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE,
|
||||
COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE,
|
||||
COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_BACKEND,
|
||||
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY,
|
||||
COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE,
|
||||
};
|
||||
use snafu::ResultExt;
|
||||
|
||||
@@ -124,6 +125,7 @@ pub fn validate_column_fulltext_create_option(key: &str) -> bool {
|
||||
[
|
||||
COLUMN_FULLTEXT_OPT_KEY_ANALYZER,
|
||||
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE,
|
||||
COLUMN_FULLTEXT_OPT_KEY_BACKEND,
|
||||
]
|
||||
.contains(&key)
|
||||
}
|
||||
|
||||
@@ -181,7 +181,7 @@ impl Display for AlterTableOperation {
|
||||
column_name,
|
||||
options,
|
||||
} => {
|
||||
write!(f, "MODIFY COLUMN {column_name} SET FULLTEXT INDEX WITH(analyzer={0}, case_sensitive={1})", options.analyzer, options.case_sensitive)
|
||||
write!(f, "MODIFY COLUMN {column_name} SET FULLTEXT INDEX WITH(analyzer={0}, case_sensitive={1}, backend={2})", options.analyzer, options.case_sensitive, options.backend)
|
||||
}
|
||||
SetIndexOperation::Inverted { column_name } => {
|
||||
write!(f, "MODIFY COLUMN {column_name} SET INVERTED INDEX")
|
||||
@@ -425,7 +425,7 @@ ALTER TABLE monitor RENAME monitor_new"#,
|
||||
}
|
||||
}
|
||||
|
||||
let sql = "ALTER TABLE monitor MODIFY COLUMN a SET FULLTEXT INDEX WITH(analyzer='English',case_sensitive='false')";
|
||||
let sql = "ALTER TABLE monitor MODIFY COLUMN a SET FULLTEXT INDEX WITH(analyzer='English',case_sensitive='false',backend='bloom')";
|
||||
let stmts =
|
||||
ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default())
|
||||
.unwrap();
|
||||
@@ -437,7 +437,7 @@ ALTER TABLE monitor RENAME monitor_new"#,
|
||||
let new_sql = format!("\n{}", set);
|
||||
assert_eq!(
|
||||
r#"
|
||||
ALTER TABLE monitor MODIFY COLUMN a SET FULLTEXT INDEX WITH(analyzer=English, case_sensitive=false)"#,
|
||||
ALTER TABLE monitor MODIFY COLUMN a SET FULLTEXT INDEX WITH(analyzer=English, case_sensitive=false, backend=bloom)"#,
|
||||
&new_sql
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1030,7 +1030,7 @@ fn unset_column_fulltext_options(
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::schema::ColumnSchema;
|
||||
use datatypes::schema::{ColumnSchema, FulltextAnalyzer, FulltextBackend};
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -1455,8 +1455,9 @@ mod test {
|
||||
column_name: "b".to_string(),
|
||||
options: FulltextOptions {
|
||||
enable: true,
|
||||
analyzer: datatypes::schema::FulltextAnalyzer::Chinese,
|
||||
analyzer: FulltextAnalyzer::Chinese,
|
||||
case_sensitive: true,
|
||||
backend: FulltextBackend::Bloom,
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
@@ -17,15 +17,17 @@ use std::fmt::{self, Display};
|
||||
|
||||
use api::helper::ColumnDataTypeWrapper;
|
||||
use api::v1::add_column_location::LocationType;
|
||||
use api::v1::column_def::{as_fulltext_option, as_skipping_index_type};
|
||||
use api::v1::column_def::{
|
||||
as_fulltext_option_analyzer, as_fulltext_option_backend, as_skipping_index_type,
|
||||
};
|
||||
use api::v1::region::{
|
||||
alter_request, compact_request, region_request, AlterRequest, AlterRequests, CloseRequest,
|
||||
CompactRequest, CreateRequest, CreateRequests, DeleteRequests, DropRequest, DropRequests,
|
||||
FlushRequest, InsertRequests, OpenRequest, TruncateRequest,
|
||||
};
|
||||
use api::v1::{
|
||||
self, set_index, Analyzer, Option as PbOption, Rows, SemanticType,
|
||||
SkippingIndexType as PbSkippingIndexType, WriteHint,
|
||||
self, set_index, Analyzer, FulltextBackend as PbFulltextBackend, Option as PbOption, Rows,
|
||||
SemanticType, SkippingIndexType as PbSkippingIndexType, WriteHint,
|
||||
};
|
||||
pub use common_base::AffectedRows;
|
||||
use common_time::TimeToLive;
|
||||
@@ -729,10 +731,13 @@ impl TryFrom<alter_request::Kind> for AlterKind {
|
||||
column_name: x.column_name.clone(),
|
||||
options: FulltextOptions {
|
||||
enable: x.enable,
|
||||
analyzer: as_fulltext_option(
|
||||
analyzer: as_fulltext_option_analyzer(
|
||||
Analyzer::try_from(x.analyzer).context(DecodeProtoSnafu)?,
|
||||
),
|
||||
case_sensitive: x.case_sensitive,
|
||||
backend: as_fulltext_option_backend(
|
||||
PbFulltextBackend::try_from(x.backend).context(DecodeProtoSnafu)?,
|
||||
),
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -1149,7 +1154,7 @@ mod tests {
|
||||
use api::v1::region::RegionColumnDef;
|
||||
use api::v1::{ColumnDataType, ColumnDef};
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::schema::{ColumnSchema, FulltextAnalyzer};
|
||||
use datatypes::schema::{ColumnSchema, FulltextAnalyzer, FulltextBackend};
|
||||
|
||||
use super::*;
|
||||
use crate::metadata::RegionMetadataBuilder;
|
||||
@@ -1631,6 +1636,7 @@ mod tests {
|
||||
enable: true,
|
||||
analyzer: FulltextAnalyzer::Chinese,
|
||||
case_sensitive: false,
|
||||
backend: FulltextBackend::Bloom,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
@@ -1233,7 +1233,9 @@ mod tests {
|
||||
use common_error::ext::ErrorExt;
|
||||
use common_error::status_code::StatusCode;
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaBuilder};
|
||||
use datatypes::schema::{
|
||||
ColumnSchema, FulltextAnalyzer, FulltextBackend, Schema, SchemaBuilder,
|
||||
};
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -1806,8 +1808,9 @@ mod tests {
|
||||
column_name: "my_tag_first".to_string(),
|
||||
options: FulltextOptions {
|
||||
enable: true,
|
||||
analyzer: datatypes::schema::FulltextAnalyzer::Chinese,
|
||||
analyzer: FulltextAnalyzer::Chinese,
|
||||
case_sensitive: true,
|
||||
backend: FulltextBackend::Bloom,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user