• 中文、日文、韩文(CJK)和泰语
Manticore 内置支持对采用连续书写的语言(即不使用空格或其他标记来分隔单词或句子的语言)进行索引。您可以通过以下几种方式处理这些语言的文本:
使用 ICU 库进行精确分词。目前仅支持中文。
SQL:
CREATE TABLE products(title text, price float) charset_table = 'cont' morphology = 'icu_chinese'
POST /cli -d "
CREATE TABLE products(title text, price float) charset_table = 'cont' morphology = 'icu_chinese'"
PHP:
$index = new \Manticoresearch\Index($client);
$index->setName('products');
$index->create([
'title'=>['type'=>'text'],
'price'=>['type'=>'float']
],[
'charset_table' => 'cont',
'morphology' => 'icu_chinese'
]);
Python:
utilsApi.sql('CREATE TABLE products(title text, price float) charset_table = \'cont\' morphology = \'icu_chinese\'')
Javascript:
res = await utilsApi.sql('CREATE TABLE products(title text, price float) charset_table = \'cont\' morphology = \'icu_chinese\'');
Java:
utilsApi.sql("CREATE TABLE products(title text, price float) charset_table = 'cont' morphology = 'icu_chinese'");
C#:
utilsApi.Sql("CREATE TABLE products(title text, price float) charset_table = 'cont' morphology = 'icu_chinese'");
table products {
charset_table = cont
morphology = icu_chinese
type = rt
path = tbl
rt_field = title
rt_attr_float = price
}
使用 Jieba 库进行精确分词。与 ICU 类似,目前也仅支持中文。
SQL:
CREATE TABLE products(title text, price float) charset_table = 'cont' morphology = 'jieba_chinese'
POST /cli -d "
CREATE TABLE products(title text, price float) charset_table = 'cont' morphology = 'jieba_chinese'"
PHP:
$index = new \Manticoresearch\Index($client);
$index->setName('products');
$index->create([
'title'=>['type'=>'text'],
'price'=>['type'=>'float']
],[
'charset_table' => 'cont',
'morphology' => 'jieba_chinese'
]);
Python:
utilsApi.sql('CREATE TABLE products(title text, price float) charset_table = \'cont\' morphology = \'jieba_chinese\'')
Javascript:
res = await utilsApi.sql('CREATE TABLE products(title text, price float) charset_table = \'cont\' morphology = \'jieba_chinese\'');
Java:
utilsApi.sql("CREATE TABLE products(title text, price float) charset_table = 'cont' morphology = 'jieba_chinese'");
C#:
utilsApi.Sql("CREATE TABLE products(title text, price float) charset_table = 'cont' morphology = 'jieba_chinese'");
table products {
charset_table = cont
morphology = jieba_chinese
type = rt
path = tbl
rt_field = title
rt_attr_float = price
}
使用 N-gram 选项 ngram_len 和 ngram_chars 提供基本支持。对于每种采用连续书写的语言,Manticore 都提供了单独的字符集表(chinese、korean、japanese、thai)。另外,您也可以使用通用的 cont 字符集表来同时支持所有 CJK 语言和泰语,或者仅使用 cjk 字符集表来支持所有 CJK 语言。
SQL:
CREATE TABLE products(title text, price float) charset_table = 'non_cont' ngram_len = '1' ngram_chars = 'cont'
/* Or, alternatively */
CREATE TABLE products(title text, price float) charset_table = 'non_cont' ngram_len = '1' ngram_chars = 'cjk,thai'
POST /cli -d "
CREATE TABLE products(title text, price float) charset_table = 'non_cont' ngram_len = '1' ngram_chars = 'cont'"
/* Or, alternatively */
POST /cli -d "
CREATE TABLE products(title text, price float) charset_table = 'non_cont' ngram_len = '1' ngram_chars = 'cjk,thai'"
PHP:
$index = new \Manticoresearch\Index($client);
$index->setName('products');
$index->create([
'title'=>['type'=>'text'],
'price'=>['type'=>'float']
],[
'charset_table' => 'non_cont',
'ngram_len' => '1',
'ngram_chars' => 'cont'
]);
Python:
utilsApi.sql('CREATE TABLE products(title text, price float) charset_table = \'non_cont\' ngram_len = \'1\' ngram_chars = \'cont\'')
Javascript:
res = await utilsApi.sql('CREATE TABLE products(title text, price float) charset_table = \'non_cont\' ngram_len = \'1\' ngram_chars = \'cont\'');
Java:
utilsApi.sql("CREATE TABLE products(title text, price float) charset_table = 'non_cont' ngram_len = '1' ngram_chars = 'cont'");
C#:
utilsApi.Sql("CREATE TABLE products(title text, price float) charset_table = 'non_cont' ngram_len = '1' ngram_chars = 'cont'");
table products {
charset_table = non_cont
ngram_len = 1
ngram_chars = cont
type = rt
path = tbl
rt_field = title
rt_attr_float = price
}
此外,Manticore 内置了中文停用词表,其别名为 zh。
SQL:
CREATE TABLE products(title text, price float) charset_table = 'chinese' morphology = 'icu_chinese' stopwords = 'zh'
POST /cli -d "
CREATE TABLE products(title text, price float) charset_table = 'chinese' morphology = 'icu_chinese' stopwords = 'zh'"
PHP:
$index = new \Manticoresearch\Index($client);
$index->setName('products');
$index->create([
'title'=>['type'=>'text'],
'price'=>['type'=>'float']
],[
'charset_table' => 'chinese',
'morphology' => 'icu_chinese',
'stopwords' => 'zh'
]);
Python:
utilsApi.sql('CREATE TABLE products(title text, price float) charset_table = \'chinese\' morphology = \'icu_chinese\' stopwords = \'zh\'')
Javascript:
res = await utilsApi.sql('CREATE TABLE products(title text, price float) charset_table = \'chinese\' morphology = \'icu_chinese\' stopwords = \'zh\'');
Java:
utilsApi.sql("CREATE TABLE products(title text, price float) charset_table = 'chinese' morphology = 'icu_chinese' stopwords = 'zh'");
C#:
utilsApi.Sql("CREATE TABLE products(title text, price float) charset_table = 'chinese' morphology = 'icu_chinese' stopwords = 'zh'");
table products {
charset_table = chinese
morphology = icu_chinese
stopwords = zh
type = rt
path = tbl
rt_field = title
rt_attr_float = price
}
最后更新于