This commit is contained in:
tuna2134
2024-09-09 15:05:56 +00:00
parent f435d0f94c
commit 30c2bd1e77
7 changed files with 1133 additions and 4 deletions

10
Cargo.lock generated
View File

@@ -1293,24 +1293,26 @@ dependencies = [
"once_cell",
"ort",
"regex",
"serde",
"serde_json",
"thiserror",
"tokenizers",
]
[[package]]
name = "serde"
version = "1.0.209"
version = "1.0.210"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99fce0ffe7310761ca6bf9faf5115afbc19688edd00171d81b1bb1b116c63e09"
checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.209"
version = "1.0.210"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5831b979fd7b5439637af1752d535ff49f4860c0f341d1baeb6faf0f4242170"
checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f"
dependencies = [
"proc-macro2",
"quote",

View File

@@ -10,5 +10,7 @@ ndarray = "0.16.1"
once_cell = "1.19.0"
ort = { git = "https://github.com/pykeio/ort.git", version = "2.0.0-rc.5" }
regex = "1.10.6"
serde = { version = "1.0.210", features = ["derive"] }
serde_json = "1.0.128"
thiserror = "1.0.63"
tokenizers = "0.20.0"

194
sbv2_core/convert.py Normal file
View File

@@ -0,0 +1,194 @@
import json
__MORA_LIST_MINIMUM: list[tuple[str, str | None, str]] = [
("ヴォ", "v", "o"),
("ヴェ", "v", "e"),
("ヴィ", "v", "i"),
("ヴァ", "v", "a"),
("", "v", "u"),
("", None, "N"),
("", "w", "a"),
("", "r", "o"),
("", "r", "e"),
("", "r", "u"),
("リョ", "ry", "o"),
("リュ", "ry", "u"),
("リャ", "ry", "a"),
("リェ", "ry", "e"),
("", "r", "i"),
("", "r", "a"),
("", "y", "o"),
("", "y", "u"),
("", "y", "a"),
("", "m", "o"),
("", "m", "e"),
("", "m", "u"),
("ミョ", "my", "o"),
("ミュ", "my", "u"),
("ミャ", "my", "a"),
("ミェ", "my", "e"),
("", "m", "i"),
("", "m", "a"),
("", "p", "o"),
("", "b", "o"),
("", "h", "o"),
("", "p", "e"),
("", "b", "e"),
("", "h", "e"),
("", "p", "u"),
("", "b", "u"),
("フォ", "f", "o"),
("フェ", "f", "e"),
("フィ", "f", "i"),
("ファ", "f", "a"),
("", "f", "u"),
("ピョ", "py", "o"),
("ピュ", "py", "u"),
("ピャ", "py", "a"),
("ピェ", "py", "e"),
("", "p", "i"),
("ビョ", "by", "o"),
("ビュ", "by", "u"),
("ビャ", "by", "a"),
("ビェ", "by", "e"),
("", "b", "i"),
("ヒョ", "hy", "o"),
("ヒュ", "hy", "u"),
("ヒャ", "hy", "a"),
("ヒェ", "hy", "e"),
("", "h", "i"),
("", "p", "a"),
("", "b", "a"),
("", "h", "a"),
("", "n", "o"),
("", "n", "e"),
("", "n", "u"),
("ニョ", "ny", "o"),
("ニュ", "ny", "u"),
("ニャ", "ny", "a"),
("ニェ", "ny", "e"),
("", "n", "i"),
("", "n", "a"),
("ドゥ", "d", "u"),
("", "d", "o"),
("トゥ", "t", "u"),
("", "t", "o"),
("デョ", "dy", "o"),
("デュ", "dy", "u"),
("デャ", "dy", "a"),
# ("デェ", "dy", "e"),
("ディ", "d", "i"),
("", "d", "e"),
("テョ", "ty", "o"),
("テュ", "ty", "u"),
("テャ", "ty", "a"),
("ティ", "t", "i"),
("", "t", "e"),
("ツォ", "ts", "o"),
("ツェ", "ts", "e"),
("ツィ", "ts", "i"),
("ツァ", "ts", "a"),
("", "ts", "u"),
("", None, "q"), # 「cl」から「q」に変更
("チョ", "ch", "o"),
("チュ", "ch", "u"),
("チャ", "ch", "a"),
("チェ", "ch", "e"),
("", "ch", "i"),
("", "d", "a"),
("", "t", "a"),
("", "z", "o"),
("", "s", "o"),
("", "z", "e"),
("", "s", "e"),
("ズィ", "z", "i"),
("", "z", "u"),
("スィ", "s", "i"),
("", "s", "u"),
("ジョ", "j", "o"),
("ジュ", "j", "u"),
("ジャ", "j", "a"),
("ジェ", "j", "e"),
("", "j", "i"),
("ショ", "sh", "o"),
("シュ", "sh", "u"),
("シャ", "sh", "a"),
("シェ", "sh", "e"),
("", "sh", "i"),
("", "z", "a"),
("", "s", "a"),
("", "g", "o"),
("", "k", "o"),
("", "g", "e"),
("", "k", "e"),
("グヮ", "gw", "a"),
("", "g", "u"),
("クヮ", "kw", "a"),
("", "k", "u"),
("ギョ", "gy", "o"),
("ギュ", "gy", "u"),
("ギャ", "gy", "a"),
("ギェ", "gy", "e"),
("", "g", "i"),
("キョ", "ky", "o"),
("キュ", "ky", "u"),
("キャ", "ky", "a"),
("キェ", "ky", "e"),
("", "k", "i"),
("", "g", "a"),
("", "k", "a"),
("", None, "o"),
("", None, "e"),
("ウォ", "w", "o"),
("ウェ", "w", "e"),
("ウィ", "w", "i"),
("", None, "u"),
("イェ", "y", "e"),
("", None, "i"),
("", None, "a"),
]
__MORA_LIST_ADDITIONAL: list[tuple[str, str | None, str]] = [
("ヴョ", "by", "o"),
("ヴュ", "by", "u"),
("ヴャ", "by", "a"),
("", None, "o"),
("", None, "e"),
("", None, "i"),
("", "w", "a"),
("", "y", "o"),
("", "y", "u"),
("", "z", "u"),
("", "j", "i"),
("", "k", "e"),
("", "y", "a"),
("", None, "o"),
("", None, "e"),
("", None, "u"),
("", None, "i"),
("", None, "a"),
]
# Convert both mora tables into JSON-serialisable records, keeping the
# original ordering of each table.
data = {
    "minimum": [
        {"mora": mora, "consonant": consonant, "vowel": vowel}
        for mora, consonant, vowel in __MORA_LIST_MINIMUM
    ],
    "additional": [
        {"mora": mora, "consonant": consonant, "vowel": vowel}
        for mora, consonant, vowel in __MORA_LIST_ADDITIONAL
    ],
}

# ensure_ascii=False writes the katakana verbatim, so the file encoding is
# pinned to UTF-8 instead of being left to the platform's locale default.
with open("src/mora_list.json", "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False, indent=4)

View File

@@ -2,6 +2,7 @@ pub mod bert;
pub mod error;
pub mod norm;
pub mod text;
pub mod mora;
pub fn add(left: usize, right: usize) -> usize {
left + right

34
sbv2_core/src/mora.rs Normal file
View File

@@ -0,0 +1,34 @@
use serde::{Deserialize, Serialize};
use once_cell::sync::Lazy;
use std::collections::HashMap;
/// One mora entry: a katakana spelling together with its phonemes.
///
/// The field names match the keys of each object in `mora_list.json`.
#[derive(Debug, Serialize, Deserialize)]
pub struct Mora {
/// Katakana text of the mora.
pub mora: String,
/// Consonant phoneme; `None` when the entry has no consonant (`null` in the JSON).
pub consonant: Option<String>,
/// Vowel phoneme of the mora.
pub vowel: String,
}
/// Top-level layout of `mora_list.json`: two lists of [`Mora`] entries.
#[derive(Debug, Serialize, Deserialize)]
pub struct MoraFile {
/// Entries stored under the `minimum` key.
pub minimum: Vec<Mora>,
/// Entries stored under the `additional` key.
pub additional: Vec<Mora>,
}
/// Mora tables decoded from the `mora_list.json` file embedded at compile time.
///
/// The JSON is parsed once, on first access, and shared by everything that is
/// derived from it.
static MORA_FILE: Lazy<MoraFile> = Lazy::new(|| {
    serde_json::from_str(include_str!("./mora_list.json"))
        .expect("embedded mora_list.json must deserialize into MoraFile")
});

/// Lookup table from a katakana mora to its `(consonant, vowel)` phonemes.
///
/// Built from the `minimum` entries followed by the `additional` entries, so
/// if the same katakana appears more than once the later entry wins.
pub static MORA_KATA_TO_MORA_PHONEMES: Lazy<HashMap<String, (Option<String>, String)>> =
    Lazy::new(|| {
        MORA_FILE
            .minimum
            .iter()
            .chain(MORA_FILE.additional.iter())
            .map(|entry| {
                (
                    entry.mora.clone(),
                    (entry.consonant.clone(), entry.vowel.clone()),
                )
            })
            .collect()
    });

View File

@@ -0,0 +1,816 @@
{
"minimum": [
{
"mora": "ヴォ",
"consonant": "v",
"vowel": "o"
},
{
"mora": "ヴェ",
"consonant": "v",
"vowel": "e"
},
{
"mora": "ヴィ",
"consonant": "v",
"vowel": "i"
},
{
"mora": "ヴァ",
"consonant": "v",
"vowel": "a"
},
{
"mora": "ヴ",
"consonant": "v",
"vowel": "u"
},
{
"mora": "ン",
"consonant": null,
"vowel": "N"
},
{
"mora": "ワ",
"consonant": "w",
"vowel": "a"
},
{
"mora": "ロ",
"consonant": "r",
"vowel": "o"
},
{
"mora": "レ",
"consonant": "r",
"vowel": "e"
},
{
"mora": "ル",
"consonant": "r",
"vowel": "u"
},
{
"mora": "リョ",
"consonant": "ry",
"vowel": "o"
},
{
"mora": "リュ",
"consonant": "ry",
"vowel": "u"
},
{
"mora": "リャ",
"consonant": "ry",
"vowel": "a"
},
{
"mora": "リェ",
"consonant": "ry",
"vowel": "e"
},
{
"mora": "リ",
"consonant": "r",
"vowel": "i"
},
{
"mora": "ラ",
"consonant": "r",
"vowel": "a"
},
{
"mora": "ヨ",
"consonant": "y",
"vowel": "o"
},
{
"mora": "ユ",
"consonant": "y",
"vowel": "u"
},
{
"mora": "ヤ",
"consonant": "y",
"vowel": "a"
},
{
"mora": "モ",
"consonant": "m",
"vowel": "o"
},
{
"mora": "メ",
"consonant": "m",
"vowel": "e"
},
{
"mora": "ム",
"consonant": "m",
"vowel": "u"
},
{
"mora": "ミョ",
"consonant": "my",
"vowel": "o"
},
{
"mora": "ミュ",
"consonant": "my",
"vowel": "u"
},
{
"mora": "ミャ",
"consonant": "my",
"vowel": "a"
},
{
"mora": "ミェ",
"consonant": "my",
"vowel": "e"
},
{
"mora": "ミ",
"consonant": "m",
"vowel": "i"
},
{
"mora": "マ",
"consonant": "m",
"vowel": "a"
},
{
"mora": "ポ",
"consonant": "p",
"vowel": "o"
},
{
"mora": "ボ",
"consonant": "b",
"vowel": "o"
},
{
"mora": "ホ",
"consonant": "h",
"vowel": "o"
},
{
"mora": "ペ",
"consonant": "p",
"vowel": "e"
},
{
"mora": "ベ",
"consonant": "b",
"vowel": "e"
},
{
"mora": "ヘ",
"consonant": "h",
"vowel": "e"
},
{
"mora": "プ",
"consonant": "p",
"vowel": "u"
},
{
"mora": "ブ",
"consonant": "b",
"vowel": "u"
},
{
"mora": "フォ",
"consonant": "f",
"vowel": "o"
},
{
"mora": "フェ",
"consonant": "f",
"vowel": "e"
},
{
"mora": "フィ",
"consonant": "f",
"vowel": "i"
},
{
"mora": "ファ",
"consonant": "f",
"vowel": "a"
},
{
"mora": "フ",
"consonant": "f",
"vowel": "u"
},
{
"mora": "ピョ",
"consonant": "py",
"vowel": "o"
},
{
"mora": "ピュ",
"consonant": "py",
"vowel": "u"
},
{
"mora": "ピャ",
"consonant": "py",
"vowel": "a"
},
{
"mora": "ピェ",
"consonant": "py",
"vowel": "e"
},
{
"mora": "ピ",
"consonant": "p",
"vowel": "i"
},
{
"mora": "ビョ",
"consonant": "by",
"vowel": "o"
},
{
"mora": "ビュ",
"consonant": "by",
"vowel": "u"
},
{
"mora": "ビャ",
"consonant": "by",
"vowel": "a"
},
{
"mora": "ビェ",
"consonant": "by",
"vowel": "e"
},
{
"mora": "ビ",
"consonant": "b",
"vowel": "i"
},
{
"mora": "ヒョ",
"consonant": "hy",
"vowel": "o"
},
{
"mora": "ヒュ",
"consonant": "hy",
"vowel": "u"
},
{
"mora": "ヒャ",
"consonant": "hy",
"vowel": "a"
},
{
"mora": "ヒェ",
"consonant": "hy",
"vowel": "e"
},
{
"mora": "ヒ",
"consonant": "h",
"vowel": "i"
},
{
"mora": "パ",
"consonant": "p",
"vowel": "a"
},
{
"mora": "バ",
"consonant": "b",
"vowel": "a"
},
{
"mora": "ハ",
"consonant": "h",
"vowel": "a"
},
{
"mora": "ノ",
"consonant": "n",
"vowel": "o"
},
{
"mora": "ネ",
"consonant": "n",
"vowel": "e"
},
{
"mora": "ヌ",
"consonant": "n",
"vowel": "u"
},
{
"mora": "ニョ",
"consonant": "ny",
"vowel": "o"
},
{
"mora": "ニュ",
"consonant": "ny",
"vowel": "u"
},
{
"mora": "ニャ",
"consonant": "ny",
"vowel": "a"
},
{
"mora": "ニェ",
"consonant": "ny",
"vowel": "e"
},
{
"mora": "ニ",
"consonant": "n",
"vowel": "i"
},
{
"mora": "ナ",
"consonant": "n",
"vowel": "a"
},
{
"mora": "ドゥ",
"consonant": "d",
"vowel": "u"
},
{
"mora": "ド",
"consonant": "d",
"vowel": "o"
},
{
"mora": "トゥ",
"consonant": "t",
"vowel": "u"
},
{
"mora": "ト",
"consonant": "t",
"vowel": "o"
},
{
"mora": "デョ",
"consonant": "dy",
"vowel": "o"
},
{
"mora": "デュ",
"consonant": "dy",
"vowel": "u"
},
{
"mora": "デャ",
"consonant": "dy",
"vowel": "a"
},
{
"mora": "ディ",
"consonant": "d",
"vowel": "i"
},
{
"mora": "デ",
"consonant": "d",
"vowel": "e"
},
{
"mora": "テョ",
"consonant": "ty",
"vowel": "o"
},
{
"mora": "テュ",
"consonant": "ty",
"vowel": "u"
},
{
"mora": "テャ",
"consonant": "ty",
"vowel": "a"
},
{
"mora": "ティ",
"consonant": "t",
"vowel": "i"
},
{
"mora": "テ",
"consonant": "t",
"vowel": "e"
},
{
"mora": "ツォ",
"consonant": "ts",
"vowel": "o"
},
{
"mora": "ツェ",
"consonant": "ts",
"vowel": "e"
},
{
"mora": "ツィ",
"consonant": "ts",
"vowel": "i"
},
{
"mora": "ツァ",
"consonant": "ts",
"vowel": "a"
},
{
"mora": "ツ",
"consonant": "ts",
"vowel": "u"
},
{
"mora": "ッ",
"consonant": null,
"vowel": "q"
},
{
"mora": "チョ",
"consonant": "ch",
"vowel": "o"
},
{
"mora": "チュ",
"consonant": "ch",
"vowel": "u"
},
{
"mora": "チャ",
"consonant": "ch",
"vowel": "a"
},
{
"mora": "チェ",
"consonant": "ch",
"vowel": "e"
},
{
"mora": "チ",
"consonant": "ch",
"vowel": "i"
},
{
"mora": "ダ",
"consonant": "d",
"vowel": "a"
},
{
"mora": "タ",
"consonant": "t",
"vowel": "a"
},
{
"mora": "ゾ",
"consonant": "z",
"vowel": "o"
},
{
"mora": "ソ",
"consonant": "s",
"vowel": "o"
},
{
"mora": "ゼ",
"consonant": "z",
"vowel": "e"
},
{
"mora": "セ",
"consonant": "s",
"vowel": "e"
},
{
"mora": "ズィ",
"consonant": "z",
"vowel": "i"
},
{
"mora": "ズ",
"consonant": "z",
"vowel": "u"
},
{
"mora": "スィ",
"consonant": "s",
"vowel": "i"
},
{
"mora": "ス",
"consonant": "s",
"vowel": "u"
},
{
"mora": "ジョ",
"consonant": "j",
"vowel": "o"
},
{
"mora": "ジュ",
"consonant": "j",
"vowel": "u"
},
{
"mora": "ジャ",
"consonant": "j",
"vowel": "a"
},
{
"mora": "ジェ",
"consonant": "j",
"vowel": "e"
},
{
"mora": "ジ",
"consonant": "j",
"vowel": "i"
},
{
"mora": "ショ",
"consonant": "sh",
"vowel": "o"
},
{
"mora": "シュ",
"consonant": "sh",
"vowel": "u"
},
{
"mora": "シャ",
"consonant": "sh",
"vowel": "a"
},
{
"mora": "シェ",
"consonant": "sh",
"vowel": "e"
},
{
"mora": "シ",
"consonant": "sh",
"vowel": "i"
},
{
"mora": "ザ",
"consonant": "z",
"vowel": "a"
},
{
"mora": "サ",
"consonant": "s",
"vowel": "a"
},
{
"mora": "ゴ",
"consonant": "g",
"vowel": "o"
},
{
"mora": "コ",
"consonant": "k",
"vowel": "o"
},
{
"mora": "ゲ",
"consonant": "g",
"vowel": "e"
},
{
"mora": "ケ",
"consonant": "k",
"vowel": "e"
},
{
"mora": "グヮ",
"consonant": "gw",
"vowel": "a"
},
{
"mora": "グ",
"consonant": "g",
"vowel": "u"
},
{
"mora": "クヮ",
"consonant": "kw",
"vowel": "a"
},
{
"mora": "ク",
"consonant": "k",
"vowel": "u"
},
{
"mora": "ギョ",
"consonant": "gy",
"vowel": "o"
},
{
"mora": "ギュ",
"consonant": "gy",
"vowel": "u"
},
{
"mora": "ギャ",
"consonant": "gy",
"vowel": "a"
},
{
"mora": "ギェ",
"consonant": "gy",
"vowel": "e"
},
{
"mora": "ギ",
"consonant": "g",
"vowel": "i"
},
{
"mora": "キョ",
"consonant": "ky",
"vowel": "o"
},
{
"mora": "キュ",
"consonant": "ky",
"vowel": "u"
},
{
"mora": "キャ",
"consonant": "ky",
"vowel": "a"
},
{
"mora": "キェ",
"consonant": "ky",
"vowel": "e"
},
{
"mora": "キ",
"consonant": "k",
"vowel": "i"
},
{
"mora": "ガ",
"consonant": "g",
"vowel": "a"
},
{
"mora": "カ",
"consonant": "k",
"vowel": "a"
},
{
"mora": "オ",
"consonant": null,
"vowel": "o"
},
{
"mora": "エ",
"consonant": null,
"vowel": "e"
},
{
"mora": "ウォ",
"consonant": "w",
"vowel": "o"
},
{
"mora": "ウェ",
"consonant": "w",
"vowel": "e"
},
{
"mora": "ウィ",
"consonant": "w",
"vowel": "i"
},
{
"mora": "ウ",
"consonant": null,
"vowel": "u"
},
{
"mora": "イェ",
"consonant": "y",
"vowel": "e"
},
{
"mora": "イ",
"consonant": null,
"vowel": "i"
},
{
"mora": "ア",
"consonant": null,
"vowel": "a"
}
],
"additional": [
{
"mora": "ヴョ",
"consonant": "by",
"vowel": "o"
},
{
"mora": "ヴュ",
"consonant": "by",
"vowel": "u"
},
{
"mora": "ヴャ",
"consonant": "by",
"vowel": "a"
},
{
"mora": "ヲ",
"consonant": null,
"vowel": "o"
},
{
"mora": "ヱ",
"consonant": null,
"vowel": "e"
},
{
"mora": "ヰ",
"consonant": null,
"vowel": "i"
},
{
"mora": "ヮ",
"consonant": "w",
"vowel": "a"
},
{
"mora": "ョ",
"consonant": "y",
"vowel": "o"
},
{
"mora": "ュ",
"consonant": "y",
"vowel": "u"
},
{
"mora": "ヅ",
"consonant": "z",
"vowel": "u"
},
{
"mora": "ヂ",
"consonant": "j",
"vowel": "i"
},
{
"mora": "ヶ",
"consonant": "k",
"vowel": "e"
},
{
"mora": "ャ",
"consonant": "y",
"vowel": "a"
},
{
"mora": "ォ",
"consonant": null,
"vowel": "o"
},
{
"mora": "ェ",
"consonant": null,
"vowel": "e"
},
{
"mora": "ゥ",
"consonant": null,
"vowel": "u"
},
{
"mora": "ィ",
"consonant": null,
"vowel": "i"
},
{
"mora": "ァ",
"consonant": null,
"vowel": "a"
}
]
}

View File

@@ -1,5 +1,6 @@
use crate::error::{Error, Result};
use crate::norm::{replace_punctuation, PUNCTUATIONS};
use crate::mora::MORA_KATA_TO_MORA_PHONEMES;
use jpreprocess::*;
use once_cell::sync::Lazy;
use regex::Regex;
@@ -61,6 +62,14 @@ impl JTalk {
}
}
/// Matches a run of one or more characters in the range U+30A0..=U+30FF.
///
/// The pattern is not anchored, so `is_match` reports whether such a run
/// occurs anywhere in the input, not whether the whole input consists of it.
static KATAKANA_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"[\u30A0-\u30FF]+").unwrap());
/// Every key of `MORA_KATA_TO_MORA_PHONEMES`, ordered from longest to
/// shortest by UTF-8 byte length so that longer moras come first.
static MORA_PATTERN: Lazy<Vec<String>> = Lazy::new(|| {
    let mut keys = MORA_KATA_TO_MORA_PHONEMES
        .keys()
        .cloned()
        .collect::<Vec<String>>();
    // Stable sort, descending by length.
    keys.sort_by_key(|key| std::cmp::Reverse(key.len()));
    keys
});
/// Matches one word character (capture group 1) followed by zero or more
/// long-vowel marks `ー` (capture group 2).
static LONG_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"(\w)(ー*)").unwrap());
struct JTalkProcess {
jpreprocess: Arc<JPreprocessType>,
parsed: Vec<String>,
@@ -104,9 +113,80 @@ impl JTalkProcess {
/// Entry point for grapheme-to-phoneme processing (work in progress).
///
/// Currently it only calls `g2phone_tone_wo_punct` and `text_to_seq_kata`,
/// prints the two sequences returned by the latter to stdout, and returns
/// `Ok(())`.
///
/// # Errors
///
/// Propagates any error returned by either of the two calls.
pub fn g2p(&self) -> Result<()> {
// The value is not used yet; the call can still fail and return early via `?`.
let phone_tone_list_wo_punct = self.g2phone_tone_wo_punct()?;
let (seq_text, seq_kata) = self.text_to_seq_kata()?;
// Debug output of the intermediate sequences.
println!("{:?}", seq_text);
println!("{:?}", seq_kata);
Ok(())
}
/// Placeholder for long-vowel (`ー`) handling across phoneme groups.
///
/// Walks the groups and, for every non-first group that starts with the
/// long-vowel mark `ー`, looks up the last phoneme of the preceding group.
/// Nothing is rewritten yet: the function always returns an empty vector.
///
/// Empty groups are tolerated both as the current group and as the preceding
/// one, so this never panics.
fn handle_long(sep_phonemes: Vec<Vec<String>>) -> Vec<Vec<String>> {
    for (i, phonemes) in sep_phonemes.iter().enumerate() {
        // An empty group has no leading phoneme to inspect.
        let starts_with_long_mark = phonemes
            .first()
            .map_or(false, |phoneme| phoneme.as_str() == "ー");
        if starts_with_long_mark && i != 0 {
            // The preceding group may itself be empty, so this is an `Option`.
            let _prev_phoneme = sep_phonemes[i - 1].last();
        }
    }
    // TODO: the resolved phonemes are not produced yet.
    vec![]
}
fn kata_to_phoneme_list(mut text: String) -> Result<Vec<String>> {
/*
if set(text).issubset(set(PUNCTUATIONS)):
return list(text)
# `text` がカタカナ(`ー`含む)のみからなるかどうかをチェック
if __KATAKANA_PATTERN.fullmatch(text) is None:
raise ValueError(f"Input must be katakana only: {text}")
def mora2phonemes(mora: str) -> str:
consonant, vowel = MORA_KATA_TO_MORA_PHONEMES[mora]
if consonant is None:
return f" {vowel}"
return f" {consonant} {vowel}"
spaced_phonemes = __MORA_PATTERN.sub(lambda m: mora2phonemes(m.group()), text)
# 長音記号「ー」の処理
long_replacement = lambda m: m.group(1) + (" " + m.group(1)) * len(m.group(2)) # type: ignore
spaced_phonemes = __LONG_PATTERN.sub(long_replacement, spaced_phonemes)
return spaced_phonemes.strip().split(" ")
*/
if PUNCTUATIONS.contains(&text.as_str()) {
return Ok(text.chars().map(|x| x.to_string()).collect());
}
if KATAKANA_PATTERN.is_match(&text) {
return Err(Error::ValueError(format!("Input must be katakana only: {}", text)));
}
fn mora2phonemes(mora: &str) -> String {
let (consonant, vowel) = MORA_KATA_TO_MORA_PHONEMES.get(mora).unwrap();
if consonant.is_none() {
return format!(" {}", vowel);
}
format!(" {} {}", consonant.as_ref().unwrap(), vowel)
}
for mora in MORA_PATTERN.iter() {
let mora = mora.to_string();
let phonemes = mora2phonemes(&mora);
text = text.replace(&mora, &phonemes);
}
let long_replacement = |m: &regex::Captures| {
let mut result = m.get(1).unwrap().as_str().to_string();
for _ in 0..m.get(2).unwrap().as_str().len() {
result += &format!(" {}", m.get(1).unwrap().as_str());
}
result
};
text = LONG_PATTERN.replace_all(&text, long_replacement).to_string();
return Ok(text.trim().split(" ").map(|x| x.to_string()).collect());
}
fn text_to_seq_kata(&self) -> Result<(Vec<String>, Vec<String>)> {
let mut seq_kata = vec![];
let mut seq_text = vec![];