From 769468e0eebe890ff4ba1c3ce9b39607ff86ad47 Mon Sep 17 00:00:00 2001 From: mohamed benchrifa Date: Mon, 19 Apr 2021 18:03:58 +0100 Subject: [PATCH 1/5] Added pronunciation support --- Kawazu/Division.cs | 53 ++++++++++++++++++++++++++------------- Kawazu/JapaneseElement.cs | 19 +++++++++++--- 2 files changed, 51 insertions(+), 21 deletions(-) diff --git a/Kawazu/Division.cs b/Kawazu/Division.cs index 22bd08f..7eee50a 100644 --- a/Kawazu/Division.cs +++ b/Kawazu/Division.cs @@ -38,10 +38,24 @@ public string HiraReading return builder.ToString(); } } + public string HiraPronunciation + { + get + { + var builder = new StringBuilder(); + foreach (var element in this) + { + builder.Append(element.HiraPronunciation); + } - public string KataReading => Utilities.ToRawKatakana(HiraReading); + return builder.ToString(); + } + } + public string KataReading => Utilities.ToRawKatakana(HiraReading); + public string KataPronunciation => Utilities.ToRawKatakana(HiraPronunciation); public string RomaReading => Utilities.ToRawRomaji(HiraReading); + public string RomaPronunciation => Utilities.ToRawRomaji(HiraPronunciation); public readonly bool IsEndsInTsu; @@ -52,23 +66,23 @@ public Division(MeCabIpaDicNode node, TextType type, RomajiSystem system = Romaj switch (type) { case TextType.PureKana: - foreach (var ch in node.Surface) - { - this.Add(new JapaneseElement(ch.ToString(), Utilities.ToRawKatakana(ch.ToString()), TextType.PureKana, system)); - } + for(int i = 0; i < node.Surface.Length; i++) + this.Add(new JapaneseElement(node.Surface[i].ToString(), Utilities.ToRawKatakana(node.Surface[i].ToString()), node.Pronounciation[i].ToString(), TextType.PureKana, system)); break; case TextType.PureKanji: - this.Add(new JapaneseElement(node.Surface, node.Reading, TextType.PureKanji, system)); + this.Add(new JapaneseElement(node.Surface, node.Reading, node.Pronounciation, TextType.PureKanji, system)); break; case TextType.KanjiKanaMixed: var surfaceBuilder = new StringBuilder(node.Surface); var readingBuilder = new StringBuilder(node.Reading); + var pronounciationBuilder = new StringBuilder(node.Pronounciation); var kanasInTheEnd = new StringBuilder(); while (Utilities.IsKana(surfaceBuilder[0])) // Pop the kanas in the front. { - this.Add(new JapaneseElement(surfaceBuilder[0].ToString(), Utilities.ToRawKatakana(surfaceBuilder[0].ToString()), TextType.PureKana, system)); + this.Add(new JapaneseElement(surfaceBuilder[0].ToString(), Utilities.ToRawKatakana(surfaceBuilder[0].ToString()), pronounciationBuilder[0].ToString(), TextType.PureKana, system)); surfaceBuilder.Remove(0, 1); readingBuilder.Remove(0, 1); + pronounciationBuilder.Remove(0, 1); } while (Utilities.IsKana(surfaceBuilder[surfaceBuilder.Length - 1])) // Pop the kanas in the end. @@ -76,6 +90,7 @@ public Division(MeCabIpaDicNode node, TextType type, RomajiSystem system = Romaj kanasInTheEnd.Append(surfaceBuilder[surfaceBuilder.Length - 1].ToString()); surfaceBuilder.Remove(surfaceBuilder.Length - 1, 1); readingBuilder.Remove(readingBuilder.Length - 1, 1); + pronounciationBuilder.Remove(pronounciationBuilder.Length - 1, 1); } if (Utilities.HasKana(surfaceBuilder.ToString())) // For the middle part: @@ -88,47 +103,51 @@ where Utilities.IsKana(ele) select ele; var kanaList = kanas.ToList(); - - foreach (var ch in surfaceBuilder.ToString()) + + var ch = surfaceBuilder.ToString(); + for(int i = 0; i < ch.Length; i++) { - if (Utilities.IsKanji(ch)) + if (Utilities.IsKanji(ch[i])) { if (kanaIndex >= kanaList.Count) { - this.Add(new JapaneseElement(ch.ToString(), readingBuilder.ToString(previousIndex + 1, readingBuilder.Length - previousIndex - 1), TextType.PureKanji, system)); + this.Add(new JapaneseElement(ch[i].ToString(), readingBuilder.ToString(previousIndex + 1, readingBuilder.Length - previousIndex - 1), pronounciationBuilder.ToString(previousIndex + 1, readingBuilder.Length - previousIndex - 1), TextType.PureKanji, system)); continue; } var index = readingBuilder.ToString() .IndexOf(Utilities.ToRawKatakana(kanaList[kanaIndex].ToString()), StringComparison.Ordinal); - this.Add(new JapaneseElement(ch.ToString(), readingBuilder.ToString(previousIndex + 1, index - previousIndex - 1), TextType.PureKanji, system)); + this.Add(new JapaneseElement(ch[i].ToString(), readingBuilder.ToString(previousIndex + 1, index - previousIndex - 1), pronounciationBuilder.ToString(previousIndex + 1, index - previousIndex - 1), TextType.PureKanji, system)); previousIndex = index; kanaIndex++; } - if (Utilities.IsKana(ch)) + if (Utilities.IsKana(ch[i])) { - this.Add(new JapaneseElement(ch.ToString(), Utilities.ToRawHiragana(ch.ToString()), TextType.PureKana, system)); + var kana = Utilities.ToRawKatakana(ch[i].ToString()); + this.Add(new JapaneseElement(ch[i].ToString(), kana, kana, TextType.PureKana, system)); } + } } else { - this.Add(new JapaneseElement(surfaceBuilder.ToString(), readingBuilder.ToString(), TextType.PureKanji, system)); + this.Add(new JapaneseElement(surfaceBuilder.ToString(), readingBuilder.ToString(), pronounciationBuilder.ToString(), TextType.PureKanji, system)); } if (kanasInTheEnd.Length != 0) { for (var i = kanasInTheEnd.Length - 1; i >= 0; i--) { - this.Add(new JapaneseElement(kanasInTheEnd.ToString()[i].ToString(), Utilities.ToRawKatakana(kanasInTheEnd.ToString()[i].ToString()), TextType.PureKana, system)); + var kana = Utilities.ToRawKatakana(kanasInTheEnd.ToString()[i].ToString()); + this.Add(new JapaneseElement(kanasInTheEnd.ToString()[i].ToString(), kana, kana, TextType.PureKana, system)); } } break; case TextType.Others: - this.Add(new JapaneseElement(node.Surface, node.Surface, TextType.Others, system)); + this.Add(new JapaneseElement(node.Surface, node.Surface, node.Pronounciation, TextType.Others, system)); break; } } diff --git a/Kawazu/JapaneseElement.cs b/Kawazu/JapaneseElement.cs index dd0cd90..8940369 100644 --- a/Kawazu/JapaneseElement.cs +++ b/Kawazu/JapaneseElement.cs @@ -11,14 +11,15 @@ public readonly struct JapaneseElement public string Element { get; } public string HiraNotation { get; } - + public string HiraPronunciation { get; } public string KataNotation { get; } - + public string KataPronunciation { get; } public string RomaNotation { get; } - + public string RomaPronunciation { get; } + public TextType Type { get; } - public JapaneseElement(string element, string kataNotation, TextType type, RomajiSystem system = RomajiSystem.Hepburn) + public JapaneseElement(string element, string kataNotation, string kataPronunciation, TextType type, RomajiSystem system = RomajiSystem.Hepburn) { Element = element; Type = type; @@ -26,14 +27,24 @@ public JapaneseElement(string element, string kataNotation, TextType type, Romaj if (type == TextType.Others) { KataNotation = kataNotation; + KataPronunciation = kataPronunciation; + HiraNotation = kataNotation; + HiraPronunciation = kataPronunciation; + RomaNotation = kataNotation; + RomaPronunciation = kataPronunciation; return; } KataNotation = kataNotation; + KataPronunciation = kataPronunciation; + HiraNotation = Utilities.ToRawHiragana(kataNotation); + HiraPronunciation = Utilities.ToRawHiragana(kataPronunciation); + RomaNotation = Utilities.ToRawRomaji(kataNotation, system); + RomaPronunciation = Utilities.ToRawRomaji(kataPronunciation, system); } } } \ No newline at end of file From 18512d069d96ad0e9effe1b2f1a0cd644dbd82cc Mon Sep 17 00:00:00 2001 From: Rinne Date: Tue, 20 Apr 2021 10:19:33 +0800 Subject: [PATCH 2/5] Clean up code, unify code style --- Kawazu/Division.cs | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/Kawazu/Division.cs b/Kawazu/Division.cs index 7eee50a..6365ef2 100644 --- a/Kawazu/Division.cs +++ b/Kawazu/Division.cs @@ -66,8 +66,8 @@ public Division(MeCabIpaDicNode node, TextType type, RomajiSystem system = Romaj switch (type) { case TextType.PureKana: - for(int i = 0; i < node.Surface.Length; i++) - this.Add(new JapaneseElement(node.Surface[i].ToString(), Utilities.ToRawKatakana(node.Surface[i].ToString()), node.Pronounciation[i].ToString(), TextType.PureKana, system)); + foreach (var ch in node.Surface) + Add(new JapaneseElement(ch.ToString(), Utilities.ToRawKatakana(ch.ToString()), ch.ToString(), TextType.PureKana, system)); break; case TextType.PureKanji: this.Add(new JapaneseElement(node.Surface, node.Reading, node.Pronounciation, TextType.PureKanji, system)); @@ -75,14 +75,14 @@ public Division(MeCabIpaDicNode node, TextType type, RomajiSystem system = Romaj case TextType.KanjiKanaMixed: var surfaceBuilder = new StringBuilder(node.Surface); var readingBuilder = new StringBuilder(node.Reading); - var pronounciationBuilder = new StringBuilder(node.Pronounciation); + var pronunciationBuilder = new StringBuilder(node.Pronounciation); var kanasInTheEnd = new StringBuilder(); while (Utilities.IsKana(surfaceBuilder[0])) // Pop the kanas in the front. { - this.Add(new JapaneseElement(surfaceBuilder[0].ToString(), Utilities.ToRawKatakana(surfaceBuilder[0].ToString()), pronounciationBuilder[0].ToString(), TextType.PureKana, system)); + Add(new JapaneseElement(surfaceBuilder[0].ToString(), Utilities.ToRawKatakana(surfaceBuilder[0].ToString()), pronunciationBuilder[0].ToString(), TextType.PureKana, system)); surfaceBuilder.Remove(0, 1); readingBuilder.Remove(0, 1); - pronounciationBuilder.Remove(0, 1); + pronunciationBuilder.Remove(0, 1); } while (Utilities.IsKana(surfaceBuilder[surfaceBuilder.Length - 1])) // Pop the kanas in the end. @@ -90,7 +90,7 @@ public Division(MeCabIpaDicNode node, TextType type, RomajiSystem system = Romaj kanasInTheEnd.Append(surfaceBuilder[surfaceBuilder.Length - 1].ToString()); surfaceBuilder.Remove(surfaceBuilder.Length - 1, 1); readingBuilder.Remove(readingBuilder.Length - 1, 1); - pronounciationBuilder.Remove(pronounciationBuilder.Length - 1, 1); + pronunciationBuilder.Remove(pronunciationBuilder.Length - 1, 1); } if (Utilities.HasKana(surfaceBuilder.ToString())) // For the middle part: @@ -104,29 +104,28 @@ where Utilities.IsKana(ele) var kanaList = kanas.ToList(); - var ch = surfaceBuilder.ToString(); - for(int i = 0; i < ch.Length; i++) + foreach (var ch in surfaceBuilder.ToString()) { - if (Utilities.IsKanji(ch[i])) + if (Utilities.IsKanji(ch)) { if (kanaIndex >= kanaList.Count) { - this.Add(new JapaneseElement(ch[i].ToString(), readingBuilder.ToString(previousIndex + 1, readingBuilder.Length - previousIndex - 1), pronounciationBuilder.ToString(previousIndex + 1, readingBuilder.Length - previousIndex - 1), TextType.PureKanji, system)); + Add(new JapaneseElement(ch.ToString(), readingBuilder.ToString(previousIndex + 1, readingBuilder.Length - previousIndex - 1), pronunciationBuilder.ToString(previousIndex + 1, readingBuilder.Length - previousIndex - 1), TextType.PureKanji, system)); continue; } var index = readingBuilder.ToString() .IndexOf(Utilities.ToRawKatakana(kanaList[kanaIndex].ToString()), StringComparison.Ordinal); - this.Add(new JapaneseElement(ch[i].ToString(), readingBuilder.ToString(previousIndex + 1, index - previousIndex - 1), pronounciationBuilder.ToString(previousIndex + 1, index - previousIndex - 1), TextType.PureKanji, system)); + Add(new JapaneseElement(ch.ToString(), readingBuilder.ToString(previousIndex + 1, index - previousIndex - 1), pronunciationBuilder.ToString(previousIndex + 1, index - previousIndex - 1), TextType.PureKanji, system)); previousIndex = index; kanaIndex++; } - if (Utilities.IsKana(ch[i])) + if (Utilities.IsKana(ch)) { - var kana = Utilities.ToRawKatakana(ch[i].ToString()); - this.Add(new JapaneseElement(ch[i].ToString(), kana, kana, TextType.PureKana, system)); + var kana = Utilities.ToRawKatakana(ch.ToString()); + Add(new JapaneseElement(ch.ToString(), kana, kana, TextType.PureKana, system)); } } @@ -134,7 +133,7 @@ where Utilities.IsKana(ele) else { - this.Add(new JapaneseElement(surfaceBuilder.ToString(), readingBuilder.ToString(), pronounciationBuilder.ToString(), TextType.PureKanji, system)); + Add(new JapaneseElement(surfaceBuilder.ToString(), readingBuilder.ToString(), pronunciationBuilder.ToString(), TextType.PureKanji, system)); } if (kanasInTheEnd.Length != 0) @@ -142,12 +141,12 @@ where Utilities.IsKana(ele) for (var i = kanasInTheEnd.Length - 1; i >= 0; i--) { var kana = Utilities.ToRawKatakana(kanasInTheEnd.ToString()[i].ToString()); - this.Add(new JapaneseElement(kanasInTheEnd.ToString()[i].ToString(), kana, kana, TextType.PureKana, system)); + Add(new JapaneseElement(kanasInTheEnd.ToString()[i].ToString(), kana, kana, TextType.PureKana, system)); } } break; case TextType.Others: - this.Add(new JapaneseElement(node.Surface, node.Surface, node.Pronounciation, TextType.Others, system)); + Add(new JapaneseElement(node.Surface, node.Surface, node.Pronounciation, TextType.Others, system)); break; } } From ed77a196cd6da9c10be9a2ed08cb237aa861d137 Mon Sep 17 00:00:00 2001 From: Rinne Date: Tue, 20 Apr 2021 10:19:33 +0800 Subject: [PATCH 3/5] Clean up code, unify code style --- Kawazu/Division.cs | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/Kawazu/Division.cs b/Kawazu/Division.cs index 7eee50a..70dcd15 100644 --- a/Kawazu/Division.cs +++ b/Kawazu/Division.cs @@ -66,23 +66,25 @@ public Division(MeCabIpaDicNode node, TextType type, RomajiSystem system = Romaj switch (type) { case TextType.PureKana: - for(int i = 0; i < node.Surface.Length; i++) - this.Add(new JapaneseElement(node.Surface[i].ToString(), Utilities.ToRawKatakana(node.Surface[i].ToString()), node.Pronounciation[i].ToString(), TextType.PureKana, system)); + for(var i = 0; i < node.Surface.Length; i++) + Add(new JapaneseElement(node.Surface[i].ToString(), Utilities.ToRawKatakana(node.Surface[i].ToString()), node.Pronounciation[i].ToString(), TextType.PureKana, system)); break; + case TextType.PureKanji: - this.Add(new JapaneseElement(node.Surface, node.Reading, node.Pronounciation, TextType.PureKanji, system)); + Add(new JapaneseElement(node.Surface, node.Reading, node.Pronounciation, TextType.PureKanji, system)); break; + case TextType.KanjiKanaMixed: var surfaceBuilder = new StringBuilder(node.Surface); var readingBuilder = new StringBuilder(node.Reading); - var pronounciationBuilder = new StringBuilder(node.Pronounciation); + var pronunciationBuilder = new StringBuilder(node.Pronounciation); var kanasInTheEnd = new StringBuilder(); while (Utilities.IsKana(surfaceBuilder[0])) // Pop the kanas in the front. { - this.Add(new JapaneseElement(surfaceBuilder[0].ToString(), Utilities.ToRawKatakana(surfaceBuilder[0].ToString()), pronounciationBuilder[0].ToString(), TextType.PureKana, system)); + Add(new JapaneseElement(surfaceBuilder[0].ToString(), Utilities.ToRawKatakana(surfaceBuilder[0].ToString()), pronunciationBuilder[0].ToString(), TextType.PureKana, system)); surfaceBuilder.Remove(0, 1); readingBuilder.Remove(0, 1); - pronounciationBuilder.Remove(0, 1); + pronunciationBuilder.Remove(0, 1); } while (Utilities.IsKana(surfaceBuilder[surfaceBuilder.Length - 1])) // Pop the kanas in the end. @@ -90,7 +92,7 @@ public Division(MeCabIpaDicNode node, TextType type, RomajiSystem system = Romaj kanasInTheEnd.Append(surfaceBuilder[surfaceBuilder.Length - 1].ToString()); surfaceBuilder.Remove(surfaceBuilder.Length - 1, 1); readingBuilder.Remove(readingBuilder.Length - 1, 1); - pronounciationBuilder.Remove(pronounciationBuilder.Length - 1, 1); + pronunciationBuilder.Remove(pronunciationBuilder.Length - 1, 1); } if (Utilities.HasKana(surfaceBuilder.ToString())) // For the middle part: @@ -103,38 +105,36 @@ where Utilities.IsKana(ele) select ele; var kanaList = kanas.ToList(); - - var ch = surfaceBuilder.ToString(); - for(int i = 0; i < ch.Length; i++) + + foreach (var ch in surfaceBuilder.ToString()) { - if (Utilities.IsKanji(ch[i])) + if (Utilities.IsKanji(ch)) { if (kanaIndex >= kanaList.Count) { - this.Add(new JapaneseElement(ch[i].ToString(), readingBuilder.ToString(previousIndex + 1, readingBuilder.Length - previousIndex - 1), pronounciationBuilder.ToString(previousIndex + 1, readingBuilder.Length - previousIndex - 1), TextType.PureKanji, system)); + Add(new JapaneseElement(ch.ToString(), readingBuilder.ToString(previousIndex + 1, readingBuilder.Length - previousIndex - 1), pronunciationBuilder.ToString(previousIndex + 1, readingBuilder.Length - previousIndex - 1), TextType.PureKanji, system)); continue; } var index = readingBuilder.ToString() .IndexOf(Utilities.ToRawKatakana(kanaList[kanaIndex].ToString()), StringComparison.Ordinal); - this.Add(new JapaneseElement(ch[i].ToString(), readingBuilder.ToString(previousIndex + 1, index - previousIndex - 1), pronounciationBuilder.ToString(previousIndex + 1, index - previousIndex - 1), TextType.PureKanji, system)); + Add(new JapaneseElement(ch.ToString(), readingBuilder.ToString(previousIndex + 1, index - previousIndex - 1), pronunciationBuilder.ToString(previousIndex + 1, index - previousIndex - 1), TextType.PureKanji, system)); previousIndex = index; kanaIndex++; } - if (Utilities.IsKana(ch[i])) + if (Utilities.IsKana(ch)) { - var kana = Utilities.ToRawKatakana(ch[i].ToString()); - this.Add(new JapaneseElement(ch[i].ToString(), kana, kana, TextType.PureKana, system)); + var kana = Utilities.ToRawKatakana(ch.ToString()); + Add(new JapaneseElement(ch.ToString(), kana, kana, TextType.PureKana, system)); } - } } else { - this.Add(new JapaneseElement(surfaceBuilder.ToString(), readingBuilder.ToString(), pronounciationBuilder.ToString(), TextType.PureKanji, system)); + Add(new JapaneseElement(surfaceBuilder.ToString(), readingBuilder.ToString(), pronunciationBuilder.ToString(), TextType.PureKanji, system)); } if (kanasInTheEnd.Length != 0) @@ -142,13 +142,17 @@ where Utilities.IsKana(ele) for (var i = kanasInTheEnd.Length - 1; i >= 0; i--) { var kana = Utilities.ToRawKatakana(kanasInTheEnd.ToString()[i].ToString()); - this.Add(new JapaneseElement(kanasInTheEnd.ToString()[i].ToString(), kana, kana, TextType.PureKana, system)); + Add(new JapaneseElement(kanasInTheEnd.ToString()[i].ToString(), kana, kana, TextType.PureKana, system)); } } break; + case TextType.Others: - this.Add(new JapaneseElement(node.Surface, node.Surface, node.Pronounciation, TextType.Others, system)); + Add(new JapaneseElement(node.Surface, node.Surface, node.Pronounciation, TextType.Others, system)); break; + + default: + throw new ArgumentOutOfRangeException(nameof(type), type, null); } } } From 51c10bf41caae8daaa8b5a4065c0b9527ec0c1b6 Mon Sep 17 00:00:00 2001 From: Rinne Date: Tue, 20 Apr 2021 10:55:41 +0800 Subject: [PATCH 4/5] Add pronunciation demo #8 --- Kawazu-Cli/Program.cs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Kawazu-Cli/Program.cs b/Kawazu-Cli/Program.cs index c2e5011..d046027 100644 --- a/Kawazu-Cli/Program.cs +++ b/Kawazu-Cli/Program.cs @@ -1,4 +1,5 @@ using System; +using System.Text; using System.Threading.Tasks; namespace Kawazu @@ -66,7 +67,13 @@ private static async Task Main(string[] args) }; } var result = await converter.Convert(str, to, mode, system, "(", ")"); + var pronunciation = new StringBuilder(); + foreach (var div in await converter.GetDivisions(str, to, mode, system, "(", ")")) + { + pronunciation.Append(div.RomaPronunciation); + } Console.WriteLine(result); + Console.WriteLine($"Pronunciation: {pronunciation}"); Console.WriteLine(); } } From 8758535d413118d5208c044f8f8dbac3ad33961b Mon Sep 17 00:00:00 2001 From: Rinne Date: Tue, 20 Apr 2021 11:07:12 +0800 Subject: [PATCH 5/5] Update version tag --- Kawazu/Kawazu.csproj | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Kawazu/Kawazu.csproj b/Kawazu/Kawazu.csproj index 4831c3e..08076cc 100644 --- a/Kawazu/Kawazu.csproj +++ b/Kawazu/Kawazu.csproj @@ -4,12 +4,12 @@ Kawazu Kawazu is a C# library for converting Japanese sentence to Hiragana, Katakana or Romaji with furigana and okurigana modes supported. Inspired by project Kuroshiro. Japanese;Kana;Kanji;Mecab;Hiragana;Katakana;Furigana;Okurigana - 1.0.1 + 1.1.0 Cutano Cutano net5.0;netcoreapp3.1;netstandard2.0 true - 1.0.1 + 1.1.0 © Cutano 2020 https://github.com/Cutano/Kawazu https://github.com/Cutano/Kawazu