From 58f90bc7be44db5f61d02527ced3cac01863b076 Mon Sep 17 00:00:00 2001 From: Thad Hughes Date: Wed, 25 Dec 2013 17:48:07 -0800 Subject: [PATCH] Fixes for Malay$ and reorderings due to new ICU4J. --- .../engine/DictionaryBuilderMain.java | 2 +- .../parser/wiktionary/WiktionaryLangs.java | 8 ++++++-- .../goldens/wiktionary.cmn_en.quickdic.text | 20 +++++++++---------- .../goldens/wiktionary.th_th.quickdic.text | 4 ++-- 4 files changed, 19 insertions(+), 15 deletions(-) diff --git a/src/com/hughes/android/dictionary/engine/DictionaryBuilderMain.java b/src/com/hughes/android/dictionary/engine/DictionaryBuilderMain.java index 892c320..57e76cc 100644 --- a/src/com/hughes/android/dictionary/engine/DictionaryBuilderMain.java +++ b/src/com/hughes/android/dictionary/engine/DictionaryBuilderMain.java @@ -150,7 +150,7 @@ public class DictionaryBuilderMain extends TestCase { isoToDedication.put("PT", "Wiktionary-based Portuguese dictionary dedicated to Carlos Melo, one Tough Mudder."); isoToDedication.put("RO", "Wiktionary-based Romanian dictionary dedicated to Radu Teodorescu."); isoToDedication.put("RU", "Wiktionary-based Russian dictionary dedicated to Maxim Aronin--best friend always!."); - isoToDedication.put("SR", "Wiktionary-based Serbian dictionary dedicated to Filip Crnogorac--thanks for the honey."); + isoToDedication.put("SR", "Wiktionary-based Serbian dictionary dedicated to Filip Crnogorac--thanks for the honey!"); isoToDedication.put("ES", "Wiktionary-based Spanish dictionary made especially for Carolina Tropini! <3 XoXoXXXXX!"); isoToDedication.put("SV", "Wiktionary-based Swedish dictionary dedicated to Kajsa Palmblad--björn kramar!"); } diff --git a/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java b/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java index 6341fa9..3af9a13 100644 --- a/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java +++ b/src/com/hughes/android/dictionary/parser/wiktionary/WiktionaryLangs.java @@ -101,13 +101,14 @@ public class WiktionaryLangs { isoCodeToEnWikiName.put("AZ", "Azeri"); isoCodeToEnWikiName.put("EU", "Basque"); isoCodeToEnWikiName.put("BR", "Breton"); - isoCodeToEnWikiName.put("MR", "Burmese"); + isoCodeToEnWikiName.put("MR", "Marathi"); isoCodeToEnWikiName.put("FO", "Faroese"); isoCodeToEnWikiName.put("GL", "Galician"); isoCodeToEnWikiName.put("KA", "Georgian"); isoCodeToEnWikiName.put("HT", "Haitian Creole"); isoCodeToEnWikiName.put("LB", "Luxembourgish"); isoCodeToEnWikiName.put("MK", "Macedonian"); + isoCodeToEnWikiName.put("GV", "Manx"); // No longer exists in EN: // isoCodeToEnWikiName.put("BS", "Bosnian"); @@ -199,7 +200,10 @@ public class WiktionaryLangs { return null; } if (name.indexOf('|') != -1) { - return name.substring(name.indexOf('|')); + return name.substring(0, name.indexOf('|')); + } + if (name.indexOf('$') != -1) { + return name.substring(0, name.indexOf('$')); } return name; // can be null. } diff --git a/testdata/goldens/wiktionary.cmn_en.quickdic.text b/testdata/goldens/wiktionary.cmn_en.quickdic.text index 141150d..70365bc 100644 --- a/testdata/goldens/wiktionary.cmn_en.quickdic.text +++ b/testdata/goldens/wiktionary.cmn_en.quickdic.text @@ -763,12 +763,6 @@ Index: cmn cmn->EN 二月 (èryuè) :: February (second month of the Gregorian calendar) (proper noun) ===example=== (the adjectives are in a dictionary form) 越……越…… (yuè...yuè...) (example: 越热越好 yuè rè yuè hǎo "the hotter the better"), 愈……愈…… (yù...yù...) :: the (the + ~comparative, the + comparative) (adverb) -***略語*** - 縮寫, 缩写 (suōxiě); 簡寫, 简写 (jiǎnxiě); 略語, 略语 (lüèyǔ) :: abbreviation (shortened or contracted form of a word or phrase) (noun) - 縮寫, 缩写 (suōxiě), 縮略詞, 缩略词 (suōlüècí), 略語, 略语 (lüèyǔ) :: acronym (word formed by initial letters) (noun) -***略语*** - 縮寫, 缩写 (suōxiě); 簡寫, 简写 (jiǎnxiě); 略語, 略语 (lüèyǔ) :: abbreviation (shortened or contracted form of a word or phrase) (noun) - 縮寫, 缩写 (suōxiě), 縮略詞, 缩略词 (suōlüècí), 略語, 略语 (lüèyǔ) :: acronym (word formed by initial letters) (noun) ===faan1=== (Cantonese) 南瓜 (naam4 gwaa1),番瓜 (faan1 gwaa1) :: pumpkin (plant) (noun) (Cantonese) 南瓜 (naam4 gwaa1),番瓜 (faan1 gwaa1) :: pumpkin (fruit of this plant) (noun) @@ -1868,6 +1862,12 @@ Index: cmn cmn->EN ===lüèyǔ=== 縮寫, 缩写 (suōxiě); 簡寫, 简写 (jiǎnxiě); 略語, 略语 (lüèyǔ) :: abbreviation (shortened or contracted form of a word or phrase) (noun) 縮寫, 缩写 (suōxiě), 縮略詞, 缩略词 (suōlüècí), 略語, 略语 (lüèyǔ) :: acronym (word formed by initial letters) (noun) +***略語*** + 縮寫, 缩写 (suōxiě); 簡寫, 简写 (jiǎnxiě); 略語, 略语 (lüèyǔ) :: abbreviation (shortened or contracted form of a word or phrase) (noun) + 縮寫, 缩写 (suōxiě), 縮略詞, 缩略词 (suōlüècí), 略語, 略语 (lüèyǔ) :: acronym (word formed by initial letters) (noun) +***略语*** + 縮寫, 缩写 (suōxiě); 簡寫, 简写 (jiǎnxiě); 略語, 略语 (lüèyǔ) :: abbreviation (shortened or contracted form of a word or phrase) (noun) + 縮寫, 缩写 (suōxiě), 縮略詞, 缩略词 (suōlüècí), 略語, 略语 (lüèyǔ) :: acronym (word formed by initial letters) (noun) ===luh=== (Min Nan) 啤酒 (bih-luh), 麥仔酒 (be̍h-á-chiú), 米仔酒 (bí-á-chiú) :: beer (alcoholic drink made of malt) (noun) ===luk6=== @@ -2619,10 +2619,10 @@ Index: cmn cmn->EN 赦免 (shèmiǎn), 赦罪 (shèzuì) :: absolve (theology: pronounce free or give absolution from sin) (verb) ===sheng=== 女性服务生 (nuxing fuwu-sheng) :: bellgirl (a female bellhop) (noun) -***省略*** - 減少, 减少 (jiǎnshǎo), 省略 (shěnglüè) :: abate (to deduct, to omit) (verb) ===shěnglüè=== 減少, 减少 (jiǎnshǎo), 省略 (shěnglüè) :: abate (to deduct, to omit) (verb) +***省略*** + 減少, 减少 (jiǎnshǎo), 省略 (shěnglüè) :: abate (to deduct, to omit) (verb) ===shēngzhǎng=== 生長, 生长 (shēngzhǎng) :: growth (increase in size) (noun) ***生長*** @@ -2895,12 +2895,12 @@ Index: cmn cmn->EN 蘇聯, 苏联 (Sūlián) :: USSR (Union of Soviet Socialist Republics) (proper noun) ===suǒ=== 廁所, 厕所 (cè suǒ) :: can (toilet) (noun) +===suōlüècí=== + 縮寫, 缩写 (suōxiě), 縮略詞, 缩略词 (suōlüècí), 略語, 略语 (lüèyǔ) :: acronym (word formed by initial letters) (noun) ***縮略詞*** 縮寫, 缩写 (suōxiě), 縮略詞, 缩略词 (suōlüècí), 略語, 略语 (lüèyǔ) :: acronym (word formed by initial letters) (noun) ***缩略词*** 縮寫, 缩写 (suōxiě), 縮略詞, 缩略词 (suōlüècí), 略語, 略语 (lüèyǔ) :: acronym (word formed by initial letters) (noun) -===suōlüècí=== - 縮寫, 缩写 (suōxiě), 縮略詞, 缩略词 (suōlüècí), 略語, 略语 (lüèyǔ) :: acronym (word formed by initial letters) (noun) ===suōxiě=== 縮寫, 缩写 (suōxiě); 簡寫, 简写 (jiǎnxiě); 略語, 略语 (lüèyǔ) :: abbreviation (shortened or contracted form of a word or phrase) (noun) 縮寫, 缩写 (suōxiě) :: abbreviation (act or result of shortening or reducing) (noun) diff --git a/testdata/goldens/wiktionary.th_th.quickdic.text b/testdata/goldens/wiktionary.th_th.quickdic.text index b802d41..01f53bc 100644 --- a/testdata/goldens/wiktionary.th_th.quickdic.text +++ b/testdata/goldens/wiktionary.th_th.quickdic.text @@ -2,8 +2,6 @@ dictInfo=SomeWikiData EntrySource: enwiktionary.thai 4998 Index: TH TH->EN -***ฯลฯ*** - ฯลฯ (abbreviation) :: et cetera ***๐*** ๐ (ศูนย์, súún) :: 0 (zero) ***๑*** @@ -1758,6 +1756,8 @@ Index: TH TH->EN ***กอด*** กอด (/gɔd/) {th-verb} :: To hug กอด (/gɔd/) {th-verb} :: To hold on to someone +***ฯลฯ*** + ฯลฯ (abbreviation) :: et cetera ===ḹ=== ฦๅ (ḹ; leu) (letter) :: A letter of the Thai alphabet, considered a vowel. ***ล*** -- 2.43.0