X-Git-Url: http://gitweb.fperrin.net/?a=blobdiff_plain;f=src%2Fcom%2Fhughes%2Fandroid%2Fdictionary%2Fparser%2FEnWiktionaryXmlParser.java;h=86d0ddc05b4fb44f967da9f2df1ced0f2c242906;hb=b3dd51971927861bdaeab3ea1569006c07653873;hp=0dd51d2cac2ebfed6af65ece1cf187ac637e7fb7;hpb=cde2f082e2ad9070bdee69c99ac50b77622efad4;p=DictionaryPC.git diff --git a/src/com/hughes/android/dictionary/parser/EnWiktionaryXmlParser.java b/src/com/hughes/android/dictionary/parser/EnWiktionaryXmlParser.java index 0dd51d2..86d0ddc 100644 --- a/src/com/hughes/android/dictionary/parser/EnWiktionaryXmlParser.java +++ b/src/com/hughes/android/dictionary/parser/EnWiktionaryXmlParser.java @@ -474,6 +474,7 @@ public class EnWiktionaryXmlParser { System.out.println(); } + boolean titleAppended = false; final StringBuilder foreignBuilder = new StringBuilder(); final Collection wordForms = new ArrayList(); final List listSections = new ArrayList(); @@ -533,61 +534,75 @@ public class EnWiktionaryXmlParser { } else if (name.equals("infl")) { // See: http://en.wiktionary.org/wiki/Template:infl final String langCode = get(args, 0); + String head = namedArgs.remove("head"); + if (head == null) { + head = namedArgs.remove("title"); // Bug + } + if (head == null) { + head = title; + } else { + head = WikiTokenizer.toPlainText(head); + } + titleAppended = true; + namedArgs.remove("sc"); + namedArgs.remove("lang"); + namedArgs.remove("sort"); + namedArgs.remove("cat"); + final String tr = namedArgs.remove("tr"); - final String g = namedArgs.remove("g"); + String g = namedArgs.remove("g"); + if (g == null) { + g = namedArgs.remove("gender"); + } final String g2 = namedArgs.remove("g2"); final String g3 = namedArgs.remove("g3"); - if (!namedArgs.isEmpty()) { - LOG.warning("Didn't parse infl: " + wikiTokenizer.token()); - foreignBuilder.append(wikiTokenizer.token()); - } else { - String head = namedArgs.get("head"); - if (head == null) { - head = title; - } else { - head = WikiTokenizer.toPlainText(head); - } - foreignBuilder.append(head); - - if (g != null) { - foreignBuilder.append(" {").append(g); - if (g2 != null) { - foreignBuilder.append("|").append(g2); - } - if (g3 != null) { - foreignBuilder.append("|").append(g3); - } - foreignBuilder.append("}"); - } - - if (tr != null) { - foreignBuilder.append(String.format(TRANSLITERATION_FORMAT, tr)); - wordForms.add(tr); + + foreignBuilder.append(head); + + if (g != null) { + foreignBuilder.append(" {").append(g); + if (g2 != null) { + foreignBuilder.append("|").append(g2); } - - final String pos = get(args, 1); - if (pos != null) { - foreignBuilder.append(" (").append(pos).append(")"); + if (g3 != null) { + foreignBuilder.append("|").append(g3); } - for (int i = 2; i < args.size(); i += 2) { - final String inflName = get(args, i); - final String inflValue = get(args, i + 1); - foreignBuilder.append(", ").append(WikiTokenizer.toPlainText(inflName)); - if (inflValue != null && inflValue.length() > 0) { - foreignBuilder.append(": ").append(WikiTokenizer.toPlainText(inflValue)); - wordForms.add(inflValue); - } + foreignBuilder.append("}"); + } + + if (tr != null) { + foreignBuilder.append(String.format(TRANSLITERATION_FORMAT, tr)); + wordForms.add(tr); + } + + final String pos = get(args, 1); + if (pos != null) { + foreignBuilder.append(" (").append(pos).append(")"); + } + for (int i = 2; i < args.size(); i += 2) { + final String inflName = get(args, i); + final String inflValue = get(args, i + 1); + foreignBuilder.append(", ").append(WikiTokenizer.toPlainText(inflName)); + if (inflValue != null && inflValue.length() > 0) { + foreignBuilder.append(": ").append(WikiTokenizer.toPlainText(inflValue)); + wordForms.add(inflValue); } } + for (final String key : namedArgs.keySet()) { + final String value = WikiTokenizer.toPlainText(namedArgs.get(key)); + foreignBuilder.append(" ").append(key).append("=").append(value); + wordForms.add(value); + } } else if (name.equals("it-noun")) { - final String base = get(args, 0); - final String gender = get(args, 1); - final String singular = base + get(args, 2); - final String plural = base + get(args, 3); - foreignBuilder.append(String.format(" %s {%s}, %s {pl}", singular, gender, plural, plural)); - wordForms.add(singular); - wordForms.add(plural); + titleAppended = true; + final String base = get(args, 0); + final String gender = get(args, 1); + final String singular = base + get(args, 2); + final String plural = base + get(args, 3); + foreignBuilder.append(String.format(" %s {%s}, %s {pl}", singular, gender, plural, plural)); + wordForms.add(singular); + wordForms.add(plural); } else if (name.equals("it-proper noun")) { foreignBuilder.append(wikiTokenizer.token()); } else if (name.equals("it-adj")) { @@ -638,7 +653,7 @@ public class EnWiktionaryXmlParser { // Here's where we exit. // Should we make an entry even if there are no foreign list items? String foreign = foreignBuilder.toString().trim(); - if (!foreign.toLowerCase().startsWith(title.toLowerCase())) { + if (!titleAppended && !foreign.toLowerCase().startsWith(title.toLowerCase())) { foreign = String.format("%s %s", title, foreign); } if (!langPattern.matcher(lang).matches()) {