src/com/hughes/android/dictionary/parser/EnWiktionaryXmlParser.java

   1 package com.hughes.android.dictionary.parser;
   2
   3 import java.io.File;
   4 import java.io.IOException;
   5 import java.util.ArrayList;
   6 import java.util.Arrays;
   7 import java.util.LinkedHashSet;
   8 import java.util.List;
   9 import java.util.Map;
  10 import java.util.Set;
  11 import java.util.regex.Pattern;
  12
  13 import javax.xml.parsers.ParserConfigurationException;
  14 import javax.xml.parsers.SAXParser;
  15 import javax.xml.parsers.SAXParserFactory;
  16
  17 import org.xml.sax.Attributes;
  18 import org.xml.sax.SAXException;
  19
  20 import com.hughes.android.dictionary.engine.DictionaryBuilder;
  21 import com.hughes.android.dictionary.engine.IndexBuilder;
  22 import com.hughes.android.dictionary.parser.WikiWord.FormOf;
  23 import com.hughes.android.dictionary.parser.WikiWord.Translation;
  24 import com.hughes.util.ListUtil;
  25 import com.hughes.util.StringUtil;
  26
  27 public class EnWiktionaryXmlParser extends org.xml.sax.helpers.DefaultHandler implements WikiCallback {
  28
  29   static final Pattern partOfSpeechHeader = Pattern.compile(
  30       "Noun|Verb|Adjective|Adverb|Pronoun|Conjunction|Interjection|" +
  31       "Preposition|Proper noun|Article|Prepositional phrase|Acronym|" +
  32       "Abbreviation|Initialism|Contraction|Prefix|Suffix|Symbol|Letter|" +
  33       "Ligature|Idiom|Phrase|" +
  34       // These are @deprecated:
  35       "Noun form|Verb form|Adjective form|Nominal phrase|Noun phrase|" +
  36       "Verb phrase|Transitive verb|Intransitive verb|Reflexive verb|" +
  37       // These are extras I found:
  38       "Determiner|Numeral|Number|Cardinal number|Ordinal number|Proverb|" +
  39       "Particle|Interjection|Pronominal adverb" +
  40       "Han character|Hanzi|Hanja|Kanji|Katakana character|Syllable");
  41
  // Matches "[[", "]]" and runs of two or more apostrophes; onTemplate() uses it
  // to strip this markup out of pronunciation strings.
  static final Pattern wikiMarkup =  Pattern.compile("\\[\\[|\\]\\]|''+");

  // Builder that every finished WikiWord is handed to at the end of a page.
  final DictionaryBuilder dictBuilder;

  // Array copy of dictBuilder.indexBuilders, taken in the constructor.
  final IndexBuilder[] indexBuilders;
  // Two patterns (asserted in the constructor); matched against section headings
  // and against the language label of translation list items.
  final Pattern[] langPatterns;
  // Passed unchanged to WikiWord.wikiWordToQuickDic() for every word.
  final int enIndexBuilder;

  // Accumulate the character data of the current page's <title> and <text>.
  StringBuilder titleBuilder;
  StringBuilder textBuilder;
  // The builder that characters() appends to, or null when character data is ignored.
  StringBuilder currentBuilder = null;
  53
  54   public EnWiktionaryXmlParser(final DictionaryBuilder dictBuilder, final Pattern[] langPatterns, final int enIndexBuilder) {
  55     assert langPatterns.length == 2;
  56     this.dictBuilder = dictBuilder;
  57     this.indexBuilders = dictBuilder.indexBuilders.toArray(new IndexBuilder[0]);
  58     this.langPatterns = langPatterns;
  59     this.enIndexBuilder = enIndexBuilder;
  60   }
  61
  62   @Override
  63   public void startElement(String uri, String localName, String qName,
  64       Attributes attributes) {
  65     currentBuilder = null;
  66     if ("page".equals(qName)) {
  67       titleBuilder = new StringBuilder();
  68
  69       // Start with "\n" to better match certain strings.
  70       textBuilder = new StringBuilder("\n");
  71     } else if ("title".equals(qName)) {
  72       currentBuilder = titleBuilder;
  73     } else if ("text".equals(qName)) {
  74       currentBuilder = textBuilder;
  75     }
  76   }
  77
  78   @Override
  79   public void characters(char[] ch, int start, int length) throws SAXException {
  80     if (currentBuilder != null) {
  81       currentBuilder.append(ch, start, length);
  82     }
  83   }
  84
  85   @Override
  86   public void endElement(String uri, String localName, String qName)
  87       throws SAXException {
  88     currentBuilder = null;
  89     if ("page".equals(qName)) {
  90       endPage();
  91     }
  92   }
  93
  94
  95   public void parse(final File file) throws ParserConfigurationException,
  96       SAXException, IOException {
  97     final SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
  98     parser.parse(file, this);
  99   }
 100
 101   int pageCount = 0;
 102   private void endPage() {
 103     title = titleBuilder.toString();
 104     ++pageCount;
 105     if (pageCount % 1000 == 0) {
 106       System.out.println("pageCount=" + pageCount);
 107     }
 108     if (title.startsWith("Wiktionary:") ||
 109         title.startsWith("Template:") ||
 110         title.startsWith("Appendix:") ||
 111         title.startsWith("Category:") ||
 112         title.startsWith("Index:") ||
 113         title.startsWith("MediaWiki:") ||
 114         title.startsWith("TransWiki:") ||
 115         title.startsWith("Citations:") ||
 116         title.startsWith("Concordance:") ||
 117         title.startsWith("Help:")) {
 118       return;
 119     }
 120     currentDepth = 0;
 121     words.clear();
 122     currentHeading = null;
 123     insidePartOfSpeech = false;
 124 //    System.err.println("Working on page: " + title);
 125     try {
 126       WikiParser.parse(textBuilder.toString(), this);
 127     } catch (Throwable e) {
 128       System.err.println("Failure on page: " + title);
 129       e.printStackTrace(System.err);
 130     }
 131
 132    for (final WikiWord word : words) {
 133      word.wikiWordToQuickDic(dictBuilder, enIndexBuilder);
 134    }  // WikiWord
 135
 136   }  // endPage()
 137
 138
 139   // ------------------------------------------------------------------------
 140   // ------------------------------------------------------------------------
 141   // ------------------------------------------------------------------------
 142   // ------------------------------------------------------------------------
 143
 144   /**
 145    * Two things can happen:
 146    *
 147    * We can be in a ==German== section.  There we will see English definitions.
 148    * Each POS should get its own QuickDic entry.  Pretty much everything goes
 149    * in.
 150    *
 151    * Or we can be in an ==English== section with English definitions
 152    * and maybe see translations for languages we care about.
 153    *
 154    * In either case, we need to differentiate the subsections (Noun, Verb, etc.)
 155    * into separate QuickDic entries, but that's tricky--how do we know when we
 156    * found a subsection?  Just ignore anything containing pronunciation and
 157    * etymology?
 158    *
 159    * How do we decide when to seal the deal on an entry?
 160    *
 161    * Would be nice if the parser told us about leaving sections....
 162    *
 163    *
 164    */
 165
  // Title of the page being processed; assigned at the start of endPage().
  String title;
  // Trimmed text of the most recently completed heading; null while a heading is open.
  String currentHeading;
  // Depth passed to the most recent onHeadingStart() call.
  int currentDepth;
  // Words collected for the current page; cleared and flushed by endPage().
  final List<WikiWord> words = new ArrayList<WikiWord>();
  // Word created for the most recent language heading; cleared by onHeadingStart()
  // when a heading of the same or a shallower depth begins.
  WikiWord currentWord;
  // Part of speech created for the most recent matching heading; cleared when a
  // heading of the same or a shallower depth begins.
  WikiWord.PartOfSpeech currentPartOfSpeech;
  // Translation sense that translation list items are added to; null outside
  // translation sections.
  WikiWord.TranslationSense currentTranslationSense;
  // True from the end of a part-of-speech heading until the next heading.
  boolean insidePartOfSpeech;

  // Collects the text of the heading or list item currently open; created when
  // one starts and normally reset to null when it ends.
  StringBuilder wikiBuilder = null;
 176
 177   @Override
 178   public void onWikiLink(String[] args) {
 179     if (wikiBuilder == null) {
 180       return;
 181     }
 182     wikiBuilder.append(args[args.length - 1]);
 183   }
 184
  // Templates that onTemplate() replaces by their arguments (everything after
  // the template name), joined with ", ".
  // ttbc: translations to be checked.
  static final Set<String> useRemainingArgTemplates = new LinkedHashSet<String>(Arrays.asList(
      "Arab", "Cyrl", "fa-Arab", "italbrac", "Khmr", "ku-Arab", "IPAchar", "Laoo",
      "sd-Arab", "Thai", "ttbc", "unicode", "ur-Arab", "yue-yue-j", "zh-ts",
      "zh-tsp", "zh-zh-p", "ug-Arab", "ko-inline", "Jpan", "Kore", "rfscript", "Latinx"));
  // Templates that onTemplate() drops without producing any output.
  static final Set<String> ignoreTemplates = new LinkedHashSet<String>(Arrays.asList("audio", "rhymes", "hyphenation", "homophones", "wikipedia", "rel-top", "rel-bottom", "sense", "wikisource1911Enc", "g"));
  // Templates that onTemplate() renders as their own name followed by ".".
  static final Set<String> grammarTemplates = new LinkedHashSet<String>(Arrays.asList("impf", "pf", "pf.", "indeclinable"));
  // Templates that onTemplate() renders as their bare name; the single entry
  // looks like a placeholder -- TODO confirm.
  static final Set<String> passThroughTemplates = new LinkedHashSet<String>(Arrays.asList("zzzzzzzzzzzzzzz"));
 193
 194   @Override
 195   public void onTemplate(final List<String> positionalArgs, final Map<String,String> namedArgs) {
 196     if (positionalArgs.isEmpty()) {
 197       // This happens very rarely with special templates.
 198       return;
 199     }
 200     final String name = positionalArgs.get(0);
 201
 202     namedArgs.remove("lang");
 203     namedArgs.remove("nocat");
 204     namedArgs.remove("nocap");
 205     namedArgs.remove("sc");
 206
 207     // Pronunciation
 208     if (currentWord != null) {
 209       if (name.equals("a")) {
 210         // accent tag
 211         currentWord.currentPronunciation = new StringBuilder();
 212         currentWord.accentToPronunciation.put(positionalArgs.get(1), currentWord.currentPronunciation);
 213         return;
 214       }
 215
 216       if (name.equals("IPA") || name.equals("SAMPA") || name.equals("X-SAMPA") || name.equals("enPR")) {
 217         namedArgs.remove("lang");
 218         for (int i = 0; i < 100 && !namedArgs.isEmpty(); ++i) {
 219           final String pron = namedArgs.remove("" + i);
 220           if (pron != null) {
 221             positionalArgs.add(pron);
 222           } else {
 223             if (i > 10) {
 224               break;
 225             }
 226           }
 227         }
 228         if (!(positionalArgs.size() >= 2 && namedArgs.isEmpty())) {
 229           System.err.println("Invalid pronunciation: " + positionalArgs.toString() + namedArgs.toString());
 230         }
 231         if (currentWord.currentPronunciation == null) {
 232           currentWord.currentPronunciation = new StringBuilder();
 233           currentWord.accentToPronunciation.put("", currentWord.currentPronunciation);
 234         }
 235         if (currentWord.currentPronunciation.length() > 0) {
 236           currentWord.currentPronunciation.append("; ");
 237         }
 238         for (int i = 1; i < positionalArgs.size(); ++i) {
 239           if (i > 1) {
 240             currentWord.currentPronunciation.append(",");
 241           }
 242           final String pron = wikiMarkup.matcher(positionalArgs.get(1)).replaceAll("");
 243           currentWord.currentPronunciation.append(pron).append("");
 244         }
 245         currentWord.currentPronunciation.append(" (").append(name).append(")");
 246         return;
 247       }
 248
 249       if (name.equals("qualifier")) {
 250         //assert positionalArgs.size() == 2 && namedArgs.isEmpty() : positionalArgs.toString() + namedArgs.toString();
 251         if (wikiBuilder == null) {
 252           return;
 253         }
 254         wikiBuilder.append(" (").append(positionalArgs.get(1)).append(")");
 255         return;
 256       }
 257
 258       if (name.equals("...")) {
 259         // Skipping any elided text for brevity.
 260         wikiBuilder.append("...");
 261         return;
 262       }
 263
 264       if (passThroughTemplates.contains(name)) {
 265         assert positionalArgs.size() == 1 && namedArgs.isEmpty() : positionalArgs.toString() + namedArgs;
 266         wikiBuilder.append(name);
 267         return;
 268       }
 269
 270       if (ignoreTemplates.contains(name)) {
 271         return;
 272       }
 273
 274       if ("Pronunciation".equals(currentHeading)) {
 275         System.err.println("Unhandled pronunciation template: " + positionalArgs + namedArgs);
 276         return;
 277       }
 278     }  // Pronunciation
 279
 280     // Part of speech
 281     if (insidePartOfSpeech) {
 282
 283       // form of
 284       if (name.equals("form of")) {
 285         namedArgs.remove("sc");
 286         if (positionalArgs.size() < 3 || positionalArgs.size() > 4) {
 287           System.err.println("Invalid form of.");
 288         }
 289         final String token = positionalArgs.get(positionalArgs.size() == 3 ? 2 : 3);
 290         final String grammarForm = WikiParser.simpleParse(positionalArgs.get(1));
 291         currentPartOfSpeech.formOfs.add(new FormOf(grammarForm, token));
 292         return;
 293       }
 294
 295       // The fallback plan: append the template!
 296       if (wikiBuilder != null) {
 297         wikiBuilder.append("{");
 298         boolean first = true;
 299         for (final String arg : positionalArgs) {
 300           if (!first) {
 301             wikiBuilder.append(", ");
 302           }
 303           first = false;
 304           wikiBuilder.append(arg);
 305         }
 306         // This one isn't so useful.
 307         for (final Map.Entry<String, String> entry : namedArgs.entrySet()) {
 308           if (!first) {
 309             wikiBuilder.append(", ");
 310           }
 311           first = false;
 312           wikiBuilder.append(entry.getKey()).append("=").append(entry.getValue());
 313         }
 314         wikiBuilder.append("}");
 315       }
 316
 317       //System.err.println("Unhandled part of speech template: " + positionalArgs + namedArgs);
 318       return;
 319     }  // Part of speech
 320
 321
 322     // Translations
 323     if (name.equals("trans-top")) {
 324       assert positionalArgs.size() >= 1 && namedArgs.isEmpty() : positionalArgs.toString() + namedArgs + title;
 325
 326       if (currentPartOfSpeech == null) {
 327         assert currentWord != null && !currentWord.partsOfSpeech.isEmpty() : title;
 328         System.err.println("Assuming last part of speech for non-nested translation section: " + title);
 329         currentPartOfSpeech = ListUtil.getLast(currentWord.partsOfSpeech);
 330       }
 331
 332       currentTranslationSense = new WikiWord.TranslationSense();
 333       currentPartOfSpeech.translationSenses.add(currentTranslationSense);
 334       if (positionalArgs.size() > 1) {
 335         currentTranslationSense.sense = positionalArgs.get(1);
 336       }
 337       return;
 338     }  // Translations
 339
 340     if (wikiBuilder == null) {
 341       return;
 342     }
 343     if (name.equals("m") || name.equals("f") || name.equals("n") || name.equals("c")) {
 344       assert positionalArgs.size() >= 1 && namedArgs.isEmpty() : positionalArgs.toString() + namedArgs.toString();
 345       wikiBuilder.append("{");
 346       for (int i = 1; i < positionalArgs.size(); ++i) {
 347         wikiBuilder.append(i > 1 ? "," : "");
 348         wikiBuilder.append(positionalArgs.get(i));
 349       }
 350       wikiBuilder.append(name).append("}");
 351
 352     } else  if (name.equals("p")) {
 353       assert positionalArgs.size() == 1 && namedArgs.isEmpty();
 354       wikiBuilder.append("pl.");
 355
 356     } else  if (name.equals("s")) {
 357       assert positionalArgs.size() == 1 && namedArgs.isEmpty() || title.equals("dobra");
 358       wikiBuilder.append("sg.");
 359
 360     } else  if (grammarTemplates.contains(name)) {
 361       assert positionalArgs.size() == 1 && namedArgs.isEmpty() : positionalArgs.toString() + namedArgs;
 362       wikiBuilder.append(name).append(".");
 363
 364     } else  if (name.equals("l")) {
 365       // This template is designed to generate a link to a specific language-section on the target page.
 366       wikiBuilder.append(positionalArgs.size() >= 4 ? positionalArgs.get(3) : positionalArgs.get(2));
 367
 368     } else if (name.equals("t") || name.equals("t+") || name.equals("t-") || name.equals("tø")) {
 369       if (positionalArgs.size() > 2) {
 370         wikiBuilder.append(positionalArgs.get(2));
 371       }
 372       for (int i = 3; i < positionalArgs.size(); ++i) {
 373         wikiBuilder.append(i == 3 ? " {" : ",");
 374         wikiBuilder.append(positionalArgs.get(i));
 375         wikiBuilder.append(i == positionalArgs.size() - 1 ? "}" : "");
 376       }
 377       final String transliteration = namedArgs.remove("tr");
 378       if (transliteration != null) {
 379         wikiBuilder.append(" (").append(transliteration).append(")");
 380       }
 381
 382     } else  if (name.equals("trreq")) {
 383       wikiBuilder.append("{{trreq}}");
 384
 385     } else if (name.equals("qualifier")) {
 386       //assert positionalArgs.size() == 2 && namedArgs.isEmpty() : positionalArgs.toString() + namedArgs.toString();
 387       wikiBuilder.append(" (").append(positionalArgs.get(1)).append(")");
 388
 389     } else if (useRemainingArgTemplates.contains(name)) {
 390       for (int i = 1; i < positionalArgs.size(); ++i) {
 391         if (i != 1) {
 392           wikiBuilder.append(", ");
 393         }
 394         wikiBuilder.append(positionalArgs.get(i));
 395       }
 396     } else if (ignoreTemplates.contains(name)) {
 397       // Do nothing.
 398
 399     } else if (name.equals("initialism")) {
 400       assert positionalArgs.size() <= 2 && namedArgs.isEmpty() : positionalArgs.toString() + namedArgs;
 401       wikiBuilder.append("Initialism");
 402     } else if (name.equals("abbreviation")) {
 403       assert positionalArgs.size() <= 2 && namedArgs.isEmpty() : positionalArgs.toString() + namedArgs;
 404       wikiBuilder.append("Abbreviation");
 405     } else if (name.equals("acronym")) {
 406       assert positionalArgs.size() <= 2 && namedArgs.isEmpty() : positionalArgs.toString() + namedArgs;
 407       wikiBuilder.append("Acronym");
 408     } else {
 409       if (currentTranslationSense != null) {
 410         System.err.println("Unhandled template: " + positionalArgs.toString() + namedArgs);
 411       }
 412     }
 413   }
 414
 415   @Override
 416   public void onText(String text) {
 417     if (wikiBuilder != null) {
 418       wikiBuilder.append(text);
 419       return;
 420     }
 421   }
 422
 423   @Override
 424   public void onHeadingStart(int depth) {
 425     wikiBuilder = new StringBuilder();
 426     currentDepth = depth;
 427     if (currentPartOfSpeech != null && depth <= currentPartOfSpeech.depth) {
 428       currentPartOfSpeech = null;
 429       insidePartOfSpeech = false;
 430     }
 431     if (currentWord != null && depth <= currentWord.depth) {
 432       currentWord = null;
 433     }
 434
 435     currentHeading = null;
 436   }
 437
 438   @Override
 439   public void onHeadingEnd(int depth) {
 440     final String name = wikiBuilder.toString().trim();
 441     wikiBuilder = null;
 442     currentTranslationSense = null;
 443     currentHeading = name;
 444
 445     final boolean lang0 = langPatterns[0].matcher(name).matches();
 446     final boolean lang1 = langPatterns[1].matcher(name).matches();
 447     if (name.equalsIgnoreCase("English") || lang0 || lang1 || name.equalsIgnoreCase("Translingual")) {
 448       currentWord = new WikiWord(title, depth);
 449       if (lang0 && lang1) {
 450         System.err.println("Word is indexed in both index1 and index2: " + title);
 451       }
 452       currentWord.language = name;
 453       currentWord.index = lang0 ? 0 : (lang1 ? 1 : -1);
 454       words.add(currentWord);
 455       return;
 456     }
 457
 458     if (currentWord == null) {
 459       return;
 460     }
 461
 462     if (currentPartOfSpeech != null && depth <= currentPartOfSpeech.depth) {
 463       currentPartOfSpeech = null;
 464     }
 465
 466     insidePartOfSpeech = false;
 467     if (currentPartOfSpeech == null && partOfSpeechHeader.matcher(name).matches()) {
 468       currentPartOfSpeech = new WikiWord.PartOfSpeech(depth, name);
 469       currentWord.partsOfSpeech.add(currentPartOfSpeech);
 470       insidePartOfSpeech = true;
 471       return;
 472     }
 473
 474     if (name.equals("Translations")) {
 475       if (currentWord == null ||
 476           !currentWord.language.equals("English") ||
 477           currentPartOfSpeech == null) {
 478         System.err.println("Unexpected Translations section: " + title);
 479         return;
 480       }
 481       currentTranslationSense = new WikiWord.TranslationSense();
 482     }
 483
 484   }
 485
 486   @Override
 487   public void onListItemStart(String header, int[] section) {
 488     wikiBuilder = new StringBuilder();
 489     if (currentWord != null) {
 490       currentWord.currentPronunciation = null;
 491     }
 492   }
 493
 494
 495   @Override
 496   public void onListItemEnd(String header, int[] section) {
 497     String item = wikiBuilder.toString().trim();
 498     final String oldItem = item;
 499     if (item.length() == 0) {
 500       return;
 501     }
 502     item = WikiParser.simpleParse(item);
 503     wikiBuilder = null;
 504
 505     // Part of speech
 506     if (insidePartOfSpeech) {
 507       assert currentPartOfSpeech != null : title + item;
 508       if (header.equals("#") ||
 509           header.equals("##") ||
 510           header.equals("###") ||
 511           header.equals("####") ||
 512           header.equals(":#") ||
 513           header.equals("::") ||
 514           header.equals(":::*")) {
 515         // Definition.
 516         // :: should append, probably.
 517         currentPartOfSpeech.newMeaning().meaning = item;
 518
 519       // Source
 520       } else if (header.equals("#*") ||
 521                  header.equals("##*") ||
 522                  header.equals("###*")) {
 523         currentPartOfSpeech.lastMeaning().newExample().source = item;
 524
 525       // Example
 526       } else if (header.equals("#:") ||
 527                  header.equals("#*:") ||
 528                  header.equals("#:*") ||
 529                  header.equals("##:") ||
 530                  header.equals("##*:") ||
 531                  header.equals("#:*:") ||
 532                  header.equals("#:*#") ||
 533                  header.equals("#*:") ||
 534                  header.equals("*:") ||
 535                  header.equals("#:::") ||
 536                  header.equals("#**") ||
 537                  header.equals("#*:::") ||
 538                  header.equals("#:#") ||
 539                  header.equals(":::") ||
 540                  header.equals("##:*") ||
 541                  header.equals("###*:")) {
 542         StringUtil.appendLine(currentPartOfSpeech.lastMeaning().newExample().example, item);
 543
 544       // Example in English
 545       } else if (header.equals("#::") ||
 546                  header.equals("#*::") ||
 547                  header.equals("#:**") ||
 548                  header.equals("#*#") ||
 549                  header.equals("##*::")) {
 550         StringUtil.appendLine(currentPartOfSpeech.lastMeaning().lastExample().exampleInEnglish, item);
 551
 552       // Skip
 553       } else if (header.equals("*") ||
 554                  header.equals("**") ||
 555                  header.equals("***") ||
 556                  header.equals("*#") ||
 557                  header.equals(":") ||
 558                  header.equals("::*") ||
 559                  header.equals("#**") ||
 560                  header.equals(":*") ||
 561                  header.equals("#*:*") ||
 562                  header.equals("#*:**") ||
 563                  header.equals("#*:#") ||
 564                  header.equals("#*:*:") ||
 565                  header.equals("#*:*") ||
 566                  header.equals(";")) {
 567         // might have: * {{seeCites}}
 568         // * [[w:Arabic numerals|Arabic numerals]]: 2
 569         //assert item.trim().length() == 0;
 570         System.err.println("Skipping meaning: " + header + " " + item);
 571       } else {
 572         if (title.equals("Yellowknife")) {
 573           return;
 574         }
 575         System.err.println("Busted heading: " + title + "  "+ header + " " + item);
 576       }
 577       return;
 578     }
 579     // Part of speech
 580
 581     // Translation
 582     if (currentTranslationSense != null) {
 583       if (item.indexOf("{{[trreq]{}}}") != -1) {
 584         return;
 585       }
 586
 587       if (currentPartOfSpeech.translationSenses.isEmpty()) {
 588         currentPartOfSpeech.translationSenses.add(currentTranslationSense);
 589       }
 590
 591       final int colonPos = item.indexOf(':');
 592       if (colonPos == -1) {
 593         System.err.println("Invalid translation: title=" + title +  ",  item=" + item);
 594         return;
 595       }
 596       final String lang = item.substring(0, colonPos);
 597       final String trans = item.substring(colonPos + 1).trim();
 598       for (int i = 0; i < 2; ++i) {
 599         if (langPatterns[i].matcher(lang).find()) {
 600           currentTranslationSense.translations.get(i).add(new Translation(lang, trans));
 601         }
 602       }
 603     } // Translation
 604   }
 605
  /** Ignored: this parser takes no action on line breaks. */
  @Override
  public void onNewLine() {
  }

  /** Ignored: this parser takes no action on paragraph breaks. */
  @Override
  public void onNewParagraph() {
  }

  // ----------------------------------------------------------------------

  /** Ignored: comment text is not added to any collected text. */
  @Override
  public void onComment(String text) {
  }

  /** Ignored: bold formatting is not tracked. */
  @Override
  public void onFormatBold(boolean boldOn) {
  }

  /** Ignored: italic formatting is not tracked. */
  @Override
  public void onFormatItalic(boolean italicOn) {
  }
 627
 628   @Override
 629   public void onUnterminated(String start, String rest) {
 630     System.err.printf("OnUnterminated: %s %s %s\n", title, start, rest);
 631   }
 632   @Override
 633   public void onInvalidHeaderEnd(String rest) {
 634     throw new RuntimeException(rest);
 635   }
 636
 637 }