- final List<String> listPrefixes = new ArrayList<String>(); // Scratch list reused across calls: handleForeignListItem clears it, then stores listItemPrefix() of the head list item followed by that of each nested list item it consumes.
- final List<String> listLines = new ArrayList<String>(); // Filled in step with listPrefixes: listItemWikiText() of the same items; element 0 is the head item's text and is the line parsed as the definition.
-
-static final Pattern UNINDEXED_WIKI_TEXT = Pattern.compile( // Grammar-label phrases: a wiki link whose wikiLinkDest() is null is still appended to the definition text, but is not added to the English index when find() locates one of these in its text.
- "(first|second|third)-person (singular|plural)|" + // e.g. "first-person singular", "third-person plural"
- "present tense|" + // the literal phrase "present tense"
- "imperative" // the literal word "imperative"; callers use find(), so it also matches inside longer text
- );
-
- private void handleForeignListItem(final String foreignText, String title, final Collection<String> forms, final WikiTokenizer wikiTokenizer) {
-
- final String prefix = wikiTokenizer.listItemPrefix();
- if (prefix.length() > 1) {
- System.err.println("Prefix too long: " + wikiTokenizer.token());
- return;
- }
-
- listPrefixes.clear();
- listLines.clear();
- listPrefixes.add(prefix);
- listLines.add(wikiTokenizer.listItemWikiText());
- while(wikiTokenizer.nextToken() != null &&
- wikiTokenizer.isNewline() ||
- wikiTokenizer.isComment() ||
- (wikiTokenizer.isListItem() &&
- wikiTokenizer.listItemPrefix().length() > prefix.length() &&
- wikiTokenizer.listItemPrefix().startsWith(prefix))) {
- if (wikiTokenizer.isListItem()) {
- listPrefixes.add(wikiTokenizer.listItemPrefix());
- listLines.add(wikiTokenizer.listItemWikiText());
- }
- }
- if (wikiTokenizer.nextToken() != null) {
- wikiTokenizer.returnToLineStart();
- }
- System.out.println("list lines: " + listLines);
- System.out.println("list prefixes: " + listPrefixes);
-
- final PairEntry pairEntry = new PairEntry();
- final IndexedEntry indexedEntry = new IndexedEntry(pairEntry);
-
- final String foreign = trim(title);
-
- final StringBuilder englishBuilder = new StringBuilder();
-
- final String mainLine = listLines.get(0);
-
- final WikiTokenizer englishTokenizer = new WikiTokenizer(mainLine, false);
- while (englishTokenizer.nextToken() != null) {
- // TODO handle form of....
- if (englishTokenizer.isPlainText()) {
- englishBuilder.append(englishTokenizer.token());
- enIndexBuilder.addEntryWithString(indexedEntry, englishTokenizer.token(), EntryTypeName.WIKTIONARY_ENGLISH_DEF);
- } else if (englishTokenizer.isWikiLink()) {
- final String text = englishTokenizer.wikiLinkText();
- final String link = englishTokenizer.wikiLinkDest();
- if (link != null) {
- if (link.contains("#English")) {
- englishBuilder.append(text);
- enIndexBuilder.addEntryWithString(indexedEntry, text, EntryTypeName.WIKTIONARY_ENGLISH_DEF_WIKI_LINK);
- } else if (link.contains("#") && this.langPattern.matcher(link).find()) {
- englishBuilder.append(text);
- otherIndexBuilder.addEntryWithString(indexedEntry, text, EntryTypeName.WIKTIONARY_ENGLISH_DEF_OTHER_LANG);
- } else {
- System.err.println("Special link: " + englishTokenizer.token());
- // TODO: something here...
- }
- } else {
- // link == null
- englishBuilder.append(text);
- if (!UNINDEXED_WIKI_TEXT.matcher(text).find()) {
- enIndexBuilder.addEntryWithString(indexedEntry, text, EntryTypeName.WIKTIONARY_ENGLISH_DEF_WIKI_LINK);
- }
- }
- } else if (englishTokenizer.isFunction()) {
- final String name = englishTokenizer.functionName();
- if (name.contains(" conjugation of ") ||
- name.contains(" form of ") ||
- name.contains(" feminine of ") ||
- name.contains(" plural of ")) {
- // Ignore these in the index, they're really annoying....
- englishBuilder.append(englishTokenizer.token());
- } else {
- System.err.println("Unexpected function: " + englishTokenizer.token());
- }
- } else {
- if (englishTokenizer.isComment() || englishTokenizer.isMarkup()) {
- } else {
- System.err.println("Unexpected definition text: " + englishTokenizer.token());
- }
- }
- }
- final String english = trim(englishBuilder.toString());
- if (english.length() > 0) {
- final Pair pair = new Pair(english, trim(foreignText), this.swap);
- pairEntry.pairs.add(pair);
- otherIndexBuilder.addEntryWithString(indexedEntry, title, EntryTypeName.WIKTIONARY_TITLE_SINGLE, EntryTypeName.WIKTIONARY_TITLE_MULTI);
- for (final String form : forms) {
- otherIndexBuilder.addEntryWithString(indexedEntry, form, EntryTypeName.WIKTIONARY_FORM_SINGLE, EntryTypeName.WIKTIONARY_FORM_MULTI);
- }
- }