--- /dev/null
+# Delete a target whose recipe fails after it has started writing it, so a
+# truncated output is never mistaken for an up-to-date one on the next run.
+.DELETE_ON_ERROR:
+
+# Directory (relative to this Makefile) that holds the extracted OPTED pages.
+OPTED_SOURCEDIR := OPTED/v003
+
+# One page per initial letter plus "new":
+#   OPTED/v003/wb1913_a.html ... OPTED/v003/wb1913_z.html OPTED/v003/wb1913_new.html
+# The letters are spelled out so that parsing the Makefile neither forks a
+# shell nor depends on bash brace expansion.
+OPTED_LETTERS := a b c d e f g h i j k l m n o p q r s t u v w x y z
+OPTED_FILES := $(addprefix $(OPTED_SOURCEDIR)/wb1913_,$(addsuffix .html,$(OPTED_LETTERS) new))
+
+# Debug helper ("essai" is French for "trial"): print the list of OPTED pages
+# this Makefile expects.  The recipe used to echo the undefined $(FILES).
+.PHONY: essai
+essai:
+	@echo $(OPTED_FILES)
+
+# Default build: the v006 dictionary produced from the tab-separated input.
+# .DEFAULT_GOAL is set explicitly because `all` is not the first rule in the
+# file, so a bare `make` would otherwise run the debug target above it.
+.PHONY: all
+.DEFAULT_GOAL := all
+all: OPTED.v006-from-tab_separated.quickdic
+
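+# optedv003.hqx has no active rule (the one below is commented out), so the
+# archive must already be present next to this Makefile.  It can be fetched
+# by hand with the command shown in the disabled recipe:
+# optedv003.hqx:
+#	wget 'http://www.mso.anu.edu.au/~ralph/OPTED/optedv003.hqx'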
+
+# Unpack the OPTED archive, convert CR line endings to LF and apply the local
+# corrections from OPTED.patch.
+#
+# A single recipe run produces every page, so the work hangs off one stamp
+# file instead of being declared once per page: listing all pages as targets
+# of the same recipe defines independent rules, which `make -j` may run
+# concurrently (extracting and patching the same tree several times).
+OPTED/.extracted.stamp: optedv003.hqx OPTED.patch
+	# Start from a clean tree so the patch is never applied to files that
+	# an earlier run already patched.
+	rm -fr OPTED
+	hexbin -d $<
+	unar OPTED.sit.data
+	# NUL-separated names keep paths containing whitespace intact.
+	find OPTED -type f -print0 | xargs -0 sed -i 's/\r/\n/g'
+	cd OPTED && patch -p1 < ../OPTED.patch
+	touch $@
+
+# The pages themselves are by-products of the stamp rule above; the empty
+# recipe only tells make where they come from.
+$(OPTED_FILES): OPTED/.extracted.stamp ;
+
+# Convert the extracted pages into OPTED.tab_separated.
+# The converter script is listed as a prerequisite so that editing it
+# regenerates the output instead of silently reusing a stale file.
+OPTED.tab_separated: $(OPTED_FILES) OPTED_to_tab_separated
+	./OPTED_to_tab_separated
+
+# Build the v007 .quickdic file from the tab-separated input by running
+# DictionaryPC's run.sh.  run.sh is started from ../DictionaryPC, hence the
+# absolute $(CURDIR) paths; they are quoted so a checkout path containing
+# whitespace still reaches run.sh as a single argument.
+# --print also writes a companion dump to $@.txt.
+OPTED.v007-from-tab_separated.quickdic: OPTED.tab_separated
+	cd ../DictionaryPC && ./run.sh \
+	  --dictInfo="Webster's Unabridged Dictionary, from the OPTED project" \
+	  --input1="$(CURDIR)/$<" \
+	  --input1Charset=UTF8 \
+	  --input1Format=tab_separated \
+	  --input1Name="Webster-OPTED" \
+	  --lang1=EN \
+	  --lang1Stoplist=data/inputs/stoplists/en.txt \
+	  --dictOut="$(CURDIR)/$@" \
+	  --print="$(CURDIR)/$@.txt"
+
+# Convert the v007 dictionary to v006 with DictionaryPC's convert_to_v6.sh.
+# Any previous output is removed first.  Paths are absolute (the script runs
+# from ../DictionaryPC) and quoted so whitespace in the checkout path is safe.
+OPTED.v006-from-tab_separated.quickdic: OPTED.v007-from-tab_separated.quickdic
+	$(RM) $@
+	cd ../DictionaryPC && ./convert_to_v6.sh "$(CURDIR)/$<" "$(CURDIR)/$@"
+
+# Convert the extracted pages into OPTED.chemnitz.
+# The converter reads every OPTED/v003/wb1913_*.html page, so all of them are
+# prerequisites (not just the "a" page), and the script itself is listed so
+# that editing it regenerates the output.
+OPTED.chemnitz: $(OPTED_FILES) OPTED_to_chemnitz
+	./OPTED_to_chemnitz
+
+# Build the v007 .quickdic file from the chemnitz-format input by running
+# DictionaryPC's run.sh.  run.sh is started from ../DictionaryPC, hence the
+# absolute $(CURDIR) paths; they are quoted so a checkout path containing
+# whitespace still reaches run.sh as a single argument.
+# --print also writes a companion dump to $@.txt.
+OPTED.v007-from-chemnitz.quickdic: OPTED.chemnitz
+	cd ../DictionaryPC && ./run.sh \
+	  --dictInfo="Webster's Unabridged Dictionary, from the OPTED project" \
+	  --input1="$(CURDIR)/$<" \
+	  --input1Charset=UTF8 \
+	  --input1Format=chemnitz \
+	  --input1Name="Webster-OPTED" \
+	  --lang1=EN \
+	  --lang1Stoplist=data/inputs/stoplists/en.txt \
+	  --dictOut="$(CURDIR)/$@" \
+	  --print="$(CURDIR)/$@.txt"
+
+# Convert the v007 dictionary to v006 with DictionaryPC's convert_to_v6.sh.
+# Any previous output is removed first.  Paths are absolute (the script runs
+# from ../DictionaryPC) and quoted so whitespace in the checkout path is safe.
+OPTED.v006-from-chemnitz.quickdic: OPTED.v007-from-chemnitz.quickdic
+	$(RM) $@
+	cd ../DictionaryPC && ./convert_to_v6.sh "$(CURDIR)/$<" "$(CURDIR)/$@"
+
+# Remove everything this Makefile generates: the dictionaries, the
+# "$@.txt" dumps written through --print by the v007 rules, the intermediate
+# converter outputs and the extracted OPTED tree.
+# Declared phony so a stray file named "clean" cannot disable the target.
+.PHONY: clean
+clean:
+	rm -f OPTED.*.quickdic OPTED.*.quickdic.txt OPTED.tab_separated OPTED.chemnitz
+	rm -fr OPTED/
--- /dev/null
+diff '--exclude=*~' -U0 -r OPTED/v003/wb1913_a.html OPTED.mine/v003/wb1913_a.html
+--- OPTED/v003/wb1913_f.html 2021-01-31 00:20:51.926049448 +0000
++++ OPTED.mine/v003/wb1913_f.html 2021-01-30 23:18:15.986529554 +0000
+@@ -5148 +5147,0 @@
+-</P>
+diff '--exclude=*~' -U0 -r OPTED/v003/wb1913_r.html OPTED.mine/v003/wb1913_r.html
+--- OPTED/v003/wb1913_r.html 2021-01-31 00:20:51.870050835 +0000
++++ OPTED.mine/v003/wb1913_r.html 2021-01-30 23:18:16.058528321 +0000
+@@ -2674 +2674,2 @@
+-<P><B>Reenforce</B> (<I>v.</I>) That part of a cannon near the breech which is thicker than the rest of the piece, so as better to resist the force of the exploding powder. See Illust. of Cannon.<P><B>Reenforce</B> (<I>v.</I>) (b) <P><B>Reenforce</B> (<I>v.</I>) An additional thickness of canvas, cloth, or the like, around an eyelet, buttonhole, etc.</P>
++<P><B>Reenforce</B> (<I>v.</I>) That part of a cannon near the breech which is thicker than the rest of the piece, so as better to resist the force of the exploding powder. See Illust. of Cannon.</P>
++<P><B>Reenforce</B> (<I>v.</I>) An additional thickness of canvas, cloth, or the like, around an eyelet, buttonhole, etc.</P>
+diff '--exclude=*~' -U0 -r OPTED/v003/wb1913_s.html OPTED.mine/v003/wb1913_s.html
+--- OPTED/v003/wb1913_s.html 2021-01-31 00:20:51.894050242 +0000
++++ OPTED.mine/v003/wb1913_s.html 2021-01-30 23:23:25.685218008 +0000
+@@ -245,7 +245,7 @@
+-<P><B>Sad</B> (<I>supperl.</I>) Sated; satisfied; weary; tired.</P>
+-<P><B>Sad</B> (<I>supperl.</I>) Heavy; weighty; ponderous; close; hard.</P>
+-<P><B>Sad</B> (<I>supperl.</I>) Dull; grave; dark; somber; -- said of colors.</P>
+-<P><B>Sad</B> (<I>supperl.</I>) Serious; grave; sober; steadfast; not light or frivolous.</P>
+-<P><B>Sad</B> (<I>supperl.</I>) Affected with grief or unhappiness; cast down with affliction; downcast; gloomy; mournful.</P>
+-<P><B>Sad</B> (<I>supperl.</I>) Afflictive; calamitous; causing sorrow; as, a sad accident; a sad misfortune.</P>
+-<P><B>Sad</B> (<I>supperl.</I>) Hence, bad; naughty; troublesome; wicked.</P>
++<P><B>Sad</B> (<I>superl.</I>) Sated; satisfied; weary; tired.</P>
++<P><B>Sad</B> (<I>superl.</I>) Heavy; weighty; ponderous; close; hard.</P>
++<P><B>Sad</B> (<I>superl.</I>) Dull; grave; dark; somber; -- said of colors.</P>
++<P><B>Sad</B> (<I>superl.</I>) Serious; grave; sober; steadfast; not light or frivolous.</P>
++<P><B>Sad</B> (<I>superl.</I>) Affected with grief or unhappiness; cast down with affliction; downcast; gloomy; mournful.</P>
++<P><B>Sad</B> (<I>superl.</I>) Afflictive; calamitous; causing sorrow; as, a sad accident; a sad misfortune.</P>
++<P><B>Sad</B> (<I>superl.</I>) Hence, bad; naughty; troublesome; wicked.</P>
+@@ -12942,2 +12942 @@
+-<P><B>Spilikin</B> (<I>n.</I>) One of a number of small pieces or pegs of wood, ivory, bone, or other material, for playing a game, or for counting the score in a game, as in cribbage. In the plural (spilikins<P><B>spilikins</B> (<I>pl. </I>) of Spilikin</P>
+-), a game played with such pieces; pushpin.</P>
++<P><B>Spilikin</B> (<I>n.</I>) One of a number of small pieces or pegs of wood, ivory, bone, or other material, for playing a game, or for counting the score in a game, as in cribbage. In the plural (spilikins), a game played with such pieces; pushpin.</P>
+diff '--exclude=*~' -U0 -r OPTED/v003/wb1913_f.html OPTED.mine/v003/wb1913_f.html
+--- OPTED/v003/wb1913_f.html 2021-01-31 12:11:13.664266778 +0000
++++ OPTED.mine/v003/wb1913_f.html 2021-01-31 12:04:23.084611285 +0000
+@@ -4950 +4950 @@
+-<P><B>Forewent 2</B> (<I>imp.</I>) of Forego</P>
++<P><B>Forewent</B> (<I>imp.</I>) of Forego</P>
+diff '--exclude=*~' -U0 -r OPTED/v003/wb1913_g.html OPTED.mine/v003/wb1913_g.html
+--- OPTED/v003/wb1913_g.html 2021-01-31 12:11:13.660266621 +0000
++++ OPTED.mine/v003/wb1913_g.html 2021-01-31 12:10:32.334605895 +0000
+@@ -4288 +4288 @@
+-<P><B>\d8Gregarin\91</B> (<I>n. pl.</I>) An order of Protozoa, allied to the Rhizopoda, and parasitic in other animals, as in the earthworm, lobster, etc. When adult, they have a small, wormlike body inclosing a nucleus, but without external organs; in one of the young stages, they are amoebiform; -- called also Gregarinida, and Gregarinaria.</P>
++<P><B>Gregarin¾</B> (<I>n. pl.</I>) An order of Protozoa, allied to the Rhizopoda, and parasitic in other animals, as in the earthworm, lobster, etc. When adult, they have a small, wormlike body inclosing a nucleus, but without external organs; in one of the young stages, they are amoebiform; -- called also Gregarinida, and Gregarinaria.</P>
+@@ -4291 +4291 @@
+-<P><B>\d8Gregarinida</B> (<I></I>) Gregarinae.</P>
++<P><B>Gregarinida</B> (<I></I>) Gregarinae.</P>
+diff '--exclude=*~' -U0 -r OPTED/v003/wb1913_s.html OPTED.mine/v003/wb1913_s.html
+--- OPTED/v003/wb1913_s.html 2021-01-31 12:11:13.672267087 +0000
++++ OPTED.mine/v003/wb1913_s.html 2021-01-31 12:03:57.578461661 +0000
+@@ -1804,2 +1804,3 @@
+-<P><B>, a , or an . PCP. It is presumably an older spelling of scanned. --2. </B> (<I></I>) Specifically (Pros.), to go through with, as a verse, marking and distinguishing the feet of which it is composed; to show, in reading, the metrical structure of; to recite metrically.</P>
+-<P><B>, a , or an . PCP. It is presumably an older spelling of scanned. --2. Specifically (Pros.), to go through with, as a verse, marking and distinguishing the feet of which it is composed; to show, in reading, the metrical structure of; to recite metrically</B> (<I></I>) To go over and examine point by point; to examine with care; to look closely at or into; to scrutinize.</P>
++<P><B>Scan</B> It is presumably an older spelling of scanned. Specifically (Pros.), to go through with, as a verse, marking and distinguishing the feet of which it is composed; to show, in reading, the metrical structure of; to recite metrically.</P>
++<P><B>Scan</B> It is presumably an older spelling of scanned. Specifically (Pros.), to go through with, as a verse, marking and distinguishing the feet of which it is composed; to show, in reading, the metrical structure of; to recite metrically</P>
++<P><B>Scan</B> (<I></I>) To go over and examine point by point; to examine with care; to look closely at or into; to scrutinize.</P>
+diff '--exclude=*~' -U0 -r OPTED/v003/wb1913_w.html OPTED.mine/v003/wb1913_w.html
+--- OPTED/v003/wb1913_w.html 2021-01-31 12:11:13.596264143 +0000
++++ OPTED.mine/v003/wb1913_w.html 2021-01-31 12:04:39.293924871 +0000
+@@ -3286 +3286 @@
+-<P><B>Winnard 2</B> (<I>n.</I>) The redwing.</P>
++<P><B>Winnard</B> (<I>n.</I>) The redwing.</P>
+diff '--exclude=*~' -U0 -r OPTED/v003/wb1913_p.html OPTED.mine/v003/wb1913_p.html
+--- OPTED/v003/wb1913_p.html 2021-01-31 12:12:06.466217986 +0000
++++ OPTED.mine/v003/wb1913_p.html 2021-01-31 12:14:29.514677672 +0000
+@@ -8283 +8283 @@
+-<P><B>Poach</B> (<I>v. t.</I>) To stab; to pierce; to spear, \as fish.</P>
++<P><B>Poach</B> (<I>v. t.</I>) To stab; to pierce; to spear, as fish.</P>
+diff '--exclude=*~' -U0 -r OPTED/v003/wb1913_c.html OPTED.mine/v003/wb1913_c.html
+--- OPTED/v003/wb1913_c.html 2021-01-31 12:34:47.541056945 +0000
++++ OPTED.mine/v003/wb1913_c.html 2021-01-31 12:35:33.646765527 +0000
+@@ -12084 +12084 @@
+-<P><B>Convict1ible</B> (<I>a.</I>) Capable of being convicted.</P>
++<P><B>Convictible</B> (<I>a.</I>) Capable of being convicted.</P>
--- /dev/null
+#!/usr/bin/python3
+
+from html.parser import HTMLParser
+from glob import glob
+
+class OptedParser(HTMLParser):
+    """Turn OPTED HTML entries into "headword :: definition" lines.
+
+    An entry is expected to have the shape
+    ``<P><B>headword</B> (<I>part of speech</I>) definition text</P>``.
+    The text inside ``<B>`` is lower-cased and becomes the headword, the text
+    inside ``<I>`` becomes the part of speech, and all other text between
+    ``</B>`` and ``</P>`` is the definition.  One line per entry is written to
+    ``outputfile`` when ``</P>`` is reached.  Assertions guard the expected
+    tag order and make the parser stop on unexpected markup.
+    """
+
+    def __init__(self, outputfile):
+        super().__init__()
+        self.outputfile = outputfile
+        # Flags telling handle_data() which buffer receives the next text chunk.
+        self.reading_entry = False
+        self.reading_pos = False
+        self._start_new_entry()
+
+    def _start_new_entry(self):
+        """Leave definition mode and empty the per-entry text buffers."""
+        self.reading_definition = False
+        self.entry = ""
+        self.pos = ""
+        self.definition = ""
+
+    def handle_starttag(self, tag, attrs):
+        if tag == "b":
+            # A headword may only start while nothing else is in progress.
+            assert not (self.reading_entry or self.reading_pos
+                        or self.reading_definition or self.entry)
+            self.reading_entry = True
+            return
+        if tag == "i":
+            # At most one part-of-speech element per entry.
+            assert not (self.reading_entry or self.reading_pos or self.pos)
+            self.reading_pos = True
+
+    def handle_data(self, data):
+        if self.reading_entry:
+            self.entry = self.entry + data.lower()
+            return
+        if self.reading_pos:
+            self.pos = self.pos + data
+            return
+        if self.reading_definition:
+            self.definition = self.definition + data
+
+    def handle_endtag(self, tag):
+        if tag == "p":
+            self._finish_entry()
+        elif tag == "i":
+            assert self.reading_pos and self.reading_definition and not self.reading_entry
+            self.reading_pos = False
+        elif tag == "b":
+            assert self.reading_entry and not (self.reading_definition or self.reading_pos)
+            # Everything after the headword belongs to the definition.
+            self.reading_entry = False
+            self.reading_definition = True
+
+    def _finish_entry(self):
+        """Validate the collected entry, write its output line and reset."""
+        assert self.reading_definition and not (self.reading_entry or self.reading_pos)
+        assert self.entry and self.definition
+        # "::" and "|" must not appear in the raw headword or definition.
+        for field in (self.entry, self.definition):
+            assert "::" not in field and "|" not in field
+
+        text = self.definition
+        if self.pos:
+            text = f"<b>{self.entry}</b> (<i>{self.pos}</i>) {text}"
+        # The part of speech was captured separately, which leaves an empty
+        # "()" behind in the definition text; drop it and trim the result.
+        text = text.replace("()", "").strip()
+
+        self.outputfile.write(f"{self.entry} :: {text}\n")
+        self._start_new_entry()
+
+def main():
+    """Convert every OPTED/v003/wb1913_*.html page into OPTED.chemnitz.
+
+    Pages are processed in sorted filename order, decoded as MacRoman, and
+    fed through a single OptedParser that writes UTF-8 output.
+    """
+    pages = sorted(glob("OPTED/v003/wb1913_*.html"))
+    with open("OPTED.chemnitz", "w", encoding="utf-8") as outputfile:
+        opted_parser = OptedParser(outputfile)
+        for fname in pages:
+            print(f"Running on {fname}")
+            with open(fname, mode="r",
+                      encoding="macroman", errors="strict") as inputfile:
+                html_text = inputfile.read()
+            opted_parser.feed(html_text)
+
+
+if __name__ == "__main__":
+    main()
--- /dev/null
+#!/usr/bin/python3
+
+from html.parser import HTMLParser
+from glob import glob
+
+class OptedParser(HTMLParser):
+    """Collect OPTED HTML entries and write them as tab-separated lines.
+
+    An entry is expected to have the shape
+    ``<P><B>headword</B> (<I>part of speech</I>) definition text</P>``.
+    The text inside ``<B>`` is lower-cased and becomes the headword, the text
+    inside ``<I>`` becomes the part of speech, and all other text between
+    ``</B>`` and ``</P>`` is the definition.  Definitions are grouped per
+    headword in ``self.entries`` (headword -> list of definitions, in the
+    order encountered) and written out by ``write_definitions``.
+    Assertions guard the expected tag order and stop on unexpected markup.
+    """
+
+    # Tabs and line breaks delimit fields and records in the output file, so
+    # any that occur inside a headword or definition are turned into spaces.
+    _TSV_UNSAFE = str.maketrans("\t\r\n", "   ")
+
+    def __init__(self):
+        super().__init__()
+        # Flags telling handle_data() which buffer receives the next text chunk.
+        self.reading_headword = False
+        self.reading_pos = False
+        self.reading_definition = False
+        # Text buffers for the entry currently being parsed.
+        self.headword = ""
+        self.pos = ""
+        self.definition = ""
+        # headword -> list of formatted definitions.
+        self.entries = {}
+
+    def _add_definition(self, headword, definition):
+        """Append ``definition`` to the list kept for ``headword``."""
+        self.entries.setdefault(headword, []).append(definition)
+
+    def write_definitions(self, outputfile):
+        """Write one ``headword<TAB>definition`` line per headword.
+
+        A headword with a single definition gets that definition verbatim;
+        several definitions are wrapped in ``<ol><li>...</li></ol>``.  Tabs
+        and line breaks inside either field are replaced by spaces so every
+        record stays on one line with exactly one field separator.  A notice
+        is printed for each headword containing a space or a dash.
+
+        Args:
+            outputfile: writable text file object.
+        """
+        for headword, definitions in self.entries.items():
+            key = headword.translate(self._TSV_UNSAFE)
+            cleaned = [text.translate(self._TSV_UNSAFE) for text in definitions]
+            if len(cleaned) == 1:
+                body = cleaned[0]
+            else:
+                items = "".join(f"<li>{text}</li>" for text in cleaned)
+                body = f"<ol>{items}</ol>"
+            outputfile.write(f"{key}\t{body}\n")
+            if " " in headword or "-" in headword:
+                print(f"<{headword}> has space or dash")
+
+    def handle_starttag(self, tag, attrs):
+        if tag == "b":
+            # A headword may only start while nothing else is in progress.
+            assert not self.reading_headword
+            assert not self.reading_pos
+            assert not self.reading_definition
+            assert not self.headword
+
+            self.reading_headword = True
+
+        elif tag == "i":
+            # At most one part-of-speech element per entry.
+            assert not self.reading_headword
+            assert not self.reading_pos
+            assert not self.pos
+
+            self.reading_pos = True
+
+    def handle_data(self, data):
+        if self.reading_headword:
+            self.headword += data.lower()
+        elif self.reading_pos:
+            self.pos += data
+        elif self.reading_definition:
+            self.definition += data
+
+    def handle_endtag(self, tag):
+        if tag == "b":
+            assert self.reading_headword
+            assert not self.reading_definition
+            assert not self.reading_pos
+
+            # Everything after the headword belongs to the definition.
+            self.reading_headword = False
+            self.reading_definition = True
+
+        elif tag == "i":
+            assert not self.reading_headword
+            assert self.reading_pos
+            assert self.reading_definition
+
+            self.reading_pos = False
+
+        elif tag == "p":
+            assert not self.reading_headword
+            assert self.reading_definition
+            assert not self.reading_pos
+            assert self.headword
+            assert self.definition
+
+            if self.pos:
+                self.definition = f"(<i>{self.pos}</i>) {self.definition}"
+            # The part of speech was captured separately, which leaves an
+            # empty "()" behind in the definition text; drop it and trim.
+            self.definition = self.definition.replace("()", "")
+            self.definition = self.definition.strip()
+
+            assert not "\\" in self.headword, f"\\ for word {self.headword}"
+            if any(str(digit) in self.headword for digit in range(10)):
+                print(f"Warning: {self.headword} has digits")
+
+            self._add_definition(self.headword, self.definition)
+            self.reading_definition = False
+            self.headword = ""
+            self.pos = ""
+            self.definition = ""
+
+def main():
+    """Convert every OPTED/v003/wb1913_*.html page into OPTED.tab_separated.
+
+    Pages are decoded as MacRoman and fed through a single OptedParser; the
+    collected entries are then written as UTF-8.
+    """
+    opted_parser = OptedParser()
+    # glob() returns names in filesystem-dependent order, and the order in
+    # which pages are parsed determines the order of headwords (and of the
+    # definitions under each headword) in the output.  Sorting makes the
+    # generated file reproducible from one machine or run to the next.
+    for fname in sorted(glob("OPTED/v003/wb1913_*.html")):
+        print(f"Running on {fname}")
+        with open(fname, mode="r",
+                  encoding="macroman", errors="strict") as inputfile:
+            opted_parser.feed(inputfile.read())
+
+    with open("OPTED.tab_separated", "w", encoding="utf-8") as outputfile:
+        opted_parser.write_definitions(outputfile)
+
+
+if __name__ == "__main__":
+    main()