X-Git-Url: http://gitweb.fperrin.net/?p=gen-quickdic.git;a=blobdiff_plain;f=GCIDE_to_tab_separated;fp=GCIDE_to_tab_separated;h=99a497392eadf854b980032be5af678e966f7a38;hp=0000000000000000000000000000000000000000;hb=028a6accf7d7d721797482d65dd4dc9d13611840;hpb=5da87a9ec2370bb2f7ce11e107f07625e42f7171 diff --git a/GCIDE_to_tab_separated b/GCIDE_to_tab_separated new file mode 100755 index 0000000..99a4973 --- /dev/null +++ b/GCIDE_to_tab_separated @@ -0,0 +1,173 @@ +#!/usr/bin/python3 + +import lxml.etree +import re + +entity_map = { + "
", + "&": "&", + "", replace_fake_comments, rawtext, flags=re.DOTALL) + rawtext = f"" + rawtext + f"" + for entity, char in entity_map.items(): + rawtext = rawtext.replace(entity, char) + print(rawtext.splitlines()[5724:5730]) + e = lxml.etree.XML(rawtext)