--- /dev/null
+#!/usr/bin/python3
+
+from glob import glob
+from lxml import etree
+
+xslt_entree = etree.XSLT(etree.parse("XMLittre-entree.xslt"))  # stylesheet is loaded and compiled once, at import time; the path is relative to the current working directory
+
+class Parser:
+ def __init__(self):
+ self.entrees = {}
+
+ def parse_file(self, fname):
+ fxml = etree.parse(fname)
+ root = fxml.getroot()
+ for entree in root.getchildren():
+ assert entree.tag == "entree"
+ terme = entree.attrib["terme"]
+ entree_html = xslt_entree(entree)
+ entree_text = str(entree_html)
+ entree_text = entree_text.replace("\n", "")
+ if terme not in self.entrees:
+ self.entrees[terme] = []
+ self.entrees[terme].append(entree_text)
+
+ def writeout(self, fname):
+ with open(fname, "w") as f:
+ for terme in self.entrees:
+ f.write(terme)
+ f.write("\t")
+ if len(self.entrees[terme]) > 1:
+ f.write("<ol>")
+ for entree in self.entrees[terme]:
+ f.write("<li>")
+ f.write(entree)
+ f.write("</li>")
+ f.write("</ol>")
+ else:
+ f.write(self.entrees[terme][0])
+ f.write("\n")
+
+def main():
+ p = Parser()
+ for fname in glob("../xmlittre-data/?.xml"):
+ p.parse_file(fname)
+ p.writeout("XMLittre.tab_separated")
+
+if __name__ == "__main__":  # run the conversion only when executed as a script, not on import
+ main()