#!/usr/bin/python3
"""Convert XMLittre XML files into a single tab-separated file.

Every ``entree`` element found in the input files is rendered through the
``XMLittre-entree.xslt`` stylesheet, grouped by its ``terme`` attribute and
written out as one ``terme<TAB>rendered text`` record.
"""

from glob import glob

from lxml import etree

# Compiled once at import time and reused for every entry.
xslt_entree = etree.XSLT(etree.parse("XMLittre-entree.xslt"))


class Parser:
    """Accumulate rendered entries per ``terme`` and write them to a file."""

    def __init__(self):
        # Maps each ``terme`` to the list of its rendered entries, in the
        # order in which they were parsed.
        self.entrees = {}

    def parse_file(self, fname):
        """Render every child of the root element of *fname* and store it.

        Args:
            fname: Path of an XML file whose root contains only ``entree``
                children, each carrying a ``terme`` attribute.

        Raises:
            ValueError: If a child of the root is not an ``entree`` element.
            KeyError: If an ``entree`` element has no ``terme`` attribute.
        """
        root = etree.parse(fname).getroot()
        # Iterating an element yields its direct children; this replaces the
        # deprecated getchildren() call.
        for entree in root:
            # Explicit check instead of ``assert``, which is stripped
            # when Python runs with -O.
            if entree.tag != "entree":
                raise ValueError(
                    f"{fname}: expected an 'entree' element, "
                    f"found {entree.tag!r}"
                )
            terme = entree.attrib["terme"]
            # Newlines are removed so that the rendered entry stays on one
            # line of the output file.
            entree_text = str(xslt_entree(entree)).replace("\n", "")
            self.entrees.setdefault(terme, []).append(entree_text)

    def writeout(self, fname):
        """Write all collected entries to *fname*, one record per ``terme``.

        A ``terme`` with a single entry is written as ``terme<TAB>entry``.
        A ``terme`` with several entries has each entry wrapped in the
        separator strings below.

        Args:
            fname: Path of the output file; it is overwritten.
        """
        # Explicit UTF-8 so the output does not depend on the locale's
        # default encoding (which may be unable to encode the entries).
        with open(fname, "w", encoding="utf-8") as f:
            for terme, rendered in self.entrees.items():
                f.write(terme)
                f.write("\t")
                if len(rendered) > 1:
                    # NOTE(review): the four separator literals in this
                    # branch are reproduced exactly as they appear in the
                    # source, but they look garbled (they embed line breaks
                    # in a record although parse_file strips newlines).
                    # Possibly they were list markup originally - confirm
                    # against the upstream file before relying on them.
                    f.write("\n    ")
                    for entree in rendered:
                        f.write("\n  1. ")
                        f.write(entree)
                        f.write("\n  2. ")
                    f.write("\n")
                else:
                    f.write(rendered[0])
                f.write("\n")


def main():
    """Parse every single-character-named XML data file and write the result."""
    p = Parser()
    # glob() returns paths in arbitrary order; sorting makes the order of the
    # output records reproducible from run to run.
    for fname in sorted(glob("../xmlittre-data/?.xml")):
        p.parse_file(fname)
    p.writeout("XMLittre.tab_separated")


if __name__ == "__main__":
    main()