import org.ccil.cowan.tagsoup.Parser; import org.w3c.dom.Document; public class TagSoupParser { public static Document parse(String html) { Parser parser = new Parser(); MyTagSoupContentHandler handler = new MyTagSoupContentHandler(); parser.setContentHandler(handler); parser.parse(new InputSource(new StringReader(html))); return handler.getDocument(); } } class MyTagSoupContentHandler extends DefaultHandler { private DocumentBuilderFactory factory; private DocumentBuilder builder; private Document document; public MyTagSoupContentHandler() { factory = DocumentBuilderFactory.newInstance(); try { builder = factory.newDocumentBuilder(); document = builder.newDocument(); } catch (ParserConfigurationException e) { e.printStackTrace(); } } public Document getDocument() { return document; } // ... }


上一篇:
下一篇:
切换中文