import java.io.IOException;
import java.io.StringReader;
import java.io.UncheckedIOException;
import java.util.Objects;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.ccil.cowan.tagsoup.Parser;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * Entry point for turning HTML text into a DOM {@link Document} using the TagSoup SAX parser.
 */
public class TagSoupParser {

    /**
     * Parses the given HTML text with a TagSoup {@link Parser}, routing its SAX events through a
     * {@code MyTagSoupContentHandler}, and returns the document held by that handler.
     *
     * @param html the HTML text to parse; must not be {@code null}
     * @return the document exposed by the handler once parsing has completed
     * @throws NullPointerException if {@code html} is {@code null}
     * @throws UncheckedIOException if reading the input fails
     * @throws IllegalStateException if the parser or the handler reports a SAX error
     */
    public static Document parse(String html) {
        Objects.requireNonNull(html, "html");

        Parser parser = new Parser();
        MyTagSoupContentHandler handler = new MyTagSoupContentHandler();
        parser.setContentHandler(handler);

        // parse() declares checked exceptions; convert them to unchecked ones (keeping the cause)
        // so this method's signature stays free of a throws clause for existing callers.
        try {
            parser.parse(new InputSource(new StringReader(html)));
        } catch (IOException e) {
            throw new UncheckedIOException("Failed to read HTML input", e);
        } catch (SAXException e) {
            throw new IllegalStateException("Failed to parse HTML input", e);
        }
        return handler.getDocument();
    }
}
/**
 * SAX handler that owns a DOM {@link Document} created at construction time.
 *
 * <p>The document starts out empty. Any SAX callbacks that populate it are not shown in this
 * part of the class.
 */
class MyTagSoupContentHandler extends DefaultHandler {
    private final DocumentBuilderFactory factory;
    private final DocumentBuilder builder;
    private final Document document;

    /**
     * Creates the handler together with a fresh, empty DOM document.
     *
     * @throws IllegalStateException if the JAXP implementation cannot supply a
     *     {@link DocumentBuilder}
     */
    public MyTagSoupContentHandler() {
        factory = DocumentBuilderFactory.newInstance();
        try {
            builder = factory.newDocumentBuilder();
            document = builder.newDocument();
        } catch (ParserConfigurationException e) {
            // Fail fast instead of printing the stack trace and leaving the document null,
            // which would only surface later as a null return from getDocument().
            throw new IllegalStateException("Unable to create a DOM DocumentBuilder", e);
        }
    }

    /**
     * Returns the document owned by this handler.
     *
     * @return the document created in the constructor; never {@code null}
     */
    public Document getDocument() {
        return document;
    }
    // ...
}