// 使用 ObjectFindingVisitor 配合不同的 Tag.class
分析网页
try
{
ImageTag imgLink;
ObjectFindingVisitor visitor = new
ObjectFindingVisitor(ImageTag.class);
Parser parser = new Parser();
parser.setURL(url);
parser.setEncoding(parser.getEncoding());
parser.visitAllNodesWith(visitor);
Node[] nodes = visitor.getTags();
for (int i = 0; i < nodes.length; i++)
{
// ImageTag
imgLink = (ImageTag) nodes[i];
// "http://www.msn.com.tw/webinclude/zh-tw/images/msn.gif"
=> ImageURL
System.out.println("ImageURL = " +
imgLink.getImageURL());
// "/webinclude/zh-tw/images/msn.gif" =>
ImageLocation
System.out.println("ImageLocation = " +
imgLink.extractImageLocn());
// SRC value
System.out.println("SRC = " +
imgLink.getAttribute("SRC"));
}
}
catch (Exception e)
{
e.printStackTrace();
}
// Analyze the page by handing the parser a NodeFilter; different filters
// select different nodes. Here a TagNameFilter selects tags named "IMG".
// NOTE(review): a NodeClassFilter(ImageTag.class) can presumably be passed
// instead to select by node class - confirm against the HTMLParser docs.
try
{
    NodeFilter filter = new TagNameFilter("IMG");
    Parser parser = new Parser();
    parser.setURL(url);
    // Re-applies whatever encoding the parser reports once the URL has been set.
    parser.setEncoding(parser.getEncoding());
    NodeList list = parser.extractAllNodesThatMatch(filter);
    for (int i = 0; i < list.size(); i++)
    {
        // Print each matched node as HTML text.
        System.out.println(list.elementAt(i).toHtml());
    }
}
catch (Exception e)
{
    // Best-effort demo code: report the failure and carry on with the next example.
    e.printStackTrace();
}
// Analyze the page by asking the parser directly for all nodes of a tag class
// (ImageTag.class here) and printing each image URL.
try
{
    Parser pageParser = new Parser();
    pageParser.setURL(url);
    pageParser.setEncoding(pageParser.getEncoding());
    Node[] imageNodes = pageParser.extractAllNodesThatAre(ImageTag.class);
    int imageCount = imageNodes.length;
    for (int idx = 0; idx < imageCount; idx++)
    {
        // Nodes were requested by ImageTag.class, so each one is cast to ImageTag.
        ImageTag current = (ImageTag) imageNodes[idx];
        String imageUrl = current.getImageURL();
        System.out.println(imageUrl);
    }
}
catch (Exception ex)
{
    ex.printStackTrace();
}