|
||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||
java.lang.Object
  org.apache.manifoldcf.crawler.connectors.webcrawler.WebcrawlerConnector.FindHandler
    org.apache.manifoldcf.crawler.connectors.webcrawler.WebcrawlerConnector.FindHTMLHrefHandler
protected class WebcrawlerConnector.FindHTMLHrefHandler
This class is the handler for HTML parsing during state transitions.
| Field Summary | |
|---|---|
protected java.util.regex.Pattern |
preferredLinkPattern
|
| Fields inherited from class org.apache.manifoldcf.crawler.connectors.webcrawler.WebcrawlerConnector.FindHandler |
|---|
parentURI, targetURI |
| Constructor Summary | |
|---|---|
WebcrawlerConnector.FindHTMLHrefHandler(java.lang.String parentURI,
java.util.regex.Pattern preferredLinkPattern)
|
|
| Method Summary | |
|---|---|
void |
noteAHREF(java.lang.String rawURL)
Note discovered href |
void |
noteDiscoveredLink(java.lang.String rawURL)
Override noteDiscoveredLink |
void |
noteFormEnd()
Note the end of a form |
void |
noteFormInput(java.util.Map inputAttributes)
Note an input tag |
void |
noteFormStart(java.util.Map formAttributes)
Note the start of a form |
void |
noteFRAMESRC(java.lang.String rawURL)
Note discovered FRAME SRC |
void |
noteIMGSRC(java.lang.String rawURL)
Note discovered IMG SRC |
void |
noteLINKHREF(java.lang.String rawURL)
Note discovered href |
void |
noteMetaTag(java.util.Map metaAttributes)
Note a meta tag |
| Methods inherited from class org.apache.manifoldcf.crawler.connectors.webcrawler.WebcrawlerConnector.FindHandler |
|---|
getTargetURI |
| Methods inherited from class java.lang.Object |
|---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Field Detail |
|---|
protected java.util.regex.Pattern preferredLinkPattern
| Constructor Detail |
|---|
public WebcrawlerConnector.FindHTMLHrefHandler(java.lang.String parentURI,
java.util.regex.Pattern preferredLinkPattern)
| Method Detail |
|---|
public void noteMetaTag(java.util.Map metaAttributes)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Specified by: noteMetaTag in interface IMetaTagHandler
Parameters: metaAttributes - are the attributes that belong to the tag.
Throws: org.apache.manifoldcf.core.interfaces.ManifoldCFException
public void noteFormStart(java.util.Map formAttributes)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Specified by: noteFormStart in interface IHTMLHandler
Throws: org.apache.manifoldcf.core.interfaces.ManifoldCFException
public void noteFormInput(java.util.Map inputAttributes)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Specified by: noteFormInput in interface IHTMLHandler
Throws: org.apache.manifoldcf.core.interfaces.ManifoldCFException
public void noteFormEnd()
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Specified by: noteFormEnd in interface IHTMLHandler
Throws: org.apache.manifoldcf.core.interfaces.ManifoldCFException
public void noteDiscoveredLink(java.lang.String rawURL)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Specified by: noteDiscoveredLink in interface IDiscoveredLinkHandler
Overrides: noteDiscoveredLink in class WebcrawlerConnector.FindHandler
Parameters: rawURL - is the raw discovered url. This may be relative, malformed, or otherwise unsuitable for use until final form is achieved.
Throws: org.apache.manifoldcf.core.interfaces.ManifoldCFException
public void noteAHREF(java.lang.String rawURL)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Specified by: noteAHREF in interface IHTMLHandler
Throws: org.apache.manifoldcf.core.interfaces.ManifoldCFException
public void noteLINKHREF(java.lang.String rawURL)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Specified by: noteLINKHREF in interface IHTMLHandler
Throws: org.apache.manifoldcf.core.interfaces.ManifoldCFException
public void noteIMGSRC(java.lang.String rawURL)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Specified by: noteIMGSRC in interface IHTMLHandler
Throws: org.apache.manifoldcf.core.interfaces.ManifoldCFException
public void noteFRAMESRC(java.lang.String rawURL)
throws org.apache.manifoldcf.core.interfaces.ManifoldCFException
Specified by: noteFRAMESRC in interface IHTMLHandler
Throws: org.apache.manifoldcf.core.interfaces.ManifoldCFException
|
||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||