|
||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||
java.lang.Object
  org.apache.manifoldcf.crawler.connectors.rss.Robots.Host
protected class Robots.Host
This class maintains status for a given host. There's an instance of this class for each host in the robots cache.
| Field Summary | |
|---|---|
protected int |
checkingRobots
This will be set to nonzero if the robots structure is currently in use |
protected java.lang.String |
hostName
Host name |
protected long |
invalidTime
Timestamp. |
protected boolean |
isValid
This flag describes whether or not the host record is valid yet. |
protected int |
port
Port |
protected java.lang.String |
protocol
Protocol |
protected boolean |
readingRobots
This will be set to "true" if the robots.txt for this host is in the process of being read. |
protected java.util.ArrayList |
records
This is the list of robots records for the host, or null if no robots.txt found. |
| Constructor Summary | |
|---|---|
Robots.Host(java.lang.String protocol,
int port,
java.lang.String hostName)
Constructor. |
|
| Method Summary | |
|---|---|
boolean |
canBeFlushed(long currentTime)
Check if the current record can be flushed. |
boolean |
isFetchAllowed(long currentTime,
java.lang.String pathString,
java.lang.String userAgent,
java.lang.String from,
double minimumMillisecondsPerBytePerServer,
int maxOpenConnectionsPerServer,
long minimumMillisecondsPerFetchPerServer,
java.lang.String proxyHost,
int proxyPort,
java.lang.String proxyAuthDomain,
java.lang.String proxyAuthUsername,
java.lang.String proxyAuthPassword,
org.apache.manifoldcf.crawler.interfaces.IVersionActivity activities,
int connectionLimit)
Check a given path string against this host's robots file. |
protected void |
makeValid(long currentTime,
java.lang.String userAgent,
java.lang.String from,
double minimumMillisecondsPerBytePerServer,
int maxOpenConnectionsPerServer,
long minimumMillisecondsPerFetchPerServer,
java.lang.String proxyHost,
int proxyPort,
java.lang.String proxyAuthDomain,
java.lang.String proxyAuthUsername,
java.lang.String proxyAuthPassword,
java.lang.String hostName,
org.apache.manifoldcf.crawler.interfaces.IVersionActivity activities,
int connectionLimit)
Initialize the record. |
protected void |
parseRobotsTxt(java.io.BufferedReader r,
java.lang.String hostName,
org.apache.manifoldcf.crawler.interfaces.IVersionActivity activities)
Parse the robots.txt file using a reader. |
| Methods inherited from class java.lang.Object |
|---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Field Detail |
|---|
protected java.lang.String protocol
protected int port
protected java.lang.String hostName
protected long invalidTime
protected boolean isValid
protected java.util.ArrayList records
protected boolean readingRobots
protected int checkingRobots
| Constructor Detail |
|---|
public Robots.Host(java.lang.String protocol,
int port,
java.lang.String hostName)
| Method Detail |
|---|
public boolean isFetchAllowed(long currentTime,
java.lang.String pathString,
java.lang.String userAgent,
java.lang.String from,
double minimumMillisecondsPerBytePerServer,
int maxOpenConnectionsPerServer,
long minimumMillisecondsPerFetchPerServer,
java.lang.String proxyHost,
int proxyPort,
java.lang.String proxyAuthDomain,
java.lang.String proxyAuthUsername,
java.lang.String proxyAuthPassword,
org.apache.manifoldcf.crawler.interfaces.IVersionActivity activities,
int connectionLimit)
throws org.apache.manifoldcf.agents.interfaces.ServiceInterruption,
org.apache.manifoldcf.core.interfaces.ManifoldCFException
currentTime - is the current time in milliseconds since epoch.
pathString - is the path string to check.
org.apache.manifoldcf.agents.interfaces.ServiceInterruption
org.apache.manifoldcf.core.interfaces.ManifoldCFException
public boolean canBeFlushed(long currentTime)
protected void makeValid(long currentTime,
java.lang.String userAgent,
java.lang.String from,
double minimumMillisecondsPerBytePerServer,
int maxOpenConnectionsPerServer,
long minimumMillisecondsPerFetchPerServer,
java.lang.String proxyHost,
int proxyPort,
java.lang.String proxyAuthDomain,
java.lang.String proxyAuthUsername,
java.lang.String proxyAuthPassword,
java.lang.String hostName,
org.apache.manifoldcf.crawler.interfaces.IVersionActivity activities,
int connectionLimit)
throws org.apache.manifoldcf.agents.interfaces.ServiceInterruption,
org.apache.manifoldcf.core.interfaces.ManifoldCFException
org.apache.manifoldcf.agents.interfaces.ServiceInterruption
org.apache.manifoldcf.core.interfaces.ManifoldCFException
protected void parseRobotsTxt(java.io.BufferedReader r,
java.lang.String hostName,
org.apache.manifoldcf.crawler.interfaces.IVersionActivity activities)
throws java.io.IOException,
org.apache.manifoldcf.core.interfaces.ManifoldCFException
java.io.IOException
org.apache.manifoldcf.core.interfaces.ManifoldCFException
|
||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||