|
|||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||
java.lang.Object
  org.apache.manifoldcf.crawler.system.WorkerThread.ProcessActivity
protected static class WorkerThread.ProcessActivity
Process activity class wraps access to the ingester and job queue.
| Field Summary | |
|---|---|
protected java.util.HashMap |
abortSet
|
protected IRepositoryConnection |
connection
|
protected IRepositoryConnector |
connector
|
protected IRepositoryConnectionManager |
connMgr
|
protected long |
currentTime
|
protected boolean |
ingestAllowed
|
protected IIncrementalIngester |
ingester
|
protected WorkerThread.OutputActivity |
ingestLogger
|
protected IJobDescription |
job
|
protected IJobManager |
jobManager
|
protected java.lang.String[] |
legalLinkTypes
|
protected java.util.HashMap |
lowerExpireBounds
|
protected java.util.HashMap |
lowerRescheduleBounds
|
protected java.util.HashMap |
originationTimes
|
protected java.lang.String |
outputVersion
|
protected QueueTracker |
queueTracker
|
protected java.util.HashMap |
referenceList
|
protected IThreadContext |
threadContext
|
protected java.util.HashMap |
upperExpireBounds
|
protected java.util.HashMap |
upperRescheduleBounds
|
| Fields inherited from interface org.apache.manifoldcf.crawler.interfaces.IProcessActivity |
|---|
_rcsid |
| Constructor Summary | |
|---|---|
WorkerThread.ProcessActivity(IThreadContext threadContext,
QueueTracker queueTracker,
IJobManager jobManager,
IIncrementalIngester ingester,
long currentTime,
IJobDescription job,
IRepositoryConnection connection,
IRepositoryConnector connector,
IRepositoryConnectionManager connMgr,
java.lang.String[] legalLinkTypes,
WorkerThread.OutputActivity ingestLogger,
java.util.HashMap abortSet,
java.lang.String outputVersion)
Constructor. |
|
| Method Summary | |
|---|---|
void |
addDocumentReference(java.lang.String localIdentifier)
Add a document description to the current job's queue. |
void |
addDocumentReference(java.lang.String localIdentifier,
java.lang.String parentIdentifier,
java.lang.String relationshipType)
Add a document description to the current job's queue. |
void |
addDocumentReference(java.lang.String localIdentifier,
java.lang.String parentIdentifier,
java.lang.String relationshipType,
java.lang.String[] dataNames,
java.lang.Object[][] dataValues)
Add a document description to the current job's queue. |
void |
addDocumentReference(java.lang.String localIdentifier,
java.lang.String parentIdentifier,
java.lang.String relationshipType,
java.lang.String[] dataNames,
java.lang.Object[][] dataValues,
java.lang.Long originationTime)
Add a document description to the current job's queue. |
void |
addDocumentReference(java.lang.String localIdentifier,
java.lang.String parentIdentifier,
java.lang.String relationshipType,
java.lang.String[] dataNames,
java.lang.Object[][] dataValues,
java.lang.Long originationTime,
java.lang.String[] prereqEventNames)
Add a document description to the current job's queue. |
boolean |
beginEventSequence(java.lang.String eventName)
Begin an event sequence. |
java.lang.Long |
calculateDocumentExpireTime(long currentTime,
java.lang.String localIdentifier)
|
java.lang.Long |
calculateDocumentRescheduleTime(long currentTime,
long timeAmt,
java.lang.String localIdentifier)
|
boolean |
checkDocumentIndexable(java.io.File localFile)
Check whether a document is indexable by the currently specified output connector. |
void |
checkJobStillActive()
Check whether current job is still active. |
boolean |
checkMimeTypeIndexable(java.lang.String mimeType)
Check whether a mime type is indexable by the currently specified output connector. |
void |
completeEventSequence(java.lang.String eventName)
Complete an event sequence. |
java.lang.String |
createConnectionSpecificString(java.lang.String simpleString)
Create a connection-specific string from a simple string. |
java.lang.String |
createGlobalString(java.lang.String simpleString)
Create a global string from a simple string. |
java.lang.String |
createJobSpecificString(java.lang.String simpleString)
Create a job-based string from a simple string. |
void |
deleteDocument(java.lang.String documentIdentifier)
Delete the current document from the search engine index. |
void |
discard()
Clean up any dangling information, before abandoning this process activity object |
void |
flush()
Flush the outstanding references into the database. |
java.lang.Long |
getDocumentExpirationLowerBoundTime(java.lang.String localIdentifier)
Find a document's lower expiration time bound, if any |
java.lang.Long |
getDocumentExpirationUpperBoundTime(java.lang.String localIdentifier)
Find a document's upper expiration time bound, if any |
java.lang.Long |
getDocumentOriginationTime(java.lang.String localIdentifier)
Get a document's origination time |
java.lang.Long |
getDocumentRescheduleLowerBoundTime(java.lang.String localIdentifier)
Find a document's lower rescheduling time bound, if any |
java.lang.Long |
getDocumentRescheduleUpperBoundTime(java.lang.String localIdentifier)
Find a document's upper rescheduling time bound, if any |
void |
ingestDocument(java.lang.String documentIdentifier,
java.lang.String version,
java.lang.String documentURI,
RepositoryDocument data)
Ingest the current document. |
protected void |
processDocumentReferences()
Process outstanding document references, in batch. |
void |
recordActivity(java.lang.Long startTime,
java.lang.String activityType,
java.lang.Long dataSize,
java.lang.String entityIdentifier,
java.lang.String resultCode,
java.lang.String resultDescription,
java.lang.String[] childIdentifiers)
Record time-stamped information about the activity of the connector. |
void |
recordDocument(java.lang.String documentIdentifier,
java.lang.String version)
Record a document version, but don't ingest it. |
void |
resetTimes()
Reset the recorded times |
java.lang.String[] |
retrieveParentData(java.lang.String localIdentifier,
java.lang.String dataName)
Retrieve data passed from parents to a specified child document. |
CharacterInput[] |
retrieveParentDataAsFiles(java.lang.String localIdentifier,
java.lang.String dataName)
Retrieve data passed from parents to a specified child document. |
void |
retryDocumentProcessing(java.lang.String localIdentifier)
Abort processing a document (for sequencing reasons). |
void |
setDocumentOriginationTime(java.lang.String localIdentifier,
java.lang.Long originationTime)
Override a document's origination time. |
void |
setDocumentScheduleBounds(java.lang.String localIdentifier,
java.lang.Long lowerRecrawlBoundTime,
java.lang.Long upperRecrawlBoundTime,
java.lang.Long lowerExpireBoundTime,
java.lang.Long upperExpireBoundTime)
Override the schedule for the next time a document is crawled. |
| Methods inherited from class java.lang.Object |
|---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Field Detail |
|---|
protected IThreadContext threadContext
protected IJobManager jobManager
protected IIncrementalIngester ingester
protected boolean ingestAllowed
protected long currentTime
protected IJobDescription job
protected IRepositoryConnection connection
protected IRepositoryConnector connector
protected IRepositoryConnectionManager connMgr
protected java.lang.String[] legalLinkTypes
protected WorkerThread.OutputActivity ingestLogger
protected QueueTracker queueTracker
protected java.util.HashMap abortSet
protected java.lang.String outputVersion
protected java.util.HashMap referenceList
protected java.util.HashMap lowerRescheduleBounds
protected java.util.HashMap upperRescheduleBounds
protected java.util.HashMap lowerExpireBounds
protected java.util.HashMap upperExpireBounds
protected java.util.HashMap originationTimes
| Constructor Detail |
|---|
public WorkerThread.ProcessActivity(IThreadContext threadContext,
QueueTracker queueTracker,
IJobManager jobManager,
IIncrementalIngester ingester,
long currentTime,
IJobDescription job,
IRepositoryConnection connection,
IRepositoryConnector connector,
IRepositoryConnectionManager connMgr,
java.lang.String[] legalLinkTypes,
WorkerThread.OutputActivity ingestLogger,
java.util.HashMap abortSet,
java.lang.String outputVersion)
jobManager - is the job manager
ingester - is the ingester
| Method Detail |
|---|
public void discard()
throws ManifoldCFException
ManifoldCFException
public void addDocumentReference(java.lang.String localIdentifier,
java.lang.String parentIdentifier,
java.lang.String relationshipType,
java.lang.String[] dataNames,
java.lang.Object[][] dataValues,
java.lang.Long originationTime,
java.lang.String[] prereqEventNames)
throws ManifoldCFException
addDocumentReference in interface IProcessActivity
localIdentifier - is the local document identifier to add (for the connector that
fetched the document).
parentIdentifier - is the document identifier that is considered to be the "parent"
of this identifier. May be null, if no hopcount filtering is desired for this kind of relationship.
relationshipType - is the string describing the kind of relationship described by this
reference. This must be one of the strings returned by the IRepositoryConnector method
"getRelationshipTypes()". May be null.
dataNames - is the list of carry-down data from the parent to the child. May be null. Each name is limited to 255 characters!
dataValues - are the values that correspond to the data names in the dataNames parameter. May be null only if dataNames is null.
The type of each object must either be a String, or a CharacterInput.
originationTime - is the time, in ms since epoch, that the document originated. Pass null if none or unknown.
prereqEventNames - are the names of the prerequisite events which this document requires prior to processing. Pass null if none.
ManifoldCFException
public void addDocumentReference(java.lang.String localIdentifier,
java.lang.String parentIdentifier,
java.lang.String relationshipType,
java.lang.String[] dataNames,
java.lang.Object[][] dataValues,
java.lang.Long originationTime)
throws ManifoldCFException
addDocumentReference in interface IProcessActivity
localIdentifier - is the local document identifier to add (for the connector that
fetched the document).
parentIdentifier - is the document identifier that is considered to be the "parent"
of this identifier. May be null, if no hopcount filtering is desired for this kind of relationship.
relationshipType - is the string describing the kind of relationship described by this
reference. This must be one of the strings returned by the IRepositoryConnector method
"getRelationshipTypes()". May be null.
dataNames - is the list of carry-down data from the parent to the child. May be null. Each name is limited to 255 characters!
dataValues - are the values that correspond to the data names in the dataNames parameter. May be null only if dataNames is null.
originationTime - is the time, in ms since epoch, that the document originated. Pass null if none or unknown.
ManifoldCFException
public void addDocumentReference(java.lang.String localIdentifier,
java.lang.String parentIdentifier,
java.lang.String relationshipType,
java.lang.String[] dataNames,
java.lang.Object[][] dataValues)
throws ManifoldCFException
addDocumentReference in interface IProcessActivity
localIdentifier - is the local document identifier to add (for the connector that
fetched the document).
parentIdentifier - is the document identifier that is considered to be the "parent"
of this identifier. May be null, if no hopcount filtering is desired for this kind of relationship.
relationshipType - is the string describing the kind of relationship described by this
reference. This must be one of the strings returned by the IRepositoryConnector method
"getRelationshipTypes()". May be null.
dataNames - is the list of carry-down data from the parent to the child. May be null. Each name is limited to 255 characters!
dataValues - are the values that correspond to the data names in the dataNames parameter. May be null only if dataNames is null.
ManifoldCFException
public void addDocumentReference(java.lang.String localIdentifier,
java.lang.String parentIdentifier,
java.lang.String relationshipType)
throws ManifoldCFException
addDocumentReference in interface IProcessActivity
localIdentifier - is the local document identifier to add (for the connector that
fetched the document).
parentIdentifier - is the document identifier that is considered to be the "parent"
of this identifier. May be null, if no hopcount filtering is desired for this kind of relationship.
relationshipType - is the string describing the kind of relationship described by this
reference. This must be one of the strings returned by the IRepositoryConnector method
"getRelationshipTypes()". May be null.
ManifoldCFException
public void addDocumentReference(java.lang.String localIdentifier)
throws ManifoldCFException
addDocumentReference in interface IProcessActivity
localIdentifier - is the local document identifier to add (for the connector that
fetched the document).
ManifoldCFException
public java.lang.String[] retrieveParentData(java.lang.String localIdentifier,
java.lang.String dataName)
throws ManifoldCFException
retrieveParentData in interface IProcessActivity
localIdentifier - is the document identifier of the document we want the recorded data for.
dataName - is the name of the data items to retrieve.
ManifoldCFException
public CharacterInput[] retrieveParentDataAsFiles(java.lang.String localIdentifier,
java.lang.String dataName)
throws ManifoldCFException
retrieveParentDataAsFiles in interface IProcessActivity
localIdentifier - is the document identifier of the document we want the recorded data for.
dataName - is the name of the data items to retrieve.
ManifoldCFException
public void recordDocument(java.lang.String documentIdentifier,
java.lang.String version)
throws ManifoldCFException,
ServiceInterruption
recordDocument in interface IProcessActivity
documentIdentifier - is the document identifier.
version - is the document version.
ManifoldCFException
ServiceInterruption
public void ingestDocument(java.lang.String documentIdentifier,
java.lang.String version,
java.lang.String documentURI,
RepositoryDocument data)
throws ManifoldCFException,
ServiceInterruption
ingestDocument in interface IProcessActivity
documentIdentifier - is the document's local identifier.
version - is the version of the document, as reported by the getDocumentVersions() method of the
corresponding repository connector.
documentURI - is the URI to use to retrieve this document from the search interface (and is
also the unique key in the index).
data - is the document data. The data is closed after ingestion is complete.
ManifoldCFException
ServiceInterruption
public void deleteDocument(java.lang.String documentIdentifier)
throws ManifoldCFException,
ServiceInterruption
deleteDocument in interface IProcessActivity
documentIdentifier - is the document's local identifier.
ManifoldCFException
ServiceInterruption
public void setDocumentScheduleBounds(java.lang.String localIdentifier,
java.lang.Long lowerRecrawlBoundTime,
java.lang.Long upperRecrawlBoundTime,
java.lang.Long lowerExpireBoundTime,
java.lang.Long upperExpireBoundTime)
throws ManifoldCFException
setDocumentScheduleBounds in interface IProcessActivity
localIdentifier - is the document's local identifier.
lowerRecrawlBoundTime - is the time in ms since epoch that the reschedule time should not fall BELOW, or null if none.
upperRecrawlBoundTime - is the time in ms since epoch that the reschedule time should not rise ABOVE, or null if none.
lowerExpireBoundTime - is the time in ms since epoch that the expire time should not fall BELOW, or null if none.
upperExpireBoundTime - is the time in ms since epoch that the expire time should not rise ABOVE, or null if none.
ManifoldCFException
public void setDocumentOriginationTime(java.lang.String localIdentifier,
java.lang.Long originationTime)
throws ManifoldCFException
setDocumentOriginationTime in interface IProcessActivity
localIdentifier - is the document's local identifier.
originationTime - is the document's origination time, or null if unknown.
ManifoldCFException
public java.lang.Long getDocumentRescheduleLowerBoundTime(java.lang.String localIdentifier)
public java.lang.Long getDocumentRescheduleUpperBoundTime(java.lang.String localIdentifier)
public java.lang.Long getDocumentExpirationLowerBoundTime(java.lang.String localIdentifier)
public java.lang.Long getDocumentExpirationUpperBoundTime(java.lang.String localIdentifier)
public java.lang.Long getDocumentOriginationTime(java.lang.String localIdentifier)
public java.lang.Long calculateDocumentRescheduleTime(long currentTime,
long timeAmt,
java.lang.String localIdentifier)
public java.lang.Long calculateDocumentExpireTime(long currentTime,
java.lang.String localIdentifier)
public void resetTimes()
public void recordActivity(java.lang.Long startTime,
java.lang.String activityType,
java.lang.Long dataSize,
java.lang.String entityIdentifier,
java.lang.String resultCode,
java.lang.String resultDescription,
java.lang.String[] childIdentifiers)
throws ManifoldCFException
recordActivity in interface IHistoryActivity
startTime - is either null or the time since the start of epoch in milliseconds (Jan 1, 1970). Every
activity has an associated time; the startTime field records when the activity began. A null value
indicates that the start time and the finishing time are the same.
activityType - is a string which is fully interpretable only in the context of the connector involved, which is
used to categorize what kind of activity is being recorded. For example, a web connector might record a
"fetch document" activity. Cannot be null.
dataSize - is the number of bytes of data involved in the activity, or null if not applicable.
entityIdentifier - is a (possibly long) string which identifies the object involved in the history record.
The interpretation of this field will differ from connector to connector. May be null.
resultCode - contains a terse description of the result of the activity. The description is limited in
size to 255 characters, and can be interpreted only in the context of the current connector. May be null.
resultDescription - is a (possibly long) human-readable string which adds detail, if required, to the result
described in the resultCode field. This field is not meant to be queried on. May be null.
childIdentifiers - is a set of child entity identifiers associated with this activity. May be null.
ManifoldCFException
public void flush()
throws ManifoldCFException
ManifoldCFException
protected void processDocumentReferences()
throws ManifoldCFException
ManifoldCFException
public void checkJobStillActive()
throws ManifoldCFException,
ServiceInterruption
checkJobStillActive in interface IAbortActivity
ManifoldCFException
ServiceInterruption
public boolean beginEventSequence(java.lang.String eventName)
throws ManifoldCFException
beginEventSequence in interface IEventActivity
eventName - is the event name.
ManifoldCFException
public void completeEventSequence(java.lang.String eventName)
throws ManifoldCFException
completeEventSequence in interface IEventActivity
eventName - is the event name.
ManifoldCFException
public void retryDocumentProcessing(java.lang.String localIdentifier)
throws ManifoldCFException
retryDocumentProcessing in interface IEventActivity
localIdentifier - is the document identifier to requeue
ManifoldCFException
public boolean checkMimeTypeIndexable(java.lang.String mimeType)
throws ManifoldCFException,
ServiceInterruption
checkMimeTypeIndexable in interface IFingerprintActivity
mimeType - is the mime type to check, not including any character set specification.
ManifoldCFException
ServiceInterruption
public boolean checkDocumentIndexable(java.io.File localFile)
throws ManifoldCFException,
ServiceInterruption
checkDocumentIndexable in interface IFingerprintActivity
localFile - is the local copy of the file to check.
ManifoldCFException
ServiceInterruption
public java.lang.String createGlobalString(java.lang.String simpleString)
createGlobalString in interface INamingActivity
simpleString - is the simple string.
public java.lang.String createConnectionSpecificString(java.lang.String simpleString)
createConnectionSpecificString in interface INamingActivity
simpleString - is the simple string.
public java.lang.String createJobSpecificString(java.lang.String simpleString)
createJobSpecificString in interface INamingActivity
simpleString - is the simple string.
|
|||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||