public class ScoreUpdater extends org.apache.hadoop.conf.Configured implements org.apache.hadoop.util.Tool, org.apache.hadoop.mapred.Mapper<org.apache.hadoop.io.Text,org.apache.hadoop.io.Writable,org.apache.hadoop.io.Text,org.apache.hadoop.io.ObjectWritable>, org.apache.hadoop.mapred.Reducer<org.apache.hadoop.io.Text,org.apache.hadoop.io.ObjectWritable,org.apache.hadoop.io.Text,CrawlDatum>
| Modifier and Type | Field and Description |
|---|---|
static org.slf4j.Logger |
LOG |
| Constructor and Description |
|---|
ScoreUpdater() |
| Modifier and Type | Method and Description |
|---|---|
void |
close() |
void |
configure(org.apache.hadoop.mapred.JobConf conf) |
static void |
main(String[] args) |
void |
map(org.apache.hadoop.io.Text key,
org.apache.hadoop.io.Writable value,
org.apache.hadoop.mapred.OutputCollector<org.apache.hadoop.io.Text,org.apache.hadoop.io.ObjectWritable> output,
org.apache.hadoop.mapred.Reporter reporter)
Changes input into ObjectWritables.
|
void |
reduce(org.apache.hadoop.io.Text key,
Iterator<org.apache.hadoop.io.ObjectWritable> values,
org.apache.hadoop.mapred.OutputCollector<org.apache.hadoop.io.Text,CrawlDatum> output,
org.apache.hadoop.mapred.Reporter reporter)
Creates new CrawlDatum objects with the updated score from the NodeDb or
with a cleared score.
|
int |
run(String[] args)
Runs the ScoreUpdater tool.
|
void |
update(org.apache.hadoop.fs.Path crawlDb,
org.apache.hadoop.fs.Path webGraphDb)
Updates the inlink score in the web graph node database into the crawl
database.
|
public void configure(org.apache.hadoop.mapred.JobConf conf)
Specified by: configure in interface org.apache.hadoop.mapred.JobConfigurable
public void map(org.apache.hadoop.io.Text key,
org.apache.hadoop.io.Writable value,
org.apache.hadoop.mapred.OutputCollector<org.apache.hadoop.io.Text,org.apache.hadoop.io.ObjectWritable> output,
org.apache.hadoop.mapred.Reporter reporter)
throws IOException
Specified by: map in interface org.apache.hadoop.mapred.Mapper<org.apache.hadoop.io.Text,org.apache.hadoop.io.Writable,org.apache.hadoop.io.Text,org.apache.hadoop.io.ObjectWritable>
Throws: IOException
public void reduce(org.apache.hadoop.io.Text key,
Iterator<org.apache.hadoop.io.ObjectWritable> values,
org.apache.hadoop.mapred.OutputCollector<org.apache.hadoop.io.Text,CrawlDatum> output,
org.apache.hadoop.mapred.Reporter reporter)
throws IOException
Specified by: reduce in interface org.apache.hadoop.mapred.Reducer<org.apache.hadoop.io.Text,org.apache.hadoop.io.ObjectWritable,org.apache.hadoop.io.Text,CrawlDatum>
Throws: IOException
public void close()
Specified by: close in interface Closeable
Specified by: close in interface AutoCloseable
public void update(org.apache.hadoop.fs.Path crawlDb,
org.apache.hadoop.fs.Path webGraphDb)
throws IOException
Parameters:
crawlDb - The crawl database to update.
webGraphDb - The webgraph database to use.
Throws:
IOException - If an error occurs while updating the scores.
Copyright © 2014 The Apache Software Foundation