public class ParseSegment extends org.apache.hadoop.conf.Configured implements org.apache.hadoop.util.Tool, org.apache.hadoop.mapred.Mapper<org.apache.hadoop.io.WritableComparable<?>,Content,org.apache.hadoop.io.Text,ParseImpl>, org.apache.hadoop.mapred.Reducer<org.apache.hadoop.io.Text,org.apache.hadoop.io.Writable,org.apache.hadoop.io.Text,org.apache.hadoop.io.Writable>
| Modifier and Type | Field and Description |
|---|---|
| static org.slf4j.Logger | LOG |
| static String | SKIP_TRUNCATED |
| Constructor and Description |
|---|
| ParseSegment() |
| ParseSegment(org.apache.hadoop.conf.Configuration conf) |
| Modifier and Type | Method and Description |
|---|---|
| void | close() |
| void | configure(org.apache.hadoop.mapred.JobConf job) |
| static boolean | isTruncated(Content content) — Checks if the page's content is truncated. |
| static void | main(String[] args) |
| void | map(org.apache.hadoop.io.WritableComparable<?> key, Content content, org.apache.hadoop.mapred.OutputCollector<org.apache.hadoop.io.Text,ParseImpl> output, org.apache.hadoop.mapred.Reporter reporter) |
| void | parse(org.apache.hadoop.fs.Path segment) |
| void | reduce(org.apache.hadoop.io.Text key, Iterator<org.apache.hadoop.io.Writable> values, org.apache.hadoop.mapred.OutputCollector<org.apache.hadoop.io.Text,org.apache.hadoop.io.Writable> output, org.apache.hadoop.mapred.Reporter reporter) |
| int | run(String[] args) |
public static final org.slf4j.Logger LOG
public static final String SKIP_TRUNCATED
public ParseSegment()
public ParseSegment(org.apache.hadoop.conf.Configuration conf)
public void configure(org.apache.hadoop.mapred.JobConf job)
Specified by: configure in interface org.apache.hadoop.mapred.JobConfigurable
public void close()
Specified by: close in interface Closeable
Specified by: close in interface AutoCloseable
public void map(org.apache.hadoop.io.WritableComparable<?> key,
Content content,
org.apache.hadoop.mapred.OutputCollector<org.apache.hadoop.io.Text,ParseImpl> output,
org.apache.hadoop.mapred.Reporter reporter)
throws IOException
Specified by: map in interface org.apache.hadoop.mapred.Mapper<org.apache.hadoop.io.WritableComparable<?>,Content,org.apache.hadoop.io.Text,ParseImpl>
Throws: IOException
public static boolean isTruncated(Content content)
Checks if the page's content is truncated.
Parameters: content - the page content to check
Returns: true if the page's content is truncated; false when it is not, or when it could not be determined.
public void reduce(org.apache.hadoop.io.Text key,
Iterator<org.apache.hadoop.io.Writable> values,
org.apache.hadoop.mapred.OutputCollector<org.apache.hadoop.io.Text,org.apache.hadoop.io.Writable> output,
org.apache.hadoop.mapred.Reporter reporter)
throws IOException
Specified by: reduce in interface org.apache.hadoop.mapred.Reducer<org.apache.hadoop.io.Text,org.apache.hadoop.io.Writable,org.apache.hadoop.io.Text,org.apache.hadoop.io.Writable>
Throws: IOException
public void parse(org.apache.hadoop.fs.Path segment)
throws IOException
Throws: IOException
Copyright © 2014 The Apache Software Foundation