org.apache.hadoop.mapred
Class Task

java.lang.Object
  extended by org.apache.hadoop.mapred.Task
All Implemented Interfaces:
Configurable, Writable

public abstract class Task
extends Object
implements Writable, Configurable

Base class for tasks. This is NOT a public interface.
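
Because Task is internal, applications never instantiate it directly; the framework creates MapTask and ReduceTask instances on behalf of a submitted job. The following is a minimal, hedged sketch of the public (old API) submission path that ultimately leads to Task objects being run; the class name SubmitJobSketch and the job name are hypothetical placeholders, and the job relies on the default identity mapper/reducer.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

public class SubmitJobSketch {                        // hypothetical driver class
  public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(SubmitJobSketch.class);
    conf.setJobName("task-demo");                     // hypothetical job name
    conf.setInputFormat(TextInputFormat.class);
    // With the default identity mapper/reducer, TextInputFormat emits
    // (LongWritable offset, Text line) pairs, so declare those output types.
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    JobClient.runJob(conf);                           // the framework creates and runs the Tasks
  }
}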


Nested Class Summary
protected static class Task.CombineOutputCollector<K,V>
          OutputCollector for the combiner.
protected static class Task.CombinerRunner<K,V>
           
protected static class Task.CombineValuesIterator<KEY,VALUE>
           
static class Task.Counter
           
protected static class Task.NewCombinerRunner<K,V>
           
protected static class Task.OldCombinerRunner<K,V>
           
protected  class Task.TaskReporter
           
 
Field Summary
protected  OutputCommitter committer
           
protected  JobConf conf
           
static long DEFAULT_MR_COMBINE_RECORDS_BEFORE_PROGRESS
           
protected static String FILESYSTEM_COUNTER_GROUP
          Name of the FileSystem counters' group
protected  boolean jobCleanup
           
protected  JobContext jobContext
           
protected  JobStatus.State jobRunStateForCleanup
           
protected  boolean jobSetup
           
protected  org.apache.hadoop.mapred.JvmContext jvmContext
           
protected  LocalDirAllocator lDirAlloc
           
protected  org.apache.hadoop.mapred.MapOutputFile mapOutputFile
           
static String MR_COMBINE_RECORDS_BEFORE_PROGRESS
           
protected  OutputFormat<?,?> outputFormat
           
static int PROGRESS_INTERVAL
          The number of milliseconds between progress reports.
protected  Counters.Counter spilledRecordsCounter
           
protected  boolean taskCleanup
           
protected  TaskAttemptContext taskContext
           
protected  SecretKey tokenSecret
           
protected  TaskUmbilicalProtocol umbilical
           
 
Constructor Summary
Task()
           
Task(String jobFile, TaskAttemptID taskId, int partition, int numSlotsRequired)
           
 
Method Summary
protected static <INKEY,INVALUE,OUTKEY,OUTVALUE> Reducer.Context createReduceContext(Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE> reducer, Configuration job, TaskAttemptID taskId, RawKeyValueIterator rIter, Counter inputKeyCounter, Counter inputValueCounter, RecordWriter<OUTKEY,OUTVALUE> output, OutputCommitter committer, StatusReporter reporter, RawComparator<INKEY> comparator, Class<INKEY> keyClass, Class<INVALUE> valueClass)
           
abstract  org.apache.hadoop.mapred.TaskRunner createRunner(TaskTracker tracker, org.apache.hadoop.mapred.TaskTracker.TaskInProgress tip, org.apache.hadoop.mapred.TaskTracker.RunningJob rjob)
          Return an appropriate thread runner for this task.
 void done(TaskUmbilicalProtocol umbilical, Task.TaskReporter reporter)
           
 Configuration getConf()
          Return the configuration used by this object.
protected static String[] getFileSystemCounterNames(String uriScheme)
          Counters to measure the usage of the different file systems.
protected static FileSystem.Statistics getFsStatistics(Path path, Configuration conf)
          Gets a handle to the Statistics instance based on the scheme associated with path.
 String getJobFile()
           
 JobID getJobID()
          Get the job name for this task.
 SecretKey getJobTokenSecret()
          Get the job token secret
 org.apache.hadoop.mapred.JvmContext getJvmContext()
          Gets the task JvmContext
 int getNumSlotsRequired()
           
 int getPartition()
          Get the index of this task within the job.
 TaskStatus.Phase getPhase()
          Return current phase of the task.
 Progress getProgress()
           
 org.apache.hadoop.mapred.SortedRanges getSkipRanges()
          Get skipRanges.
 TaskAttemptID getTaskID()
           
 String getUser()
          Get the name of the user running the job/task.
 void initialize(JobConf job, JobID id, Reporter reporter, boolean useNewApi)
           
abstract  boolean isMapTask()
           
 boolean isSkipping()
          Whether the Task is in skipping mode.
 void localizeConfiguration(JobConf conf)
          Localize the given JobConf to be specific for this task.
 void readFields(DataInput in)
          Deserialize the fields of this object from in.
protected  void reportFatalError(TaskAttemptID id, Throwable throwable, String logMsg)
          Report a fatal error to the parent (task) tracker.
protected  void reportNextRecordRange(TaskUmbilicalProtocol umbilical, long nextRecIndex)
          Reports the next executing record range to TaskTracker.
abstract  void run(JobConf job, TaskUmbilicalProtocol umbilical)
          Run this task as a part of the named job.
protected  void runJobCleanupTask(TaskUmbilicalProtocol umbilical, Task.TaskReporter reporter)
           
protected  void runJobSetupTask(TaskUmbilicalProtocol umbilical, Task.TaskReporter reporter)
           
protected  void runTaskCleanupTask(TaskUmbilicalProtocol umbilical, Task.TaskReporter reporter)
           
 void setConf(Configuration conf)
          Set the configuration to be used by this object.
 void setJobFile(String jobFile)
           
 void setJobTokenSecret(SecretKey tokenSecret)
          Set the job token secret
 void setJvmContext(org.apache.hadoop.mapred.JvmContext jvmContext)
          Set the task JvmContext
protected  void setPhase(TaskStatus.Phase phase)
          Set current phase of the task.
 void setSkipping(boolean skipping)
          Sets whether to run Task in skipping mode.
 void setSkipRanges(org.apache.hadoop.mapred.SortedRanges skipRanges)
          Set skipRanges.
protected  void setWriteSkipRecs(boolean writeSkipRecs)
          Set whether to write skip records.
protected  void statusUpdate(TaskUmbilicalProtocol umbilical)
           
protected  boolean supportIsolationRunner(JobConf conf)
           
 String toString()
           
protected  boolean toWriteSkipRecs()
          Get whether to write skip records.
 void write(DataOutput out)
          Serialize the fields of this object to out.
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
 

Field Detail

MR_COMBINE_RECORDS_BEFORE_PROGRESS

public static final String MR_COMBINE_RECORDS_BEFORE_PROGRESS
See Also:
Constant Field Values

DEFAULT_MR_COMBINE_RECORDS_BEFORE_PROGRESS

public static final long DEFAULT_MR_COMBINE_RECORDS_BEFORE_PROGRESS
See Also:
Constant Field Values

FILESYSTEM_COUNTER_GROUP

protected static final String FILESYSTEM_COUNTER_GROUP
Name of the FileSystem counters' group

See Also:
Constant Field Values

jobRunStateForCleanup

protected JobStatus.State jobRunStateForCleanup

jobCleanup

protected boolean jobCleanup

jobSetup

protected boolean jobSetup

taskCleanup

protected boolean taskCleanup

conf

protected JobConf conf

mapOutputFile

protected org.apache.hadoop.mapred.MapOutputFile mapOutputFile

lDirAlloc

protected LocalDirAllocator lDirAlloc

jobContext

protected JobContext jobContext

taskContext

protected TaskAttemptContext taskContext

outputFormat

protected OutputFormat<?,?> outputFormat

committer

protected OutputCommitter committer

spilledRecordsCounter

protected final Counters.Counter spilledRecordsCounter

umbilical

protected TaskUmbilicalProtocol umbilical

tokenSecret

protected SecretKey tokenSecret

jvmContext

protected org.apache.hadoop.mapred.JvmContext jvmContext

PROGRESS_INTERVAL

public static final int PROGRESS_INTERVAL
The number of milliseconds between progress reports.

See Also:
Constant Field Values
Constructor Detail

Task

public Task()

Task

public Task(String jobFile,
            TaskAttemptID taskId,
            int partition,
            int numSlotsRequired)
Method Detail

getFileSystemCounterNames

protected static String[] getFileSystemCounterNames(String uriScheme)
Counters to measure the usage of the different file systems. Always returns a String array with two elements: the first is the name of the BYTES_READ counter and the second is the name of the BYTES_WRITTEN counter.
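
For illustration only, here is a hedged sketch of reading per-filesystem counters from a completed job. The group name "FileSystemCounters" and the counter names "HDFS_BYTES_READ"/"HDFS_BYTES_WRITTEN" are assumptions about this release's naming, not guaranteed by this method's contract, and FsCounterReport is a hypothetical helper class.

import java.io.IOException;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.RunningJob;

public class FsCounterReport {                        // hypothetical helper
  public static void report(RunningJob job) throws IOException {
    Counters counters = job.getCounters();
    // Assumed group and counter names for the HDFS statistics of this job.
    Counters.Group fsGroup = counters.getGroup("FileSystemCounters");
    long bytesRead = fsGroup.getCounter("HDFS_BYTES_READ");
    long bytesWritten = fsGroup.getCounter("HDFS_BYTES_WRITTEN");
    System.out.println("HDFS bytes read=" + bytesRead + ", written=" + bytesWritten);
  }
}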


setJobFile

public void setJobFile(String jobFile)

getJobFile

public String getJobFile()

getTaskID

public TaskAttemptID getTaskID()

getNumSlotsRequired

public int getNumSlotsRequired()

getJobID

public JobID getJobID()
Get the job id for this task.

Returns:
the job id

setJobTokenSecret

public void setJobTokenSecret(SecretKey tokenSecret)
Set the job token secret

Parameters:
tokenSecret - the secret

getJobTokenSecret

public SecretKey getJobTokenSecret()
Get the job token secret

Returns:
the token secret

setJvmContext

public void setJvmContext(org.apache.hadoop.mapred.JvmContext jvmContext)
Set the task JvmContext

Parameters:
jvmContext -

getJvmContext

public org.apache.hadoop.mapred.JvmContext getJvmContext()
Gets the task JvmContext

Returns:
the jvm context

getPartition

public int getPartition()
Get the index of this task within the job.

Returns:
the integer part of the task id

getPhase

public TaskStatus.Phase getPhase()
Return the current phase of the task. Needs to be synchronized, as the communication thread sends the phase every second.

Returns:
the current phase of the task

setPhase

protected void setPhase(TaskStatus.Phase phase)
Set current phase of the task.

Parameters:
phase - task phase

toWriteSkipRecs

protected boolean toWriteSkipRecs()
Get whether to write skip records.


setWriteSkipRecs

protected void setWriteSkipRecs(boolean writeSkipRecs)
Set whether to write skip records.


reportFatalError

protected void reportFatalError(TaskAttemptID id,
                                Throwable throwable,
                                String logMsg)
Report a fatal error to the parent (task) tracker.


getSkipRanges

public org.apache.hadoop.mapred.SortedRanges getSkipRanges()
Get skipRanges.


setSkipRanges

public void setSkipRanges(org.apache.hadoop.mapred.SortedRanges skipRanges)
Set skipRanges.


isSkipping

public boolean isSkipping()
Whether the Task is in skipping mode.


setSkipping

public void setSkipping(boolean skipping)
Sets whether to run Task in skipping mode.

Parameters:
skipping -
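
Skipping mode is normally driven by job configuration rather than by calling this method directly; the framework switches an attempt into skipping mode once earlier attempts have failed often enough. A minimal sketch, assuming the org.apache.hadoop.mapred.SkipBadRecords helper behaves as in this release (the class name SkipConfigSketch is hypothetical):

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SkipBadRecords;

public class SkipConfigSketch {                       // hypothetical driver fragment
  public static JobConf configure(JobConf conf) {
    // After this many failed attempts, later attempts run in skipping mode.
    SkipBadRecords.setAttemptsToStartSkipping(conf, 2);
    // Narrow each bad range down to at most one map record / reduce key group.
    SkipBadRecords.setMapperMaxSkipRecords(conf, 1);
    SkipBadRecords.setReducerMaxSkipGroups(conf, 1);
    return conf;
  }
}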

getUser

public String getUser()
Get the name of the user running the job/task. The TaskTracker needs the task's user name even before its JobConf is localized, so the user name is serialized explicitly.

Returns:
user

write

public void write(DataOutput out)
           throws IOException
Description copied from interface: Writable
Serialize the fields of this object to out.

Specified by:
write in interface Writable
Parameters:
out - DataOutput to serialize this object into.
Throws:
IOException

readFields

public void readFields(DataInput in)
                throws IOException
Description copied from interface: Writable
Deserialize the fields of this object from in.

For efficiency, implementations should attempt to re-use storage in the existing object where possible.

Specified by:
readFields in interface Writable
Parameters:
in - DataInput to deserialize this object from.
Throws:
IOException
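
For context, write and readFields together implement the standard Writable round trip, which is how the framework moves a Task between processes. A small self-contained sketch using Text (any Writable behaves the same way; the class name WritableRoundTrip is hypothetical):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.io.Text;

public class WritableRoundTrip {                      // hypothetical demo class
  public static void main(String[] args) throws IOException {
    Text original = new Text("hello");
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    original.write(new DataOutputStream(bytes));      // Writable.write(DataOutput)

    Text copy = new Text();                           // re-use the object, per the note above
    copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
    System.out.println(copy);                         // prints "hello"
  }
}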

toString

public String toString()
Overrides:
toString in class Object

localizeConfiguration

public void localizeConfiguration(JobConf conf)
                           throws IOException
Localize the given JobConf to be specific for this task.

Throws:
IOException

run

public abstract void run(JobConf job,
                         TaskUmbilicalProtocol umbilical)
                  throws IOException,
                         ClassNotFoundException,
                         InterruptedException
Run this task as a part of the named job. This method is executed in the child process and is what invokes the user-supplied map, reduce, etc. methods (a sketch of such a user-supplied method appears after the parameter list below).

Parameters:
umbilical - for progress reports
Throws:
IOException
ClassNotFoundException
InterruptedException
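
For orientation, the user-supplied map method that a map task's run method ends up invoking is an old-API Mapper such as the hedged sketch below (LineLengthMapper is a hypothetical example class, not part of Hadoop):

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class LineLengthMapper extends MapReduceBase
    implements Mapper<LongWritable, Text, Text, IntWritable> {
  // Invoked by the framework once per input record of the split.
  public void map(LongWritable offset, Text line,
                  OutputCollector<Text, IntWritable> output, Reporter reporter)
      throws IOException {
    output.collect(line, new IntWritable(line.getLength()));
    reporter.progress();                              // keeps the attempt from timing out on slow records
  }
}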

createRunner

public abstract org.apache.hadoop.mapred.TaskRunner createRunner(TaskTracker tracker,
                                                                 org.apache.hadoop.mapred.TaskTracker.TaskInProgress tip,
                                                                 org.apache.hadoop.mapred.TaskTracker.RunningJob rjob)
                                                          throws IOException
Return an appropriate thread runner for this task.

Parameters:
tip - TODO
Throws:
IOException

isMapTask

public abstract boolean isMapTask()

getProgress

public Progress getProgress()

initialize

public void initialize(JobConf job,
                       JobID id,
                       Reporter reporter,
                       boolean useNewApi)
                throws IOException,
                       ClassNotFoundException,
                       InterruptedException
Throws:
IOException
ClassNotFoundException
InterruptedException

reportNextRecordRange

protected void reportNextRecordRange(TaskUmbilicalProtocol umbilical,
                                     long nextRecIndex)
                              throws IOException
Reports the next executing record range to TaskTracker.

Parameters:
umbilical -
nextRecIndex - the record index that will be fed next.
Throws:
IOException

done

public void done(TaskUmbilicalProtocol umbilical,
                 Task.TaskReporter reporter)
          throws IOException,
                 InterruptedException
Throws:
IOException
InterruptedException

statusUpdate

protected void statusUpdate(TaskUmbilicalProtocol umbilical)
                     throws IOException
Throws:
IOException

runTaskCleanupTask

protected void runTaskCleanupTask(TaskUmbilicalProtocol umbilical,
                                  Task.TaskReporter reporter)
                           throws IOException,
                                  InterruptedException
Throws:
IOException
InterruptedException

runJobCleanupTask

protected void runJobCleanupTask(TaskUmbilicalProtocol umbilical,
                                 Task.TaskReporter reporter)
                          throws IOException,
                                 InterruptedException
Throws:
IOException
InterruptedException

supportIsolationRunner

protected boolean supportIsolationRunner(JobConf conf)

runJobSetupTask

protected void runJobSetupTask(TaskUmbilicalProtocol umbilical,
                               Task.TaskReporter reporter)
                        throws IOException,
                               InterruptedException
Throws:
IOException
InterruptedException

getFsStatistics

protected static FileSystem.Statistics getFsStatistics(Path path,
                                                       Configuration conf)
                                                throws IOException
Gets a handle to the Statistics instance based on the scheme associated with path.

Parameters:
path - the path.
Returns:
a Statistics instance, or null if none is found for the scheme.
Throws:
IOException
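
A hedged approximation of what this lookup amounts to, using only public FileSystem API (the helper name FsStatsLookup is hypothetical, and the actual implementation may differ):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FsStatsLookup {                          // hypothetical helper
  public static FileSystem.Statistics lookup(Path path, Configuration conf) throws IOException {
    String scheme = path.getFileSystem(conf).getUri().getScheme();
    // Scan the per-FileSystem statistics registered in this JVM.
    for (FileSystem.Statistics stats : FileSystem.getAllStatistics()) {
      if (stats.getScheme().equals(scheme)) {
        return stats;
      }
    }
    return null;                                      // no statistics found for this scheme
  }
}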

setConf

public void setConf(Configuration conf)
Description copied from interface: Configurable
Set the configuration to be used by this object.

Specified by:
setConf in interface Configurable

getConf

public Configuration getConf()
Description copied from interface: Configurable
Return the configuration used by this object.

Specified by:
getConf in interface Configurable

createReduceContext

protected static <INKEY,INVALUE,OUTKEY,OUTVALUE> Reducer.Context createReduceContext(Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE> reducer,
                                                                                     Configuration job,
                                                                                     TaskAttemptID taskId,
                                                                                     RawKeyValueIterator rIter,
                                                                                     Counter inputKeyCounter,
                                                                                     Counter inputValueCounter,
                                                                                     RecordWriter<OUTKEY,OUTVALUE> output,
                                                                                     OutputCommitter committer,
                                                                                     StatusReporter reporter,
                                                                                     RawComparator<INKEY> comparator,
                                                                                     Class<INKEY> keyClass,
                                                                                     Class<INVALUE> valueClass)
                                              throws IOException,
                                                     ClassNotFoundException
Throws:
IOException
ClassNotFoundException
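
For reference, the reducer argument here is a new-API org.apache.hadoop.mapreduce.Reducer whose reduce method is driven through the created context. A minimal, hedged example (SumReducer is a hypothetical user class):

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class SumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
  // Called once per key group with the values supplied by the framework's
  // RawKeyValueIterator behind the context.
  @Override
  protected void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable v : values) {
      sum += v.get();
    }
    context.write(key, new IntWritable(sum));
  }
}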


Copyright © 2009 The Apache Software Foundation