tassl.application.workflow
Class AutonomicScheduler

java.lang.Object
  extended by java.lang.Thread
      extended by tassl.application.workflow.AutonomicScheduler
All Implemented Interfaces:
java.lang.Runnable

public class AutonomicScheduler
extends java.lang.Thread

Autonomic Scheduler / Resource Manager


Nested Class Summary
 
Nested classes/interfaces inherited from class java.lang.Thread
java.lang.Thread.State, java.lang.Thread.UncaughtExceptionHandler
 
Field Summary
 
Fields inherited from class java.lang.Thread
MAX_PRIORITY, MIN_PRIORITY, NORM_PRIORITY
 
Constructor Summary
AutonomicScheduler(java.lang.String address, int port, java.lang.String wflMasterAdd, int wflMasterPort, int monitorInterval)
           
 
Method Summary
protected  void calculateCostVMs(java.lang.String wflId, java.lang.String stageId)
           
 void calculateNetworkSpeed()
          Calculate network speed between agents.
protected  void cleanStagesFinalMaps(java.util.HashMap<java.lang.String,java.util.List<java.lang.String>> wflStage_SitesDown)
          When a whole site goes down, we need to remove all its workers that are allocated to each workflow stage
protected  void cleanStagesFinalMaps(java.util.HashMap<java.lang.String,java.lang.String> wflStage_workerTasks, java.lang.String agentAddress, int agentPort)
          Remove workers that are allocated to each workflow stage but have been reported to be down
protected  java.util.List communicateAgentsScheduling(java.lang.String wflId, WorkflowStage stage, java.lang.String properties, java.util.HashMap<java.lang.String,java.util.List<java.lang.String>> finalMap, long timestamp)
          Tell each agent which resources it needs to provision
protected  boolean dealStageDone(java.lang.String wflId, java.lang.String stageId, java.lang.String end, java.lang.String stagesInvolved)
          Do something when a stage is Done.
protected  java.lang.String filesForSite(WorkflowStage stage, java.lang.String tasks)
          Get list of files that a site will need
 java.lang.String findPolicyClass(java.lang.String policyName)
          Returns the class name that contains a policy with the name indicated in the argument or NULL if not found.
protected  void forceRescheduleAll()
           
protected  void forceWorkflowReschedule(java.lang.String workflowId)
           
protected  java.util.HashMap<java.lang.String,java.util.List<WorkerForScheduler>> getAllAvailableSlots(java.lang.String workflow, java.lang.String stage)
          Look into available resources and determine the number of slots(workers) available.
protected  java.util.HashMap<java.lang.String,java.util.List<java.lang.String>> getFinalMap(java.util.HashMap<java.lang.Integer,WorkerForScheduler> MapTask, java.lang.String stageId)
          Return the number of machines and workers per site
 java.util.List getPolicies(java.lang.String fullclassName)
           
protected  void initPolicyCatalog()
           
protected  boolean rescheduleStages(java.util.HashMap<java.lang.String,java.lang.String> wflStage_workerTasks)
          Report failed tasks to the workflowMaster and retrieve
protected  boolean RoundRobin(java.lang.String wflId, WorkflowStage stage, java.lang.String properties)
           
 void run()
           
protected  boolean schedulePendingStages()
           
protected  boolean scheduleSingleStage(java.lang.String wflId, WorkflowStage stage, java.util.List<java.lang.String> nodeAcctList)
          Schedule a single stage
protected  boolean scheduleStages(java.lang.String wflId, java.util.List<WorkflowStage> stages)
          Schedule stages of a workflow.
 boolean sendInfo(java.lang.String address, int port, java.lang.String command, java.util.List param)
           
 
Methods inherited from class java.lang.Thread
activeCount, checkAccess, clone, countStackFrames, currentThread, destroy, dumpStack, enumerate, getAllStackTraces, getContextClassLoader, getDefaultUncaughtExceptionHandler, getId, getName, getPriority, getStackTrace, getState, getThreadGroup, getUncaughtExceptionHandler, holdsLock, interrupt, interrupted, isAlive, isDaemon, isInterrupted, join, join, join, resume, setContextClassLoader, setDaemon, setDefaultUncaughtExceptionHandler, setName, setPriority, setUncaughtExceptionHandler, sleep, sleep, start, stop, stop, suspend, toString, yield
 
Methods inherited from class java.lang.Object
equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
 

Constructor Detail

AutonomicScheduler

public AutonomicScheduler(java.lang.String address,
                          int port,
                          java.lang.String wflMasterAdd,
                          int wflMasterPort,
                          int monitorInterval)
Method Detail

initPolicyCatalog

protected final void initPolicyCatalog()

getPolicies

public java.util.List getPolicies(java.lang.String fullclassName)

findPolicyClass

public java.lang.String findPolicyClass(java.lang.String policyName)
Returns the class name that contains a policy with the name indicated in the argument or NULL if not found.

Parameters:
policyName -
Returns:

run

public void run()
Specified by:
run in interface java.lang.Runnable
Overrides:
run in class java.lang.Thread

forceWorkflowReschedule

protected void forceWorkflowReschedule(java.lang.String workflowId)
                                throws java.io.IOException
Throws:
java.io.IOException

forceRescheduleAll

protected void forceRescheduleAll()
                           throws java.io.IOException
Throws:
java.io.IOException

rescheduleStages

protected boolean rescheduleStages(java.util.HashMap<java.lang.String,java.lang.String> wflStage_workerTasks)
                            throws java.io.IOException
Report failed tasks to the workflowMaster and retrieve

Returns:
Throws:
java.io.IOException

cleanStagesFinalMaps

protected void cleanStagesFinalMaps(java.util.HashMap<java.lang.String,java.lang.String> wflStage_workerTasks,
                                    java.lang.String agentAddress,
                                    int agentPort)
Remove workers that are allocated to each workflow stage but have been reported to be down


cleanStagesFinalMaps

protected void cleanStagesFinalMaps(java.util.HashMap<java.lang.String,java.util.List<java.lang.String>> wflStage_SitesDown)
When a whole site goes down, we need to remove all its workers that are allocated to each workflow stage

Parameters:
wflStage_SitesDown - {"wkf.stage":{"site","site"},..}

dealStageDone

protected boolean dealStageDone(java.lang.String wflId,
                                java.lang.String stageId,
                                java.lang.String end,
                                java.lang.String stagesInvolved)
Do something when a stage is done. By default, tell the agent to stop its workers.

Parameters:
wflId -
stageId -
end - indicates if there are more stages in the workflow. values: "end","noend"
stagesInvolved - Stages involved. This is useful when there are non-blocking stages
Returns:

calculateCostVMs

protected void calculateCostVMs(java.lang.String wflId,
                                java.lang.String stageId)

schedulePendingStages

protected boolean schedulePendingStages()

scheduleStages

protected boolean scheduleStages(java.lang.String wflId,
                                 java.util.List<WorkflowStage> stages)
Schedule stages of a workflow. If there are not enough resources, stages are stored in a queue.

Parameters:
wflId -
stages -
Returns:
true if all stages were scheduled, false otherwise

scheduleSingleStage

protected boolean scheduleSingleStage(java.lang.String wflId,
                                      WorkflowStage stage,
                                      java.util.List<java.lang.String> nodeAcctList)
Schedule a single stage

Parameters:
wflId -
stage -
nodeAcctList - List of nodes and timestamps of failed nodes. Only needed for rescheduling.
Returns:
status of the scheduling

getAllAvailableSlots

protected java.util.HashMap<java.lang.String,java.util.List<WorkerForScheduler>> getAllAvailableSlots(java.lang.String workflow,
                                                                                                      java.lang.String stage)
Look into available resources and determine the number of slots (workers) available. It includes the slots already allocated to a workflow-stage. This is used when rescheduling. So far, slots (workers) are not shared across stages.

Returns:
hashmap with slots per site.

getFinalMap

protected java.util.HashMap<java.lang.String,java.util.List<java.lang.String>> getFinalMap(java.util.HashMap<java.lang.Integer,WorkerForScheduler> MapTask,
                                                                                           java.lang.String stageId)
Return the number of machines and workers per site

Parameters:
MapTask - Map task:slot(worker)
Returns:
HashMap site:{"type/name:numWorkers;...","task|task|task"}, site:{"type/name:numWorkers;..","task|task"}

communicateAgentsScheduling

protected java.util.List communicateAgentsScheduling(java.lang.String wflId,
                                                     WorkflowStage stage,
                                                     java.lang.String properties,
                                                     java.util.HashMap<java.lang.String,java.util.List<java.lang.String>> finalMap,
                                                     long timestamp)
Tell each agent which resources it needs to provision

Parameters:
wflId -
stage -
properties -
finalMap -
Returns:
list of sites where allocation failed

filesForSite

protected java.lang.String filesForSite(WorkflowStage stage,
                                        java.lang.String tasks)
Get list of files that a site will need

Parameters:
stage -
tasks - List of tasks in the format: "taskid|taskid|taskid"
Returns:
String in the format: "sierra.futuregrid.org:/file/path/,sierra.futuregrid.org:/file/path/|file1,file2" two positional lists

RoundRobin

protected boolean RoundRobin(java.lang.String wflId,
                             WorkflowStage stage,
                             java.lang.String properties)

sendInfo

public boolean sendInfo(java.lang.String address,
                        int port,
                        java.lang.String command,
                        java.util.List param)

calculateNetworkSpeed

public void calculateNetworkSpeed()
Calculate network speed between agents, i.e. the bandwidth when site A pulls a file from site B. The values are fixed for now.