001    /**
002     * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003     * Licensed under the Apache License, Version 2.0 (the "License");
004     * you may not use this file except in compliance with the License.
005     * You may obtain a copy of the License at
006     *
007     *   http://www.apache.org/licenses/LICENSE-2.0
008     *
009     *  Unless required by applicable law or agreed to in writing, software
010     *  distributed under the License is distributed on an "AS IS" BASIS,
011     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012     *  See the License for the specific language governing permissions and
013     *  limitations under the License. See accompanying LICENSE file.
014     */
015    package org.apache.oozie.action.hadoop;
016    
017    import java.io.BufferedReader;
018    import java.io.File;
019    import java.io.FileNotFoundException;
020    import java.io.IOException;
021    import java.io.InputStream;
022    import java.io.InputStreamReader;
023    import java.io.StringReader;
024    import java.net.ConnectException;
025    import java.net.URI;
026    import java.net.UnknownHostException;
027    import java.util.ArrayList;
028    import java.util.HashSet;
029    import java.util.List;
030    import java.util.Map;
031    import java.util.Properties;
032    import java.util.Set;
033    
034    import org.apache.hadoop.conf.Configuration;
035    import org.apache.hadoop.filecache.DistributedCache;
036    import org.apache.hadoop.fs.FileSystem;
037    import org.apache.hadoop.fs.Path;
038    import org.apache.hadoop.fs.permission.AccessControlException;
039    import org.apache.hadoop.mapred.JobClient;
040    import org.apache.hadoop.mapred.JobConf;
041    import org.apache.hadoop.mapred.JobID;
042    import org.apache.hadoop.mapred.RunningJob;
043    import org.apache.hadoop.util.DiskChecker;
044    import org.apache.oozie.action.ActionExecutor;
045    import org.apache.oozie.action.ActionExecutorException;
046    import org.apache.oozie.client.OozieClient;
047    import org.apache.oozie.client.WorkflowAction;
048    import org.apache.oozie.service.HadoopAccessorException;
049    import org.apache.oozie.service.HadoopAccessorService;
050    import org.apache.oozie.service.Services;
051    import org.apache.oozie.service.WorkflowAppService;
052    import org.apache.oozie.servlet.CallbackServlet;
053    import org.apache.oozie.util.IOUtils;
054    import org.apache.oozie.util.PropertiesUtils;
055    import org.apache.oozie.util.XConfiguration;
056    import org.apache.oozie.util.XLog;
057    import org.apache.oozie.util.XmlUtils;
058    import org.jdom.Element;
059    import org.jdom.JDOMException;
060    import org.jdom.Namespace;
061    
062    public class JavaActionExecutor extends ActionExecutor {
063    
064        private static final String HADOOP_USER = "user.name";
065        private static final String HADOOP_UGI = "hadoop.job.ugi";
066        private static final String HADOOP_JOB_TRACKER = "mapred.job.tracker";
067        private static final String HADOOP_NAME_NODE = "fs.default.name";
068    
069        private static final Set<String> DISALLOWED_PROPERTIES = new HashSet<String>();
070    
071        private static int maxActionOutputLen;
072    
073        private static final String SUCCEEDED = "SUCCEEDED";
074        private static final String KILLED = "KILLED";
075        private static final String FAILED = "FAILED";
076        private static final String FAILED_KILLED = "FAILED/KILLED";
077        private static final String RUNNING = "RUNNING";
078        private XLog log = XLog.getLog(getClass());
079    
080        static {
081            DISALLOWED_PROPERTIES.add(HADOOP_USER);
082            DISALLOWED_PROPERTIES.add(HADOOP_UGI);
083            DISALLOWED_PROPERTIES.add(HADOOP_JOB_TRACKER);
084            DISALLOWED_PROPERTIES.add(HADOOP_NAME_NODE);
085            DISALLOWED_PROPERTIES.add(WorkflowAppService.HADOOP_JT_KERBEROS_NAME);
086            DISALLOWED_PROPERTIES.add(WorkflowAppService.HADOOP_NN_KERBEROS_NAME);
087        }
088    
    /**
     * Creates an executor registered under the action type <code>java</code>.
     */
    public JavaActionExecutor() {
        this("java");
    }

    /**
     * Creates an executor for the given action type; the type is handed to the
     * {@link ActionExecutor} constructor.
     *
     * @param type action type name.
     */
    protected JavaActionExecutor(String type) {
        super(type);
    }
096    
097        protected String getLauncherJarName() {
098            return getType() + "-launcher.jar";
099        }
100    
101        protected List<Class> getLauncherClasses() {
102            List<Class> classes = new ArrayList<Class>();
103            classes.add(LauncherMapper.class);
104            classes.add(LauncherSecurityManager.class);
105            classes.add(LauncherException.class);
106            return classes;
107        }
108    
109        @Override
110        public void initActionType() {
111            super.initActionType();
112            maxActionOutputLen = getOozieConf().getInt(CallbackServlet.CONF_MAX_DATA_LEN, 2 * 1024);
113            try {
114                List<Class> classes = getLauncherClasses();
115                Class[] launcherClasses = classes.toArray(new Class[classes.size()]);
116                IOUtils.createJar(new File(getOozieRuntimeDir()), getLauncherJarName(), launcherClasses);
117    
118                registerError(UnknownHostException.class.getName(), ActionExecutorException.ErrorType.TRANSIENT, "JA001");
119                registerError(AccessControlException.class.getName(), ActionExecutorException.ErrorType.NON_TRANSIENT,
120                              "JA002");
121                registerError(DiskChecker.DiskOutOfSpaceException.class.getName(),
122                              ActionExecutorException.ErrorType.NON_TRANSIENT, "JA003");
123                registerError(org.apache.hadoop.hdfs.protocol.QuotaExceededException.class.getName(),
124                              ActionExecutorException.ErrorType.NON_TRANSIENT, "JA004");
125                registerError(org.apache.hadoop.hdfs.server.namenode.SafeModeException.class.getName(),
126                              ActionExecutorException.ErrorType.NON_TRANSIENT, "JA005");
127                registerError(ConnectException.class.getName(), ActionExecutorException.ErrorType.TRANSIENT, "JA006");
128                registerError(JDOMException.class.getName(), ActionExecutorException.ErrorType.ERROR, "JA007");
129                registerError(FileNotFoundException.class.getName(), ActionExecutorException.ErrorType.ERROR, "JA008");
130                registerError(IOException.class.getName(), ActionExecutorException.ErrorType.TRANSIENT, "JA009");
131            }
132            catch (IOException ex) {
133                throw new RuntimeException(ex);
134            }
135        }
136    
137        void checkForDisallowedProps(Configuration conf, String confName) throws ActionExecutorException {
138            for (String prop : DISALLOWED_PROPERTIES) {
139                if (conf.get(prop) != null) {
140                    throw new ActionExecutorException(ActionExecutorException.ErrorType.FAILED, "JA010",
141                                                      "Property [{0}] not allowed in action [{1}] configuration", prop, confName);
142                }
143            }
144        }
145    
146        Configuration createBaseHadoopConf(Context context, Element actionXml) {
147            Configuration conf = new XConfiguration();
148            conf.set(HADOOP_USER, context.getProtoActionConf().get(WorkflowAppService.HADOOP_USER));
149            conf.set(HADOOP_UGI, context.getProtoActionConf().get(WorkflowAppService.HADOOP_UGI));
150            if (context.getProtoActionConf().get(WorkflowAppService.HADOOP_JT_KERBEROS_NAME) != null) {
151                conf.set(WorkflowAppService.HADOOP_JT_KERBEROS_NAME, context.getProtoActionConf().get(
152                        WorkflowAppService.HADOOP_JT_KERBEROS_NAME));
153            }
154            if (context.getProtoActionConf().get(WorkflowAppService.HADOOP_NN_KERBEROS_NAME) != null) {
155                conf.set(WorkflowAppService.HADOOP_NN_KERBEROS_NAME, context.getProtoActionConf().get(
156                        WorkflowAppService.HADOOP_NN_KERBEROS_NAME));
157            }
158            conf.set(OozieClient.GROUP_NAME, context.getProtoActionConf().get(OozieClient.GROUP_NAME));
159            Namespace ns = actionXml.getNamespace();
160            String jobTracker = actionXml.getChild("job-tracker", ns).getTextTrim();
161            String nameNode = actionXml.getChild("name-node", ns).getTextTrim();
162            conf.set(HADOOP_JOB_TRACKER, jobTracker);
163            conf.set(HADOOP_NAME_NODE, nameNode);
164            conf.set("mapreduce.fileoutputcommitter.marksuccessfuljobs", "true");
165            return conf;
166        }
167    
168        Configuration setupLauncherConf(Configuration conf, Element actionXml, Path appPath, Context context) throws ActionExecutorException {
169            try {
170                Namespace ns = actionXml.getNamespace();
171                Element e = actionXml.getChild("configuration", ns);
172                if (e != null) {
173                    String strConf = XmlUtils.prettyPrint(e).toString();
174                    XConfiguration inlineConf = new XConfiguration(new StringReader(strConf));
175    
176                    XConfiguration launcherConf = new XConfiguration();
177                    for (Map.Entry<String, String> entry : inlineConf) {
178                        if (entry.getKey().startsWith("oozie.launcher.")) {
179                            String name = entry.getKey().substring("oozie.launcher.".length());
180                            String value = entry.getValue();
181                            // setting original KEY
182                            launcherConf.set(entry.getKey(), value);
183                            // setting un-prefixed key (to allow Hadoop job config
184                            // for the launcher job
185                            launcherConf.set(name, value);
186                        }
187                    }
188                    checkForDisallowedProps(launcherConf, "inline launcher configuration");
189                    XConfiguration.copy(launcherConf, conf);
190                }
191                return conf;
192            }
193            catch (IOException ex) {
194                throw convertException(ex);
195            }
196        }
197    
198        protected FileSystem getActionFileSystem(Context context, WorkflowAction action) throws ActionExecutorException {
199            try {
200                Element actionXml = XmlUtils.parseXml(action.getConf());
201                return getActionFileSystem(context, actionXml);
202            }
203            catch (JDOMException ex) {
204                throw convertException(ex);
205            }
206        }
207    
208        protected FileSystem getActionFileSystem(Context context, Element actionXml) throws ActionExecutorException {
209            try {
210                return context.getAppFileSystem();
211            }
212            catch (Exception ex) {
213                throw convertException(ex);
214            }
215        }
216    
217        Configuration setupActionConf(Configuration actionConf, Context context, Element actionXml, Path appPath)
218                throws ActionExecutorException {
219            try {
220                Namespace ns = actionXml.getNamespace();
221                Element e = actionXml.getChild("job-xml", ns);
222                if (e != null) {
223                    String jobXml = e.getTextTrim();
224                    Path path = new Path(appPath, jobXml);
225                    FileSystem fs = getActionFileSystem(context, actionXml);
226                    Configuration jobXmlConf = new XConfiguration(fs.open(path));
227                    checkForDisallowedProps(jobXmlConf, "job-xml");
228                    XConfiguration.copy(jobXmlConf, actionConf);
229                }
230                e = actionXml.getChild("configuration", ns);
231                if (e != null) {
232                    String strConf = XmlUtils.prettyPrint(e).toString();
233                    XConfiguration inlineConf = new XConfiguration(new StringReader(strConf));
234                    checkForDisallowedProps(inlineConf, "inline configuration");
235                    XConfiguration.copy(inlineConf, actionConf);
236                }
237                return actionConf;
238            }
239            catch (IOException ex) {
240                throw convertException(ex);
241            }
242        }
243    
244        Configuration addToCache(Configuration conf, Path appPath, String filePath, boolean archive)
245                throws ActionExecutorException {
246            Path path = null;
247            try {
248                if (filePath.startsWith("/")) {
249                    path = new Path(filePath);
250                }
251                else {
252                    path = new Path(appPath, filePath);
253                }
254                URI uri = new URI(path.toUri().getPath());
255                if (archive) {
256                    DistributedCache.addCacheArchive(uri, conf);
257                }
258                else {
259                    String fileName = filePath.substring(filePath.lastIndexOf("/") + 1);
260                    if (fileName.endsWith(".so") || fileName.contains(".so.")) {  // .so files
261                        if (!fileName.endsWith(".so")) {
262                            int extAt = fileName.indexOf(".so.");
263                            fileName = fileName.substring(0, extAt + 3);
264                        }
265                        uri = new Path(path.toString() + "#" + fileName).toUri();
266                        uri = new URI(uri.getPath());
267                    }
268                    else if (fileName.endsWith(".jar")){  // .jar files
269                        if (!fileName.contains("#")) {
270                            path = new Path(uri.toString());
271    
272                            String user = conf.get("user.name");
273                            String group = conf.get("group.name");
274                            Services.get().get(HadoopAccessorService.class).addFileToClassPath(user, group, path, conf);
275                        }
276                    }
277                    else { // regular files
278                        if (!fileName.contains("#")) {
279                            uri = new Path(path.toString() + "#" + fileName).toUri();
280                            uri = new URI(uri.getPath());
281                        }
282                    }
283                    DistributedCache.addCacheFile(uri, conf);
284                }
285                DistributedCache.createSymlink(conf);
286                return conf;
287            }
288            catch (Exception ex) {
289                XLog.getLog(getClass()).debug(
290                        "Errors when add to DistributedCache. Path=" + path + ", archive=" + archive + ", conf="
291                                + XmlUtils.prettyPrint(conf).toString());
292                throw convertException(ex);
293            }
294        }
295    
296        String getOozieLauncherJar(Context context) throws ActionExecutorException {
297            try {
298                return new Path(context.getActionDir(), getLauncherJarName()).toString();
299            }
300            catch (Exception ex) {
301                throw convertException(ex);
302            }
303        }
304    
305        void prepareActionDir(FileSystem actionFs, Context context) throws ActionExecutorException {
306            try {
307                Path actionDir = context.getActionDir();
308                Path tempActionDir = new Path(actionDir.getParent(), actionDir.getName() + ".tmp");
309                if (!actionFs.exists(actionDir)) {
310                    try {
311                        actionFs.copyFromLocalFile(new Path(getOozieRuntimeDir(), getLauncherJarName()), new Path(
312                                tempActionDir, getLauncherJarName()));
313                        actionFs.rename(tempActionDir, actionDir);
314                    }
315                    catch (IOException ex) {
316                        actionFs.delete(tempActionDir, true);
317                        actionFs.delete(actionDir, true);
318                        throw ex;
319                    }
320                }
321            }
322            catch (Exception ex) {
323                throw convertException(ex);
324            }
325        }
326    
327        void cleanUpActionDir(FileSystem actionFs, Context context) throws ActionExecutorException {
328            try {
329                Path actionDir = context.getActionDir();
330                if (!context.getProtoActionConf().getBoolean("oozie.action.keep.action.dir", false)
331                        && actionFs.exists(actionDir)) {
332                    actionFs.delete(actionDir, true);
333                }
334            }
335            catch (Exception ex) {
336                throw convertException(ex);
337            }
338        }
339    
340        @SuppressWarnings("unchecked")
341        void setLibFilesArchives(Context context, Element actionXml, Path appPath, Configuration conf)
342                throws ActionExecutorException {
343            Configuration proto = context.getProtoActionConf();
344    
345            addToCache(conf, appPath, getOozieLauncherJar(context), false);
346    
347            String[] paths = proto.getStrings(WorkflowAppService.APP_LIB_PATH_LIST);
348            if (paths != null) {
349                for (String path : paths) {
350                    addToCache(conf, appPath, path, false);
351                }
352            }
353    
354            for (Element eProp : (List<Element>) actionXml.getChildren()) {
355                if (eProp.getName().equals("file")) {
356                    String path = eProp.getTextTrim();
357                    addToCache(conf, appPath, path, false);
358                }
359                else {
360                    if (eProp.getName().equals("archive")) {
361                        String path = eProp.getTextTrim();
362                        addToCache(conf, appPath, path, true);
363                    }
364                }
365            }
366        }
367    
368        protected String getLauncherMain(Configuration launcherConf, Element actionXml) {
369            Namespace ns = actionXml.getNamespace();
370            Element e = actionXml.getChild("main-class", ns);
371            return e.getTextTrim();
372        }
373    
374        private static final Set<String> SPECIAL_PROPERTIES = new HashSet<String>();
375    
376        static {
377            SPECIAL_PROPERTIES.add("mapred.job.queue.name");
378            SPECIAL_PROPERTIES.add("mapreduce.jobtracker.kerberos.principal");
379            SPECIAL_PROPERTIES.add("dfs.namenode.kerberos.principal");
380        }
381    
382        @SuppressWarnings("unchecked")
383        JobConf createLauncherConf(Context context, WorkflowAction action, Element actionXml, Configuration actionConf)
384                throws ActionExecutorException {
385            try {
386                Path appPath = new Path(context.getWorkflow().getAppPath());
387    
388                // launcher job configuration
389                Configuration launcherConf = createBaseHadoopConf(context, actionXml);
390                setupLauncherConf(launcherConf, actionXml, appPath, context);
391    
392                // we are doing init+copy because if not we are getting 'hdfs'
393                // scheme not known
394                // its seems that new JobConf(Conf) does not load defaults, it
395                // assumes parameter Conf does.
396                JobConf launcherJobConf = new JobConf();
397                XConfiguration.copy(launcherConf, launcherJobConf);
398                setLibFilesArchives(context, actionXml, appPath, launcherJobConf);
399                String jobName = XLog.format("oozie:launcher:T={0}:W={1}:A={2}:ID={3}", getType(), context.getWorkflow()
400                        .getAppName(), action.getName(), context.getWorkflow().getId());
401                launcherJobConf.setJobName(jobName);
402    
403                String jobId = context.getWorkflow().getId();
404                String actionId = action.getId();
405                Path actionDir = context.getActionDir();
406                String recoveryId = context.getRecoveryId();
407    
408                LauncherMapper.setupLauncherInfo(launcherJobConf, jobId, actionId, actionDir, recoveryId, actionConf);
409    
410                LauncherMapper.setupMainClass(launcherJobConf, getLauncherMain(launcherConf, actionXml));
411    
412                LauncherMapper.setupMaxOutputData(launcherJobConf, maxActionOutputLen);
413    
414                Namespace ns = actionXml.getNamespace();
415                List<Element> list = actionXml.getChildren("arg", ns);
416                String[] args = new String[list.size()];
417                for (int i = 0; i < list.size(); i++) {
418                    args[i] = list.get(i).getTextTrim();
419                }
420                LauncherMapper.setupMainArguments(launcherJobConf, args);
421    
422                Element opt = actionXml.getChild("java-opts", ns);
423                if (opt != null) {
424                    String opts = launcherConf.get("mapred.child.java.opts", "");
425                    opts = opts + " " + opt.getTextTrim();
426                    opts = opts.trim();
427                    launcherJobConf.set("mapred.child.java.opts", opts);
428                }
429    
430                // properties from action that are needed by the launcher (QUEUE
431                // NAME)
432                // maybe we should add queue to the WF schema, below job-tracker
433                for (String name : SPECIAL_PROPERTIES) {
434                    String value = actionConf.get(name);
435                    if (value != null) {
436                        launcherJobConf.set(name, value);
437                    }
438                }
439    
440                // to disable cancelation of delegation token on launcher job end
441                launcherJobConf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens", false);
442    
443                // setting the group owning the Oozie job to allow anybody in that
444                // group to kill the jobs.
445                launcherJobConf.set("mapreduce.job.acl-modify-job", context.getWorkflow().getGroup());
446    
447                return launcherJobConf;
448            }
449            catch (Exception ex) {
450                throw convertException(ex);
451            }
452        }
453    
454        private void injectCallback(Context context, Configuration conf) {
455            String callback = context.getCallbackUrl("$jobStatus");
456            if (conf.get("job.end.notification.url") != null) {
457                XLog.getLog(getClass()).warn("Overriding the action job end notification URI");
458            }
459            conf.set("job.end.notification.url", callback);
460        }
461    
    /**
     * Injects the Oozie callback URL into the action job configuration.
     *
     * @param context executor context providing the callback URL.
     * @param actionConf action job configuration to update.
     */
    void injectActionCallback(Context context, Configuration actionConf) {
        injectCallback(context, actionConf);
    }
465    
    /**
     * Injects the Oozie callback URL into the launcher job configuration.
     *
     * @param context executor context providing the callback URL.
     * @param launcherConf launcher job configuration to update.
     */
    void injectLauncherCallback(Context context, Configuration launcherConf) {
        injectCallback(context, launcherConf);
    }
469    
    /**
     * Builds the action and launcher configurations and submits the launcher job, or re-attaches
     * to a launcher job previously submitted for this action.
     * <p/>
     * When <code>LauncherMapper.getRecoveryId</code> returns an ID, the job with that ID is looked
     * up instead of submitting a new one. Otherwise the <code>prepare</code> block of the action is
     * executed and the launcher job is submitted. In both cases the launcher ID, the job tracker
     * and the tracking URL are stored in the context as start data.
     *
     * @param context executor context.
     * @param action workflow action to start.
     * @throws ActionExecutorException if the launcher cannot be submitted or recovered (error code
     * JA017), or if closing the job client fails after an otherwise successful run.
     */
    void submitLauncher(Context context, WorkflowAction action) throws ActionExecutorException {
        JobClient jobClient = null;
        // remembers whether the try block failed, see the finally block
        boolean exception = false;
        try {
            Path appPath = new Path(context.getWorkflow().getAppPath());
            Element actionXml = XmlUtils.parseXml(action.getConf());

            // action job configuration
            Configuration actionConf = createBaseHadoopConf(context, actionXml);
            setupActionConf(actionConf, context, actionXml, appPath);
            XLog.getLog(getClass()).debug("Setting LibFilesArchives ");
            setLibFilesArchives(context, actionXml, appPath, actionConf);
            String jobName = XLog.format("oozie:action:T={0}:W={1}:A={2}:ID={3}", getType(), context.getWorkflow()
                    .getAppName(), action.getName(), context.getWorkflow().getId());
            actionConf.set("mapred.job.name", jobName);
            injectActionCallback(context, actionConf);

            // setting the group owning the Oozie job to allow anybody in that
            // group to kill the jobs.
            actionConf.set("mapreduce.job.acl-modify-job", context.getWorkflow().getGroup());

            JobConf launcherJobConf = createLauncherConf(context, action, actionXml, actionConf);
            injectLauncherCallback(context, launcherJobConf);
            XLog.getLog(getClass()).debug("Creating Job Client for action " + action.getId());
            jobClient = createJobClient(context, launcherJobConf);
            // a non-null recovery ID means a launcher was already submitted for this action
            String launcherId = LauncherMapper.getRecoveryId(launcherJobConf, context.getActionDir(), context
                    .getRecoveryId());
            boolean alreadyRunning = launcherId != null;
            RunningJob runningJob;

            if (alreadyRunning) {
                // recovery path: re-attach to the existing launcher job, nothing is submitted
                runningJob = jobClient.getJob(JobID.forName(launcherId));
                if (runningJob == null) {
                    String jobTracker = launcherJobConf.get("mapred.job.tracker");
                    throw new ActionExecutorException(ActionExecutorException.ErrorType.ERROR, "JA017",
                                                      "unknown job [{0}@{1}], cannot recover", launcherId, jobTracker);
                }
            }
            else {
                // first submission: the prepare block runs before the launcher job is submitted
                prepare(context, actionXml);
                XLog.getLog(getClass()).debug("Submitting the job through Job Client for action " + action.getId());

                // setting up propagation of the delegation token.
                AuthHelper.get().set(jobClient, launcherJobConf);
                log.debug(WorkflowAppService.HADOOP_JT_KERBEROS_NAME + " = "
                        + launcherJobConf.get(WorkflowAppService.HADOOP_JT_KERBEROS_NAME));
                log.debug(WorkflowAppService.HADOOP_NN_KERBEROS_NAME + " = "
                        + launcherJobConf.get(WorkflowAppService.HADOOP_NN_KERBEROS_NAME));
                runningJob = jobClient.submitJob(launcherJobConf);
                if (runningJob == null) {
                    throw new ActionExecutorException(ActionExecutorException.ErrorType.ERROR, "JA017",
                                                      "Error submitting launcher for action [{0}]", action.getId());
                }
                launcherId = runningJob.getID().toString();
                XLog.getLog(getClass()).debug("After submission get the launcherId " + launcherId);
            }

            String jobTracker = launcherJobConf.get(HADOOP_JOB_TRACKER);
            String consoleUrl = runningJob.getTrackingURL();
            context.setStartData(launcherId, jobTracker, consoleUrl);
        }
        catch (Exception ex) {
            exception = true;
            throw convertException(ex);
        }
        finally {
            if (jobClient != null) {
                try {
                    jobClient.close();
                }
                catch (Exception e) {
                    // a close failure is only logged when the try block already failed, so the
                    // original exception is the one that propagates; otherwise it is reported
                    if (exception) {
                        log.error("JobClient error: ", e);
                    }
                    else {
                        throw convertException(e);
                    }
                }
            }
        }
    }
551    
552        void prepare(Context context, Element actionXml) throws ActionExecutorException {
553            Namespace ns = actionXml.getNamespace();
554            Element prepare = actionXml.getChild("prepare", ns);
555            if (prepare != null) {
556                XLog.getLog(getClass()).debug("Preparing the action with FileSystem operation");
557                FsActionExecutor fsAe = new FsActionExecutor();
558                fsAe.doOperations(context, prepare);
559                XLog.getLog(getClass()).debug("FS Operation is completed");
560            }
561        }
562    
563        @Override
564        public void start(Context context, WorkflowAction action) throws ActionExecutorException {
565            try {
566                XLog.getLog(getClass()).debug("Starting action " + action.getId() + " getting Action File System");
567                FileSystem actionFs = getActionFileSystem(context, action);
568                XLog.getLog(getClass()).debug("Preparing action Dir through copying " + context.getActionDir());
569                prepareActionDir(actionFs, context);
570                XLog.getLog(getClass()).debug("Action Dir is ready. Submitting the action ");
571                submitLauncher(context, action);
572                XLog.getLog(getClass()).debug("Action submit completed. Performing check ");
573                check(context, action);
574                XLog.getLog(getClass()).debug("Action check is done after submission");
575            }
576            catch (Exception ex) {
577                throw convertException(ex);
578            }
579        }
580    
581        @Override
582        public void end(Context context, WorkflowAction action) throws ActionExecutorException {
583            try {
584                String externalStatus = action.getExternalStatus();
585                WorkflowAction.Status status = externalStatus.equals(SUCCEEDED) ? WorkflowAction.Status.OK
586                                               : WorkflowAction.Status.ERROR;
587                context.setEndData(status, getActionSignal(status));
588            }
589            catch (Exception ex) {
590                throw convertException(ex);
591            }
592            finally {
593                try {
594                    FileSystem actionFs = getActionFileSystem(context, action);
595                    cleanUpActionDir(actionFs, context);
596                }
597                catch (Exception ex) {
598                    throw convertException(ex);
599                }
600            }
601        }
602    
603        /**
604         * Create job client object
605         * @param context
606         * @param jobConf
607         * @return
608         * @throws HadoopAccessorException
609         */
610        protected JobClient createJobClient(Context context, JobConf jobConf) throws HadoopAccessorException {
611            String user = context.getWorkflow().getUser();
612            String group = context.getWorkflow().getGroup();
613            return Services.get().get(HadoopAccessorService.class).createJobClient(user, group, jobConf);
614        }
615    
616        @Override
617        public void check(Context context, WorkflowAction action) throws ActionExecutorException {
618            JobClient jobClient = null;
619            boolean exception = false;
620            try {
621                Element actionXml = XmlUtils.parseXml(action.getConf());
622                FileSystem actionFs = getActionFileSystem(context, actionXml);
623                Configuration conf = createBaseHadoopConf(context, actionXml);
624                JobConf jobConf = new JobConf();
625                XConfiguration.copy(conf, jobConf);
626                jobClient = createJobClient(context, jobConf);
627                RunningJob runningJob = jobClient.getJob(JobID.forName(action.getExternalId()));
628                if (runningJob == null) {
629                    context.setExternalStatus(FAILED);
630                    context.setExecutionData(FAILED, null);
631                    throw new ActionExecutorException(ActionExecutorException.ErrorType.FAILED, "JA017",
632                                                      "Unknown hadoop job [{0}] associated with action [{1}].  Failing this action!", action
633                            .getExternalId(), action.getId());
634                }
635                if (runningJob.isComplete()) {
636                    Path actionDir = context.getActionDir();
637    
638                    String user = context.getWorkflow().getUser();
639                    String group = context.getWorkflow().getGroup();
640                    if (LauncherMapper.hasIdSwap(runningJob, user, group, actionDir)) {
641                        String launcherId = action.getExternalId();
642                        Path idSwapPath = LauncherMapper.getIdSwapPath(context.getActionDir());
643                        InputStream is = actionFs.open(idSwapPath);
644                        BufferedReader reader = new BufferedReader(new InputStreamReader(is));
645                        Properties props = PropertiesUtils.readProperties(reader, maxActionOutputLen);
646                        reader.close();
647                        String newId = props.getProperty("id");
648                        runningJob = jobClient.getJob(JobID.forName(newId));
649                        if (runningJob == null) {
650                            context.setExternalStatus(FAILED);
651                            throw new ActionExecutorException(ActionExecutorException.ErrorType.FAILED, "JA017",
652                                                              "Unknown hadoop job [{0}] associated with action [{1}].  Failing this action!", newId,
653                                                              action.getId());
654                        }
655    
656                        context.setStartData(newId, action.getTrackerUri(), runningJob.getTrackingURL());
657                        XLog.getLog(getClass()).info(XLog.STD, "External ID swap, old ID [{0}] new ID [{1}]", launcherId,
658                                                     newId);
659                    }
660                    if (runningJob.isComplete()) {
661                        XLog.getLog(getClass()).info(XLog.STD, "action completed, external ID [{0}]",
662                                                     action.getExternalId());
663                        if (runningJob.isSuccessful() && LauncherMapper.isMainSuccessful(runningJob)) {
664                            Properties props = null;
665                            if (getCaptureOutput(action)) {
666                                props = new Properties();
667                                if (LauncherMapper.hasOutputData(runningJob)) {
668                                    Path actionOutput = LauncherMapper.getOutputDataPath(context.getActionDir());
669                                    InputStream is = actionFs.open(actionOutput);
670                                    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
671                                    props = PropertiesUtils.readProperties(reader, maxActionOutputLen);
672                                    reader.close();
673                                }
674                            }
675                            context.setExecutionData(SUCCEEDED, props);
676                            XLog.getLog(getClass()).info(XLog.STD, "action produced output");
677                        }
678                        else {
679                            XLog log = XLog.getLog(getClass());
680                            String errorReason;
681                            Path actionError = LauncherMapper.getErrorPath(context.getActionDir());
682                            if (actionFs.exists(actionError)) {
683                                InputStream is = actionFs.open(actionError);
684                                BufferedReader reader = new BufferedReader(new InputStreamReader(is));
685                                Properties props = PropertiesUtils.readProperties(reader, -1);
686                                reader.close();
687                                errorReason = props.getProperty("error.reason");
688                                log.warn("Launcher ERROR, reason: {0}", errorReason);
689                                String exMsg = props.getProperty("exception.message");
690                                String errorInfo = (exMsg != null) ? exMsg : errorReason;
691                                context.setErrorInfo("JA018", errorInfo);
692                                String exStackTrace = props.getProperty("exception.stacktrace");
693                                if (exMsg != null) {
694                                    log.warn("Launcher exception: {0}{E}{1}", exMsg, exStackTrace);
695                                }
696                            }
697                            else {
698                                errorReason = XLog.format("LauncherMapper died, check Hadoop log for job [{0}:{1}]", action
699                                        .getTrackerUri(), action.getExternalId());
700                                log.warn(errorReason);
701                            }
702                            context.setExecutionData(FAILED_KILLED, null);
703                        }
704                    }
705                    else {
706                        context.setExternalStatus(RUNNING);
707                        XLog.getLog(getClass()).info(XLog.STD, "checking action, external ID [{0}] status [{1}]",
708                                                     action.getExternalId(), action.getExternalStatus());
709                    }
710                }
711                else {
712                    context.setExternalStatus(RUNNING);
713                    XLog.getLog(getClass()).info(XLog.STD, "checking action, external ID [{0}] status [{1}]",
714                                                 action.getExternalId(), action.getExternalStatus());
715                }
716            }
717            catch (Exception ex) {
718                XLog.getLog(getClass()).warn("Exception in check(). Message[{0}]", ex.getMessage(), ex);
719                exception = true;
720                throw convertException(ex);
721            }
722            finally {
723                if (jobClient != null) {
724                    try {
725                        jobClient.close();
726                    }
727                    catch (Exception e) {
728                        if (exception) {
729                            log.error("JobClient error: ", e);
730                        }
731                        else {
732                            throw convertException(e);
733                        }
734                    }
735                }
736            }
737        }
738    
739        protected boolean getCaptureOutput(WorkflowAction action) throws JDOMException {
740            Element eConf = XmlUtils.parseXml(action.getConf());
741            Namespace ns = eConf.getNamespace();
742            Element captureOutput = eConf.getChild("capture-output", ns);
743            return captureOutput != null;
744        }
745    
746        @Override
747        public void kill(Context context, WorkflowAction action) throws ActionExecutorException {
748            JobClient jobClient = null;
749            boolean exception = false;
750            try {
751                Element actionXml = XmlUtils.parseXml(action.getConf());
752                Configuration conf = createBaseHadoopConf(context, actionXml);
753                JobConf jobConf = new JobConf();
754                XConfiguration.copy(conf, jobConf);
755                jobClient = createJobClient(context, jobConf);
756                RunningJob runningJob = jobClient.getJob(JobID.forName(action.getExternalId()));
757                if (runningJob != null) {
758                    runningJob.killJob();
759                }
760                context.setExternalStatus(KILLED);
761                context.setExecutionData(KILLED, null);
762            }
763            catch (Exception ex) {
764                exception = true;
765                throw convertException(ex);
766            }
767            finally {
768                try {
769                    FileSystem actionFs = getActionFileSystem(context, action);
770                    cleanUpActionDir(actionFs, context);
771                    if (jobClient != null) {
772                        jobClient.close();
773                    }
774                }
775                catch (Exception ex) {
776                    if (exception) {
777                        log.error("Error: ", ex);
778                    }
779                    else {
780                        throw convertException(ex);
781                    }
782                }
783            }
784        }
785    
786        private static Set<String> FINAL_STATUS = new HashSet<String>();
787    
788        static {
789            FINAL_STATUS.add(SUCCEEDED);
790            FINAL_STATUS.add(KILLED);
791            FINAL_STATUS.add(FAILED);
792            FINAL_STATUS.add(FAILED_KILLED);
793        }
794    
795        @Override
796        public boolean isCompleted(String externalStatus) {
797            return FINAL_STATUS.contains(externalStatus);
798        }
799    
800    }