001    /**
002     * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003     * Licensed under the Apache License, Version 2.0 (the "License");
004     * you may not use this file except in compliance with the License.
005     * You may obtain a copy of the License at
006     *
007     *   http://www.apache.org/licenses/LICENSE-2.0
008     *
009     *  Unless required by applicable law or agreed to in writing, software
010     *  distributed under the License is distributed on an "AS IS" BASIS,
011     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012     *  See the License for the specific language governing permissions and
013     *  limitations under the License. See accompanying LICENSE file.
014     */
015    package org.apache.oozie.action.hadoop;
016    
017    import java.io.BufferedReader;
018    import java.io.File;
019    import java.io.FileNotFoundException;
020    import java.io.IOException;
021    import java.io.InputStream;
022    import java.io.InputStreamReader;
023    import java.io.StringReader;
024    import java.net.ConnectException;
025    import java.net.URI;
026    import java.net.UnknownHostException;
027    import java.util.ArrayList;
028    import java.util.HashSet;
029    import java.util.List;
030    import java.util.Map;
031    import java.util.Properties;
032    import java.util.Set;
033    
034    import org.apache.hadoop.conf.Configuration;
035    import org.apache.hadoop.filecache.DistributedCache;
036    import org.apache.hadoop.fs.FileSystem;
037    import org.apache.hadoop.fs.Path;
038    import org.apache.hadoop.fs.permission.AccessControlException;
039    import org.apache.hadoop.mapred.JobClient;
040    import org.apache.hadoop.mapred.JobConf;
041    import org.apache.hadoop.mapred.JobID;
042    import org.apache.hadoop.mapred.RunningJob;
043    import org.apache.hadoop.util.DiskChecker;
044    import org.apache.oozie.action.ActionExecutor;
045    import org.apache.oozie.action.ActionExecutorException;
046    import org.apache.oozie.client.OozieClient;
047    import org.apache.oozie.client.WorkflowAction;
048    import org.apache.oozie.service.HadoopAccessorException;
049    import org.apache.oozie.service.HadoopAccessorService;
050    import org.apache.oozie.service.Services;
051    import org.apache.oozie.service.WorkflowAppService;
052    import org.apache.oozie.servlet.CallbackServlet;
053    import org.apache.oozie.util.IOUtils;
054    import org.apache.oozie.util.PropertiesUtils;
055    import org.apache.oozie.util.XConfiguration;
056    import org.apache.oozie.util.XLog;
057    import org.apache.oozie.util.XmlUtils;
058    import org.jdom.Element;
059    import org.jdom.JDOMException;
060    import org.jdom.Namespace;
061    
062    public class JavaActionExecutor extends ActionExecutor {
063    
    // Hadoop configuration keys that createBaseHadoopConf() sets on every base configuration.
    private static final String HADOOP_USER = "user.name";
    private static final String HADOOP_UGI = "hadoop.job.ugi";
    private static final String HADOOP_JOB_TRACKER = "mapred.job.tracker";
    private static final String HADOOP_NAME_NODE = "fs.default.name";

    // Property names that checkForDisallowedProps() rejects; filled in by the static initializer.
    private static final Set<String> DISALLOWED_PROPERTIES = new HashSet<String>();

    // Assigned in initActionType() from CallbackServlet.CONF_MAX_DATA_LEN (default 2 * 1024) and
    // handed to LauncherMapper.setupMaxOutputData() by createLauncherConf().
    private static int maxActionOutputLen;

    // Status strings: end() compares the action's external status with SUCCEEDED and check()
    // records FAILED when the Hadoop job cannot be found.
    // NOTE(review): KILLED, FAILED_KILLED and RUNNING are presumably used further down the file.
    private static final String SUCCEEDED = "SUCCEEDED";
    private static final String KILLED = "KILLED";
    private static final String FAILED = "FAILED";
    private static final String FAILED_KILLED = "FAILED/KILLED";
    private static final String RUNNING = "RUNNING";
    // Instance logger obtained for the concrete executor class.
    protected XLog log = XLog.getLog(getClass());
079    
080        static {
081            DISALLOWED_PROPERTIES.add(HADOOP_USER);
082            DISALLOWED_PROPERTIES.add(HADOOP_UGI);
083            DISALLOWED_PROPERTIES.add(HADOOP_JOB_TRACKER);
084            DISALLOWED_PROPERTIES.add(HADOOP_NAME_NODE);
085            DISALLOWED_PROPERTIES.add(WorkflowAppService.HADOOP_JT_KERBEROS_NAME);
086            DISALLOWED_PROPERTIES.add(WorkflowAppService.HADOOP_NN_KERBEROS_NAME);
087        }
088    
    /**
     * Creates an executor for the "java" action type by delegating to
     * {@link #JavaActionExecutor(String)}.
     */
    public JavaActionExecutor() {
        this("java");
    }
092    
    /**
     * Creates an executor for the given action type; the value is passed unchanged to the
     * {@link ActionExecutor} constructor.
     *
     * @param type action type name.
     */
    protected JavaActionExecutor(String type) {
        super(type);
    }
096    
097        protected String getLauncherJarName() {
098            return getType() + "-launcher.jar";
099        }
100    
101        protected List<Class> getLauncherClasses() {
102            List<Class> classes = new ArrayList<Class>();
103            classes.add(LauncherMapper.class);
104            classes.add(LauncherSecurityManager.class);
105            classes.add(LauncherException.class);
106            classes.add(LauncherMainException.class);
107            return classes;
108        }
109    
110        @Override
111        public void initActionType() {
112            super.initActionType();
113            maxActionOutputLen = getOozieConf().getInt(CallbackServlet.CONF_MAX_DATA_LEN, 2 * 1024);
114            try {
115                List<Class> classes = getLauncherClasses();
116                Class[] launcherClasses = classes.toArray(new Class[classes.size()]);
117                IOUtils.createJar(new File(getOozieRuntimeDir()), getLauncherJarName(), launcherClasses);
118    
119                registerError(UnknownHostException.class.getName(), ActionExecutorException.ErrorType.TRANSIENT, "JA001");
120                registerError(AccessControlException.class.getName(), ActionExecutorException.ErrorType.NON_TRANSIENT,
121                              "JA002");
122                registerError(DiskChecker.DiskOutOfSpaceException.class.getName(),
123                              ActionExecutorException.ErrorType.NON_TRANSIENT, "JA003");
124                registerError(org.apache.hadoop.hdfs.protocol.QuotaExceededException.class.getName(),
125                              ActionExecutorException.ErrorType.NON_TRANSIENT, "JA004");
126                registerError(org.apache.hadoop.hdfs.server.namenode.SafeModeException.class.getName(),
127                              ActionExecutorException.ErrorType.NON_TRANSIENT, "JA005");
128                registerError(ConnectException.class.getName(), ActionExecutorException.ErrorType.TRANSIENT, "JA006");
129                registerError(JDOMException.class.getName(), ActionExecutorException.ErrorType.ERROR, "JA007");
130                registerError(FileNotFoundException.class.getName(), ActionExecutorException.ErrorType.ERROR, "JA008");
131                registerError(IOException.class.getName(), ActionExecutorException.ErrorType.TRANSIENT, "JA009");
132            }
133            catch (IOException ex) {
134                throw new RuntimeException(ex);
135            }
136        }
137    
138        void checkForDisallowedProps(Configuration conf, String confName) throws ActionExecutorException {
139            for (String prop : DISALLOWED_PROPERTIES) {
140                if (conf.get(prop) != null) {
141                    throw new ActionExecutorException(ActionExecutorException.ErrorType.FAILED, "JA010",
142                                                      "Property [{0}] not allowed in action [{1}] configuration", prop, confName);
143                }
144            }
145        }
146    
147        Configuration createBaseHadoopConf(Context context, Element actionXml) {
148            Configuration conf = new XConfiguration();
149            conf.set(HADOOP_USER, context.getProtoActionConf().get(WorkflowAppService.HADOOP_USER));
150            conf.set(HADOOP_UGI, context.getProtoActionConf().get(WorkflowAppService.HADOOP_UGI));
151            if (context.getProtoActionConf().get(WorkflowAppService.HADOOP_JT_KERBEROS_NAME) != null) {
152                conf.set(WorkflowAppService.HADOOP_JT_KERBEROS_NAME, context.getProtoActionConf().get(
153                        WorkflowAppService.HADOOP_JT_KERBEROS_NAME));
154            }
155            if (context.getProtoActionConf().get(WorkflowAppService.HADOOP_NN_KERBEROS_NAME) != null) {
156                conf.set(WorkflowAppService.HADOOP_NN_KERBEROS_NAME, context.getProtoActionConf().get(
157                        WorkflowAppService.HADOOP_NN_KERBEROS_NAME));
158            }
159            conf.set(OozieClient.GROUP_NAME, context.getProtoActionConf().get(OozieClient.GROUP_NAME));
160            Namespace ns = actionXml.getNamespace();
161            String jobTracker = actionXml.getChild("job-tracker", ns).getTextTrim();
162            String nameNode = actionXml.getChild("name-node", ns).getTextTrim();
163            conf.set(HADOOP_JOB_TRACKER, jobTracker);
164            conf.set(HADOOP_NAME_NODE, nameNode);
165            conf.set("mapreduce.fileoutputcommitter.marksuccessfuljobs", "true");
166            return conf;
167        }
168    
169        Configuration setupLauncherConf(Configuration conf, Element actionXml, Path appPath, Context context) throws ActionExecutorException {
170            try {
171                Namespace ns = actionXml.getNamespace();
172                Element e = actionXml.getChild("configuration", ns);
173                if (e != null) {
174                    String strConf = XmlUtils.prettyPrint(e).toString();
175                    XConfiguration inlineConf = new XConfiguration(new StringReader(strConf));
176    
177                    XConfiguration launcherConf = new XConfiguration();
178                    for (Map.Entry<String, String> entry : inlineConf) {
179                        if (entry.getKey().startsWith("oozie.launcher.")) {
180                            String name = entry.getKey().substring("oozie.launcher.".length());
181                            String value = entry.getValue();
182                            // setting original KEY
183                            launcherConf.set(entry.getKey(), value);
184                            // setting un-prefixed key (to allow Hadoop job config
185                            // for the launcher job
186                            launcherConf.set(name, value);
187                        }
188                    }
189                    checkForDisallowedProps(launcherConf, "inline launcher configuration");
190                    XConfiguration.copy(launcherConf, conf);
191                }
192                return conf;
193            }
194            catch (IOException ex) {
195                throw convertException(ex);
196            }
197        }
198    
199        protected FileSystem getActionFileSystem(Context context, WorkflowAction action) throws ActionExecutorException {
200            try {
201                Element actionXml = XmlUtils.parseXml(action.getConf());
202                return getActionFileSystem(context, actionXml);
203            }
204            catch (JDOMException ex) {
205                throw convertException(ex);
206            }
207        }
208    
209        protected FileSystem getActionFileSystem(Context context, Element actionXml) throws ActionExecutorException {
210            try {
211                return context.getAppFileSystem();
212            }
213            catch (Exception ex) {
214                throw convertException(ex);
215            }
216        }
217    
218        Configuration setupActionConf(Configuration actionConf, Context context, Element actionXml, Path appPath)
219                throws ActionExecutorException {
220            try {
221                Namespace ns = actionXml.getNamespace();
222                Element e = actionXml.getChild("job-xml", ns);
223                if (e != null) {
224                    String jobXml = e.getTextTrim();
225                    Path path = new Path(appPath, jobXml);
226                    FileSystem fs = getActionFileSystem(context, actionXml);
227                    Configuration jobXmlConf = new XConfiguration(fs.open(path));
228                    checkForDisallowedProps(jobXmlConf, "job-xml");
229                    XConfiguration.copy(jobXmlConf, actionConf);
230                }
231                e = actionXml.getChild("configuration", ns);
232                if (e != null) {
233                    String strConf = XmlUtils.prettyPrint(e).toString();
234                    XConfiguration inlineConf = new XConfiguration(new StringReader(strConf));
235                    checkForDisallowedProps(inlineConf, "inline configuration");
236                    XConfiguration.copy(inlineConf, actionConf);
237                }
238                return actionConf;
239            }
240            catch (IOException ex) {
241                throw convertException(ex);
242            }
243        }
244    
245        Configuration addToCache(Configuration conf, Path appPath, String filePath, boolean archive)
246                throws ActionExecutorException {
247            Path path = null;
248            try {
249                if (filePath.startsWith("/")) {
250                    path = new Path(filePath);
251                }
252                else {
253                    path = new Path(appPath, filePath);
254                }
255                URI uri = new URI(path.toUri().getPath());
256                if (archive) {
257                    DistributedCache.addCacheArchive(uri, conf);
258                }
259                else {
260                    String fileName = filePath.substring(filePath.lastIndexOf("/") + 1);
261                    if (fileName.endsWith(".so") || fileName.contains(".so.")) {  // .so files
262                        if (!fileName.endsWith(".so")) {
263                            int extAt = fileName.indexOf(".so.");
264                            fileName = fileName.substring(0, extAt + 3);
265                        }
266                        uri = new Path(path.toString() + "#" + fileName).toUri();
267                        uri = new URI(uri.getPath());
268                        DistributedCache.addCacheFile(uri, conf);
269                    }
270                    else if (fileName.endsWith(".jar")){  // .jar files
271                        if (!fileName.contains("#")) {
272                            path = new Path(uri.toString());
273    
274                            String user = conf.get("user.name");
275                            String group = conf.get("group.name");
276                            FileSystem fs =
277                              Services.get().get(HadoopAccessorService.class).createFileSystem(user, group, conf);
278                            DistributedCache.addFileToClassPath(path, conf, fs);
279                        }
280                        else {
281                            DistributedCache.addCacheFile(uri, conf);
282                        }
283                    }
284                    else { // regular files
285                        if (!fileName.contains("#")) {
286                            uri = new Path(path.toString() + "#" + fileName).toUri();
287                            uri = new URI(uri.getPath());
288                        }
289                        DistributedCache.addCacheFile(uri, conf);
290                    }
291                }
292                DistributedCache.createSymlink(conf);
293                return conf;
294            }
295            catch (Exception ex) {
296                XLog.getLog(getClass()).debug(
297                        "Errors when add to DistributedCache. Path=" + path + ", archive=" + archive + ", conf="
298                                + XmlUtils.prettyPrint(conf).toString());
299                throw convertException(ex);
300            }
301        }
302    
303        String getOozieLauncherJar(Context context) throws ActionExecutorException {
304            try {
305                return new Path(context.getActionDir(), getLauncherJarName()).toString();
306            }
307            catch (Exception ex) {
308                throw convertException(ex);
309            }
310        }
311    
312        void prepareActionDir(FileSystem actionFs, Context context) throws ActionExecutorException {
313            try {
314                Path actionDir = context.getActionDir();
315                Path tempActionDir = new Path(actionDir.getParent(), actionDir.getName() + ".tmp");
316                if (!actionFs.exists(actionDir)) {
317                    try {
318                        actionFs.copyFromLocalFile(new Path(getOozieRuntimeDir(), getLauncherJarName()), new Path(
319                                tempActionDir, getLauncherJarName()));
320                        actionFs.rename(tempActionDir, actionDir);
321                    }
322                    catch (IOException ex) {
323                        actionFs.delete(tempActionDir, true);
324                        actionFs.delete(actionDir, true);
325                        throw ex;
326                    }
327                }
328            }
329            catch (Exception ex) {
330                throw convertException(ex);
331            }
332        }
333    
334        void cleanUpActionDir(FileSystem actionFs, Context context) throws ActionExecutorException {
335            try {
336                Path actionDir = context.getActionDir();
337                if (!context.getProtoActionConf().getBoolean("oozie.action.keep.action.dir", false)
338                        && actionFs.exists(actionDir)) {
339                    actionFs.delete(actionDir, true);
340                }
341            }
342            catch (Exception ex) {
343                throw convertException(ex);
344            }
345        }
346    
347        @SuppressWarnings("unchecked")
348        void setLibFilesArchives(Context context, Element actionXml, Path appPath, Configuration conf)
349                throws ActionExecutorException {
350            Configuration proto = context.getProtoActionConf();
351    
352            addToCache(conf, appPath, getOozieLauncherJar(context), false);
353    
354            String[] paths = proto.getStrings(WorkflowAppService.APP_LIB_PATH_LIST);
355            if (paths != null) {
356                for (String path : paths) {
357                    addToCache(conf, appPath, path, false);
358                }
359            }
360    
361            for (Element eProp : (List<Element>) actionXml.getChildren()) {
362                if (eProp.getName().equals("file")) {
363                    String path = eProp.getTextTrim();
364                    addToCache(conf, appPath, path, false);
365                }
366                else {
367                    if (eProp.getName().equals("archive")) {
368                        String path = eProp.getTextTrim();
369                        addToCache(conf, appPath, path, true);
370                    }
371                }
372            }
373        }
374    
375        protected String getLauncherMain(Configuration launcherConf, Element actionXml) {
376            Namespace ns = actionXml.getNamespace();
377            Element e = actionXml.getChild("main-class", ns);
378            return e.getTextTrim();
379        }
380    
    // Queue property of the action job, and its launcher-prefixed counterpart which, when set,
    // stops createLauncherConf() from copying the action's queue name to the launcher job.
    private static final String QUEUE_NAME = "mapred.job.queue.name";
    private static final String OOZIE_LAUNCHER_QUEUE_NAME = "oozie.launcher.mapred.job.queue.name";

    // Action properties that createLauncherConf() propagates to the launcher job configuration.
    private static final Set<String> SPECIAL_PROPERTIES = new HashSet<String>();
385    
386        static {
387            SPECIAL_PROPERTIES.add(QUEUE_NAME);
388            SPECIAL_PROPERTIES.add("mapreduce.jobtracker.kerberos.principal");
389            SPECIAL_PROPERTIES.add("dfs.namenode.kerberos.principal");
390        }
391    
392        @SuppressWarnings("unchecked")
393        JobConf createLauncherConf(Context context, WorkflowAction action, Element actionXml, Configuration actionConf)
394                throws ActionExecutorException {
395            try {
396                Path appPathRoot = new Path(context.getWorkflow().getAppPath()).getParent();
397    
398                // launcher job configuration
399                Configuration launcherConf = createBaseHadoopConf(context, actionXml);
400                setupLauncherConf(launcherConf, actionXml, appPathRoot, context);
401    
402                // we are doing init+copy because if not we are getting 'hdfs'
403                // scheme not known
404                // its seems that new JobConf(Conf) does not load defaults, it
405                // assumes parameter Conf does.
406                JobConf launcherJobConf = new JobConf();
407                XConfiguration.copy(launcherConf, launcherJobConf);
408                setLibFilesArchives(context, actionXml, appPathRoot, launcherJobConf);
409                String jobName = XLog.format("oozie:launcher:T={0}:W={1}:A={2}:ID={3}", getType(), context.getWorkflow()
410                        .getAppName(), action.getName(), context.getWorkflow().getId());
411                launcherJobConf.setJobName(jobName);
412    
413                String jobId = context.getWorkflow().getId();
414                String actionId = action.getId();
415                Path actionDir = context.getActionDir();
416                String recoveryId = context.getRecoveryId();
417    
418                LauncherMapper.setupLauncherInfo(launcherJobConf, jobId, actionId, actionDir, recoveryId, actionConf);
419    
420                LauncherMapper.setupMainClass(launcherJobConf, getLauncherMain(launcherConf, actionXml));
421    
422                LauncherMapper.setupMaxOutputData(launcherJobConf, maxActionOutputLen);
423    
424                Namespace ns = actionXml.getNamespace();
425                List<Element> list = actionXml.getChildren("arg", ns);
426                String[] args = new String[list.size()];
427                for (int i = 0; i < list.size(); i++) {
428                    args[i] = list.get(i).getTextTrim();
429                }
430                LauncherMapper.setupMainArguments(launcherJobConf, args);
431    
432                Element opt = actionXml.getChild("java-opts", ns);
433                if (opt != null) {
434                    String opts = launcherConf.get("mapred.child.java.opts", "");
435                    opts = opts + " " + opt.getTextTrim();
436                    opts = opts.trim();
437                    launcherJobConf.set("mapred.child.java.opts", opts);
438                }
439    
440                // properties from action that are needed by the launcher (QUEUE
441                // NAME)
442                // maybe we should add queue to the WF schema, below job-tracker
443                for (String name : SPECIAL_PROPERTIES) {
444                    String value = actionConf.get(name);
445                    if (value != null) {
446                        if (!name.equals(QUEUE_NAME) ||
447                            (name.equals(QUEUE_NAME) && launcherJobConf.get(OOZIE_LAUNCHER_QUEUE_NAME) == null)) {
448                            launcherJobConf.set(name, value);
449                        }
450                    }
451                }
452    
453                // to disable cancelation of delegation token on launcher job end
454                launcherJobConf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens", false);
455    
456                // setting the group owning the Oozie job to allow anybody in that
457                // group to kill the jobs.
458                launcherJobConf.set("mapreduce.job.acl-modify-job", context.getWorkflow().getGroup());
459    
460                return launcherJobConf;
461            }
462            catch (Exception ex) {
463                throw convertException(ex);
464            }
465        }
466    
467        private void injectCallback(Context context, Configuration conf) {
468            String callback = context.getCallbackUrl("$jobStatus");
469            if (conf.get("job.end.notification.url") != null) {
470                XLog.getLog(getClass()).warn("Overriding the action job end notification URI");
471            }
472            conf.set("job.end.notification.url", callback);
473        }
474    
    /**
     * Injects the Oozie callback URL into the action job configuration by delegating to
     * injectCallback().
     *
     * @param context executor context supplying the callback URL.
     * @param actionConf action job configuration to update.
     */
    void injectActionCallback(Context context, Configuration actionConf) {
        injectCallback(context, actionConf);
    }
478    
    /**
     * Injects the Oozie callback URL into the launcher job configuration by delegating to
     * injectCallback().
     *
     * @param context executor context supplying the callback URL.
     * @param launcherConf launcher job configuration to update.
     */
    void injectLauncherCallback(Context context, Configuration launcherConf) {
        injectCallback(context, launcherConf);
    }
482    
    /**
     * Submits the launcher job of an action, or re-attaches to it if it was already submitted.
     * <p/>
     * The action and launcher job configurations are built first. If LauncherMapper.getRecoveryId()
     * returns a launcher ID, that job is looked up instead of submitting a new one; otherwise the
     * prepare block is executed and the launcher job is submitted. In both cases the launcher ID,
     * job tracker and tracking URL are stored through {@code context.setStartData()}.
     *
     * @param context executor context.
     * @param action workflow action to submit.
     * @throws ActionExecutorException with code JA017 if a recovered job is unknown to the job
     * tracker or the submission returns no running job; any other failure is converted with
     * convertException().
     */
    void submitLauncher(Context context, WorkflowAction action) throws ActionExecutorException {
        JobClient jobClient = null;
        // remembers whether the try block failed, so a close() failure does not hide that error
        boolean exception = false;
        try {
            Path appPathRoot = new Path(context.getWorkflow().getAppPath()).getParent();
            Element actionXml = XmlUtils.parseXml(action.getConf());

            // action job configuration
            Configuration actionConf = createBaseHadoopConf(context, actionXml);
            setupActionConf(actionConf, context, actionXml, appPathRoot);
            XLog.getLog(getClass()).debug("Setting LibFilesArchives ");
            setLibFilesArchives(context, actionXml, appPathRoot, actionConf);
            String jobName = XLog.format("oozie:action:T={0}:W={1}:A={2}:ID={3}", getType(), context.getWorkflow()
                    .getAppName(), action.getName(), context.getWorkflow().getId());
            actionConf.set("mapred.job.name", jobName);
            injectActionCallback(context, actionConf);

            // setting the group owning the Oozie job to allow anybody in that
            // group to kill the jobs.
            actionConf.set("mapreduce.job.acl-modify-job", context.getWorkflow().getGroup());

            JobConf launcherJobConf = createLauncherConf(context, action, actionXml, actionConf);
            injectLauncherCallback(context, launcherJobConf);
            XLog.getLog(getClass()).debug("Creating Job Client for action " + action.getId());
            jobClient = createJobClient(context, launcherJobConf);
            // a non-null recovery ID means a launcher job was already submitted for this action
            String launcherId = LauncherMapper.getRecoveryId(launcherJobConf, context.getActionDir(), context
                    .getRecoveryId());
            boolean alreadyRunning = launcherId != null;
            RunningJob runningJob;

            if (alreadyRunning) {
                runningJob = jobClient.getJob(JobID.forName(launcherId));
                if (runningJob == null) {
                    String jobTracker = launcherJobConf.get("mapred.job.tracker");
                    throw new ActionExecutorException(ActionExecutorException.ErrorType.ERROR, "JA017",
                                                      "unknown job [{0}@{1}], cannot recover", launcherId, jobTracker);
                }
            }
            else {
                // the prepare block only runs for a fresh submission, not on recovery
                prepare(context, actionXml);
                XLog.getLog(getClass()).debug("Submitting the job through Job Client for action " + action.getId());

                // setting up propagation of the delegation token.
                AuthHelper.get().set(jobClient, launcherJobConf);
                log.debug(WorkflowAppService.HADOOP_JT_KERBEROS_NAME + " = "
                        + launcherJobConf.get(WorkflowAppService.HADOOP_JT_KERBEROS_NAME));
                log.debug(WorkflowAppService.HADOOP_NN_KERBEROS_NAME + " = "
                        + launcherJobConf.get(WorkflowAppService.HADOOP_NN_KERBEROS_NAME));
                runningJob = jobClient.submitJob(launcherJobConf);
                if (runningJob == null) {
                    throw new ActionExecutorException(ActionExecutorException.ErrorType.ERROR, "JA017",
                                                      "Error submitting launcher for action [{0}]", action.getId());
                }
                launcherId = runningJob.getID().toString();
                XLog.getLog(getClass()).debug("After submission get the launcherId " + launcherId);
            }

            String jobTracker = launcherJobConf.get(HADOOP_JOB_TRACKER);
            String consoleUrl = runningJob.getTrackingURL();
            context.setStartData(launcherId, jobTracker, consoleUrl);
        }
        catch (Exception ex) {
            exception = true;
            throw convertException(ex);
        }
        finally {
            if (jobClient != null) {
                try {
                    jobClient.close();
                }
                catch (Exception e) {
                    // a close() failure is only logged when an earlier exception is already
                    // propagating; otherwise it is converted and thrown
                    if (exception) {
                        log.error("JobClient error: ", e);
                    }
                    else {
                        throw convertException(e);
                    }
                }
            }
        }
    }
564    
565        void prepare(Context context, Element actionXml) throws ActionExecutorException {
566            Namespace ns = actionXml.getNamespace();
567            Element prepare = actionXml.getChild("prepare", ns);
568            if (prepare != null) {
569                XLog.getLog(getClass()).debug("Preparing the action with FileSystem operation");
570                FsActionExecutor fsAe = new FsActionExecutor();
571                fsAe.doOperations(context, prepare);
572                XLog.getLog(getClass()).debug("FS Operation is completed");
573            }
574        }
575    
576        @Override
577        public void start(Context context, WorkflowAction action) throws ActionExecutorException {
578            try {
579                XLog.getLog(getClass()).debug("Starting action " + action.getId() + " getting Action File System");
580                FileSystem actionFs = getActionFileSystem(context, action);
581                XLog.getLog(getClass()).debug("Preparing action Dir through copying " + context.getActionDir());
582                prepareActionDir(actionFs, context);
583                XLog.getLog(getClass()).debug("Action Dir is ready. Submitting the action ");
584                submitLauncher(context, action);
585                XLog.getLog(getClass()).debug("Action submit completed. Performing check ");
586                check(context, action);
587                XLog.getLog(getClass()).debug("Action check is done after submission");
588            }
589            catch (Exception ex) {
590                throw convertException(ex);
591            }
592        }
593    
594        @Override
595        public void end(Context context, WorkflowAction action) throws ActionExecutorException {
596            try {
597                String externalStatus = action.getExternalStatus();
598                WorkflowAction.Status status = externalStatus.equals(SUCCEEDED) ? WorkflowAction.Status.OK
599                                               : WorkflowAction.Status.ERROR;
600                context.setEndData(status, getActionSignal(status));
601            }
602            catch (Exception ex) {
603                throw convertException(ex);
604            }
605            finally {
606                try {
607                    FileSystem actionFs = getActionFileSystem(context, action);
608                    cleanUpActionDir(actionFs, context);
609                }
610                catch (Exception ex) {
611                    throw convertException(ex);
612                }
613            }
614        }
615    
616        /**
617         * Create job client object
618         * @param context
619         * @param jobConf
620         * @return
621         * @throws HadoopAccessorException
622         */
623        protected JobClient createJobClient(Context context, JobConf jobConf) throws HadoopAccessorException {
624            String user = context.getWorkflow().getUser();
625            String group = context.getWorkflow().getGroup();
626            return Services.get().get(HadoopAccessorService.class).createJobClient(user, group, jobConf);
627        }
628    
629        @Override
630        public void check(Context context, WorkflowAction action) throws ActionExecutorException {
631            JobClient jobClient = null;
632            boolean exception = false;
633            try {
634                Element actionXml = XmlUtils.parseXml(action.getConf());
635                FileSystem actionFs = getActionFileSystem(context, actionXml);
636                Configuration conf = createBaseHadoopConf(context, actionXml);
637                JobConf jobConf = new JobConf();
638                XConfiguration.copy(conf, jobConf);
639                jobClient = createJobClient(context, jobConf);
640                RunningJob runningJob = jobClient.getJob(JobID.forName(action.getExternalId()));
641                if (runningJob == null) {
642                    context.setExternalStatus(FAILED);
643                    context.setExecutionData(FAILED, null);
644                    throw new ActionExecutorException(ActionExecutorException.ErrorType.FAILED, "JA017",
645                                                      "Unknown hadoop job [{0}] associated with action [{1}].  Failing this action!", action
646                            .getExternalId(), action.getId());
647                }
648                if (runningJob.isComplete()) {
649                    Path actionDir = context.getActionDir();
650    
651                    String user = context.getWorkflow().getUser();
652                    String group = context.getWorkflow().getGroup();
653                    if (LauncherMapper.hasIdSwap(runningJob, user, group, actionDir)) {
654                        String launcherId = action.getExternalId();
655                        Path idSwapPath = LauncherMapper.getIdSwapPath(context.getActionDir());
656                        InputStream is = actionFs.open(idSwapPath);
657                        BufferedReader reader = new BufferedReader(new InputStreamReader(is));
658                        Properties props = PropertiesUtils.readProperties(reader, maxActionOutputLen);
659                        reader.close();
660                        String newId = props.getProperty("id");
661                        runningJob = jobClient.getJob(JobID.forName(newId));
662                        if (runningJob == null) {
663                            context.setExternalStatus(FAILED);
664                            throw new ActionExecutorException(ActionExecutorException.ErrorType.FAILED, "JA017",
665                                                              "Unknown hadoop job [{0}] associated with action [{1}].  Failing this action!", newId,
666                                                              action.getId());
667                        }
668    
669                        context.setStartData(newId, action.getTrackerUri(), runningJob.getTrackingURL());
670                        XLog.getLog(getClass()).info(XLog.STD, "External ID swap, old ID [{0}] new ID [{1}]", launcherId,
671                                                     newId);
672                    }
673                    if (runningJob.isComplete()) {
674                        XLog.getLog(getClass()).info(XLog.STD, "action completed, external ID [{0}]",
675                                                     action.getExternalId());
676                        if (runningJob.isSuccessful() && LauncherMapper.isMainSuccessful(runningJob)) {
677                            Properties props = null;
678                            if (getCaptureOutput(action)) {
679                                props = new Properties();
680                                if (LauncherMapper.hasOutputData(runningJob)) {
681                                    Path actionOutput = LauncherMapper.getOutputDataPath(context.getActionDir());
682                                    InputStream is = actionFs.open(actionOutput);
683                                    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
684                                    props = PropertiesUtils.readProperties(reader, maxActionOutputLen);
685                                    reader.close();
686                                }
687                            }
688                            context.setExecutionData(SUCCEEDED, props);
689                            XLog.getLog(getClass()).info(XLog.STD, "action produced output");
690                        }
691                        else {
692                            XLog log = XLog.getLog(getClass());
693                            String errorReason;
694                            Path actionError = LauncherMapper.getErrorPath(context.getActionDir());
695                            if (actionFs.exists(actionError)) {
696                                InputStream is = actionFs.open(actionError);
697                                BufferedReader reader = new BufferedReader(new InputStreamReader(is));
698                                Properties props = PropertiesUtils.readProperties(reader, -1);
699                                reader.close();
700                                String errorCode = props.getProperty("error.code");
701                                if (errorCode.equals("0")) {
702                                    errorCode = "JA018";
703                                }
704                                errorReason = props.getProperty("error.reason");
705                                log.warn("Launcher ERROR, reason: {0}", errorReason);
706                                String exMsg = props.getProperty("exception.message");
707                                String errorInfo = (exMsg != null) ? exMsg : errorReason;
708                                context.setErrorInfo(errorCode, errorInfo);
709                                String exStackTrace = props.getProperty("exception.stacktrace");
710                                if (exMsg != null) {
711                                    log.warn("Launcher exception: {0}{E}{1}", exMsg, exStackTrace);
712                                }
713                            }
714                            else {
715                                errorReason = XLog.format("LauncherMapper died, check Hadoop log for job [{0}:{1}]", action
716                                        .getTrackerUri(), action.getExternalId());
717                                log.warn(errorReason);
718                            }
719                            context.setExecutionData(FAILED_KILLED, null);
720                        }
721                    }
722                    else {
723                        context.setExternalStatus(RUNNING);
724                        XLog.getLog(getClass()).info(XLog.STD, "checking action, external ID [{0}] status [{1}]",
725                                                     action.getExternalId(), action.getExternalStatus());
726                    }
727                }
728                else {
729                    context.setExternalStatus(RUNNING);
730                    XLog.getLog(getClass()).info(XLog.STD, "checking action, external ID [{0}] status [{1}]",
731                                                 action.getExternalId(), action.getExternalStatus());
732                }
733            }
734            catch (Exception ex) {
735                XLog.getLog(getClass()).warn("Exception in check(). Message[{0}]", ex.getMessage(), ex);
736                exception = true;
737                throw convertException(ex);
738            }
739            finally {
740                if (jobClient != null) {
741                    try {
742                        jobClient.close();
743                    }
744                    catch (Exception e) {
745                        if (exception) {
746                            log.error("JobClient error: ", e);
747                        }
748                        else {
749                            throw convertException(e);
750                        }
751                    }
752                }
753            }
754        }
755    
756        protected boolean getCaptureOutput(WorkflowAction action) throws JDOMException {
757            Element eConf = XmlUtils.parseXml(action.getConf());
758            Namespace ns = eConf.getNamespace();
759            Element captureOutput = eConf.getChild("capture-output", ns);
760            return captureOutput != null;
761        }
762    
763        @Override
764        public void kill(Context context, WorkflowAction action) throws ActionExecutorException {
765            JobClient jobClient = null;
766            boolean exception = false;
767            try {
768                Element actionXml = XmlUtils.parseXml(action.getConf());
769                Configuration conf = createBaseHadoopConf(context, actionXml);
770                JobConf jobConf = new JobConf();
771                XConfiguration.copy(conf, jobConf);
772                jobClient = createJobClient(context, jobConf);
773                RunningJob runningJob = jobClient.getJob(JobID.forName(action.getExternalId()));
774                if (runningJob != null) {
775                    runningJob.killJob();
776                }
777                context.setExternalStatus(KILLED);
778                context.setExecutionData(KILLED, null);
779            }
780            catch (Exception ex) {
781                exception = true;
782                throw convertException(ex);
783            }
784            finally {
785                try {
786                    FileSystem actionFs = getActionFileSystem(context, action);
787                    cleanUpActionDir(actionFs, context);
788                    if (jobClient != null) {
789                        jobClient.close();
790                    }
791                }
792                catch (Exception ex) {
793                    if (exception) {
794                        log.error("Error: ", ex);
795                    }
796                    else {
797                        throw convertException(ex);
798                    }
799                }
800            }
801        }
802    
803        private static Set<String> FINAL_STATUS = new HashSet<String>();
804    
805        static {
806            FINAL_STATUS.add(SUCCEEDED);
807            FINAL_STATUS.add(KILLED);
808            FINAL_STATUS.add(FAILED);
809            FINAL_STATUS.add(FAILED_KILLED);
810        }
811    
812        @Override
813        public boolean isCompleted(String externalStatus) {
814            return FINAL_STATUS.contains(externalStatus);
815        }
816    
817    }