001    /**
002     * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003     * Licensed under the Apache License, Version 2.0 (the "License");
004     * you may not use this file except in compliance with the License.
005     * You may obtain a copy of the License at
006     *
007     *   http://www.apache.org/licenses/LICENSE-2.0
008     *
009     *  Unless required by applicable law or agreed to in writing, software
010     *  distributed under the License is distributed on an "AS IS" BASIS,
011     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012     *  See the License for the specific language governing permissions and
013     *  limitations under the License. See accompanying LICENSE file.
014     */
015    package org.apache.oozie.action.hadoop;
016    
017    import java.io.BufferedReader;
018    import java.io.File;
019    import java.io.FileNotFoundException;
020    import java.io.IOException;
021    import java.io.InputStream;
022    import java.io.InputStreamReader;
023    import java.io.StringReader;
024    import java.net.ConnectException;
025    import java.net.URI;
026    import java.net.UnknownHostException;
027    import java.util.ArrayList;
028    import java.util.HashSet;
029    import java.util.List;
030    import java.util.Map;
031    import java.util.Properties;
032    import java.util.Set;
033    
034    import org.apache.hadoop.conf.Configuration;
035    import org.apache.hadoop.filecache.DistributedCache;
036    import org.apache.hadoop.fs.FileSystem;
037    import org.apache.hadoop.fs.Path;
038    import org.apache.hadoop.fs.permission.AccessControlException;
039    import org.apache.hadoop.mapred.JobClient;
040    import org.apache.hadoop.mapred.JobConf;
041    import org.apache.hadoop.mapred.JobID;
042    import org.apache.hadoop.mapred.RunningJob;
043    import org.apache.hadoop.util.DiskChecker;
044    import org.apache.oozie.action.ActionExecutor;
045    import org.apache.oozie.action.ActionExecutorException;
046    import org.apache.oozie.client.OozieClient;
047    import org.apache.oozie.client.WorkflowAction;
048    import org.apache.oozie.service.HadoopAccessorException;
049    import org.apache.oozie.service.HadoopAccessorService;
050    import org.apache.oozie.service.Services;
051    import org.apache.oozie.service.WorkflowAppService;
052    import org.apache.oozie.servlet.CallbackServlet;
053    import org.apache.oozie.util.IOUtils;
054    import org.apache.oozie.util.PropertiesUtils;
055    import org.apache.oozie.util.XConfiguration;
056    import org.apache.oozie.util.XLog;
057    import org.apache.oozie.util.XmlUtils;
058    import org.jdom.Element;
059    import org.jdom.JDOMException;
060    import org.jdom.Namespace;
061    
062    public class JavaActionExecutor extends ActionExecutor {
063    
    // Hadoop configuration keys that this executor sets itself in createBaseHadoopConf().
    private static final String HADOOP_USER = "user.name";
    private static final String HADOOP_UGI = "hadoop.job.ugi";
    private static final String HADOOP_JOB_TRACKER = "mapred.job.tracker";
    private static final String HADOOP_NAME_NODE = "fs.default.name";

    // Property names that checkForDisallowedProps() rejects; filled by the static initializer below.
    private static final Set<String> DISALLOWED_PROPERTIES = new HashSet<String>();

    // Value passed to LauncherMapper.setupMaxOutputData(); assigned in initActionType().
    private static int maxActionOutputLen;

    // Status names compared against / reported as the external status of an action.
    private static final String SUCCEEDED = "SUCCEEDED";
    private static final String KILLED = "KILLED";
    private static final String FAILED = "FAILED";
    private static final String FAILED_KILLED = "FAILED/KILLED";
    private static final String RUNNING = "RUNNING";
    // Per-instance logger, created for the runtime class of this executor.
    private XLog log = XLog.getLog(getClass());

    // Register every key that must not appear in user-supplied configuration.
    static {
        DISALLOWED_PROPERTIES.add(HADOOP_USER);
        DISALLOWED_PROPERTIES.add(HADOOP_UGI);
        DISALLOWED_PROPERTIES.add(HADOOP_JOB_TRACKER);
        DISALLOWED_PROPERTIES.add(HADOOP_NAME_NODE);
        DISALLOWED_PROPERTIES.add(WorkflowAppService.HADOOP_JT_KERBEROS_NAME);
        DISALLOWED_PROPERTIES.add(WorkflowAppService.HADOOP_NN_KERBEROS_NAME);
    }
088    
    /**
     * Creates an executor whose action type is <code>java</code>.
     */
    public JavaActionExecutor() {
        this("java");
    }
092    
    /**
     * Creates an executor for the given action type; intended for subclasses.
     *
     * @param type action type name, forwarded to the {@link ActionExecutor} constructor.
     */
    protected JavaActionExecutor(String type) {
        super(type);
    }
096    
    /**
     * Returns the file name of the launcher jar for this action type.
     *
     * @return the action type followed by <code>-launcher.jar</code>.
     */
    protected String getLauncherJarName() {
        return getType() + "-launcher.jar";
    }
100    
101        protected List<Class> getLauncherClasses() {
102            List<Class> classes = new ArrayList<Class>();
103            classes.add(LauncherMapper.class);
104            classes.add(LauncherSecurityManager.class);
105            classes.add(LauncherException.class);
106            classes.add(LauncherMainException.class);
107            return classes;
108        }
109    
110        @Override
111        public void initActionType() {
112            super.initActionType();
113            maxActionOutputLen = getOozieConf().getInt(CallbackServlet.CONF_MAX_DATA_LEN, 2 * 1024);
114            try {
115                List<Class> classes = getLauncherClasses();
116                Class[] launcherClasses = classes.toArray(new Class[classes.size()]);
117                IOUtils.createJar(new File(getOozieRuntimeDir()), getLauncherJarName(), launcherClasses);
118    
119                registerError(UnknownHostException.class.getName(), ActionExecutorException.ErrorType.TRANSIENT, "JA001");
120                registerError(AccessControlException.class.getName(), ActionExecutorException.ErrorType.NON_TRANSIENT,
121                              "JA002");
122                registerError(DiskChecker.DiskOutOfSpaceException.class.getName(),
123                              ActionExecutorException.ErrorType.NON_TRANSIENT, "JA003");
124                registerError(org.apache.hadoop.hdfs.protocol.QuotaExceededException.class.getName(),
125                              ActionExecutorException.ErrorType.NON_TRANSIENT, "JA004");
126                registerError(org.apache.hadoop.hdfs.server.namenode.SafeModeException.class.getName(),
127                              ActionExecutorException.ErrorType.NON_TRANSIENT, "JA005");
128                registerError(ConnectException.class.getName(), ActionExecutorException.ErrorType.TRANSIENT, "JA006");
129                registerError(JDOMException.class.getName(), ActionExecutorException.ErrorType.ERROR, "JA007");
130                registerError(FileNotFoundException.class.getName(), ActionExecutorException.ErrorType.ERROR, "JA008");
131                registerError(IOException.class.getName(), ActionExecutorException.ErrorType.TRANSIENT, "JA009");
132            }
133            catch (IOException ex) {
134                throw new RuntimeException(ex);
135            }
136        }
137    
138        void checkForDisallowedProps(Configuration conf, String confName) throws ActionExecutorException {
139            for (String prop : DISALLOWED_PROPERTIES) {
140                if (conf.get(prop) != null) {
141                    throw new ActionExecutorException(ActionExecutorException.ErrorType.FAILED, "JA010",
142                                                      "Property [{0}] not allowed in action [{1}] configuration", prop, confName);
143                }
144            }
145        }
146    
147        Configuration createBaseHadoopConf(Context context, Element actionXml) {
148            Configuration conf = new XConfiguration();
149            conf.set(HADOOP_USER, context.getProtoActionConf().get(WorkflowAppService.HADOOP_USER));
150            conf.set(HADOOP_UGI, context.getProtoActionConf().get(WorkflowAppService.HADOOP_UGI));
151            if (context.getProtoActionConf().get(WorkflowAppService.HADOOP_JT_KERBEROS_NAME) != null) {
152                conf.set(WorkflowAppService.HADOOP_JT_KERBEROS_NAME, context.getProtoActionConf().get(
153                        WorkflowAppService.HADOOP_JT_KERBEROS_NAME));
154            }
155            if (context.getProtoActionConf().get(WorkflowAppService.HADOOP_NN_KERBEROS_NAME) != null) {
156                conf.set(WorkflowAppService.HADOOP_NN_KERBEROS_NAME, context.getProtoActionConf().get(
157                        WorkflowAppService.HADOOP_NN_KERBEROS_NAME));
158            }
159            conf.set(OozieClient.GROUP_NAME, context.getProtoActionConf().get(OozieClient.GROUP_NAME));
160            Namespace ns = actionXml.getNamespace();
161            String jobTracker = actionXml.getChild("job-tracker", ns).getTextTrim();
162            String nameNode = actionXml.getChild("name-node", ns).getTextTrim();
163            conf.set(HADOOP_JOB_TRACKER, jobTracker);
164            conf.set(HADOOP_NAME_NODE, nameNode);
165            conf.set("mapreduce.fileoutputcommitter.marksuccessfuljobs", "true");
166            return conf;
167        }
168    
169        Configuration setupLauncherConf(Configuration conf, Element actionXml, Path appPath, Context context) throws ActionExecutorException {
170            try {
171                Namespace ns = actionXml.getNamespace();
172                Element e = actionXml.getChild("configuration", ns);
173                if (e != null) {
174                    String strConf = XmlUtils.prettyPrint(e).toString();
175                    XConfiguration inlineConf = new XConfiguration(new StringReader(strConf));
176    
177                    XConfiguration launcherConf = new XConfiguration();
178                    for (Map.Entry<String, String> entry : inlineConf) {
179                        if (entry.getKey().startsWith("oozie.launcher.")) {
180                            String name = entry.getKey().substring("oozie.launcher.".length());
181                            String value = entry.getValue();
182                            // setting original KEY
183                            launcherConf.set(entry.getKey(), value);
184                            // setting un-prefixed key (to allow Hadoop job config
185                            // for the launcher job
186                            launcherConf.set(name, value);
187                        }
188                    }
189                    checkForDisallowedProps(launcherConf, "inline launcher configuration");
190                    XConfiguration.copy(launcherConf, conf);
191                }
192                return conf;
193            }
194            catch (IOException ex) {
195                throw convertException(ex);
196            }
197        }
198    
199        protected FileSystem getActionFileSystem(Context context, WorkflowAction action) throws ActionExecutorException {
200            try {
201                Element actionXml = XmlUtils.parseXml(action.getConf());
202                return getActionFileSystem(context, actionXml);
203            }
204            catch (JDOMException ex) {
205                throw convertException(ex);
206            }
207        }
208    
    /**
     * Returns the file system for an action. This implementation returns the application file
     * system of the context and does not look at the action XML.
     *
     * @param context executor context.
     * @param actionXml action XML element (unused here).
     * @return the application file system of the context.
     * @throws ActionExecutorException if the file system cannot be obtained.
     */
    protected FileSystem getActionFileSystem(Context context, Element actionXml) throws ActionExecutorException {
        try {
            return context.getAppFileSystem();
        }
        catch (Exception ex) {
            throw convertException(ex);
        }
    }
217    
218        Configuration setupActionConf(Configuration actionConf, Context context, Element actionXml, Path appPath)
219                throws ActionExecutorException {
220            try {
221                Namespace ns = actionXml.getNamespace();
222                Element e = actionXml.getChild("job-xml", ns);
223                if (e != null) {
224                    String jobXml = e.getTextTrim();
225                    Path path = new Path(appPath, jobXml);
226                    FileSystem fs = getActionFileSystem(context, actionXml);
227                    Configuration jobXmlConf = new XConfiguration(fs.open(path));
228                    checkForDisallowedProps(jobXmlConf, "job-xml");
229                    XConfiguration.copy(jobXmlConf, actionConf);
230                }
231                e = actionXml.getChild("configuration", ns);
232                if (e != null) {
233                    String strConf = XmlUtils.prettyPrint(e).toString();
234                    XConfiguration inlineConf = new XConfiguration(new StringReader(strConf));
235                    checkForDisallowedProps(inlineConf, "inline configuration");
236                    XConfiguration.copy(inlineConf, actionConf);
237                }
238                return actionConf;
239            }
240            catch (IOException ex) {
241                throw convertException(ex);
242            }
243        }
244    
    /**
     * Adds a file or archive to the distributed cache settings of the given configuration and
     * enables symlink creation.
     * <p/>
     * Files are handled by name: native libraries (<code>.so</code> / <code>.so.N</code>) and
     * regular files get a <code>#name</code> suffix appended, jars without a <code>#</code> in their
     * name are added to the classpath through the <code>HadoopAccessorService</code>.
     *
     * @param conf configuration to update.
     * @param appPath path that relative <code>filePath</code> values are resolved against.
     * @param filePath path of the file or archive; used as is when it starts with <code>/</code>.
     * @param archive <code>true</code> to register the path as a cache archive, <code>false</code>
     * to register it as a file.
     * @return the <code>conf</code> instance passed in.
     * @throws ActionExecutorException if anything fails while registering the path.
     */
    Configuration addToCache(Configuration conf, Path appPath, String filePath, boolean archive)
            throws ActionExecutorException {
        // declared outside the try so the catch block can report it
        Path path = null;
        try {
            if (filePath.startsWith("/")) {
                path = new Path(filePath);
            }
            else {
                path = new Path(appPath, filePath);
            }
            // rebuild the URI from the path component only
            URI uri = new URI(path.toUri().getPath());
            if (archive) {
                DistributedCache.addCacheArchive(uri, conf);
            }
            else {
                String fileName = filePath.substring(filePath.lastIndexOf("/") + 1);
                if (fileName.endsWith(".so") || fileName.contains(".so.")) {  // native libraries
                    if (!fileName.endsWith(".so")) {
                        // versioned library (name.so.N): cut the name right after ".so"
                        int extAt = fileName.indexOf(".so.");
                        fileName = fileName.substring(0, extAt + 3);
                    }
                    // NOTE(review): presumably re-parsing makes "#name" the URI fragment (link name) - confirm
                    uri = new Path(path.toString() + "#" + fileName).toUri();
                    uri = new URI(uri.getPath());
                    DistributedCache.addCacheFile(uri, conf);
                }
                else if (fileName.endsWith(".jar")){  // jar files
                    if (!fileName.contains("#")) {
                        path = new Path(uri.toString());

                        // user and group are taken from the configuration being updated
                        String user = conf.get("user.name");
                        String group = conf.get("group.name");
                        Services.get().get(HadoopAccessorService.class).addFileToClassPath(user, group, path, conf);
                    }
                    else {
                        DistributedCache.addCacheFile(uri, conf);
                    }
                }
                else { // regular files
                    if (!fileName.contains("#")) {
                        // no explicit "#" in the name: append the file name itself
                        uri = new Path(path.toString() + "#" + fileName).toUri();
                        uri = new URI(uri.getPath());
                    }
                    DistributedCache.addCacheFile(uri, conf);
                }
            }
            DistributedCache.createSymlink(conf);
            return conf;
        }
        catch (Exception ex) {
            // log the path, mode and whole configuration at debug level, then convert the exception
            XLog.getLog(getClass()).debug(
                    "Errors when add to DistributedCache. Path=" + path + ", archive=" + archive + ", conf="
                            + XmlUtils.prettyPrint(conf).toString());
            throw convertException(ex);
        }
    }
300    
301        String getOozieLauncherJar(Context context) throws ActionExecutorException {
302            try {
303                return new Path(context.getActionDir(), getLauncherJarName()).toString();
304            }
305            catch (Exception ex) {
306                throw convertException(ex);
307            }
308        }
309    
310        void prepareActionDir(FileSystem actionFs, Context context) throws ActionExecutorException {
311            try {
312                Path actionDir = context.getActionDir();
313                Path tempActionDir = new Path(actionDir.getParent(), actionDir.getName() + ".tmp");
314                if (!actionFs.exists(actionDir)) {
315                    try {
316                        actionFs.copyFromLocalFile(new Path(getOozieRuntimeDir(), getLauncherJarName()), new Path(
317                                tempActionDir, getLauncherJarName()));
318                        actionFs.rename(tempActionDir, actionDir);
319                    }
320                    catch (IOException ex) {
321                        actionFs.delete(tempActionDir, true);
322                        actionFs.delete(actionDir, true);
323                        throw ex;
324                    }
325                }
326            }
327            catch (Exception ex) {
328                throw convertException(ex);
329            }
330        }
331    
332        void cleanUpActionDir(FileSystem actionFs, Context context) throws ActionExecutorException {
333            try {
334                Path actionDir = context.getActionDir();
335                if (!context.getProtoActionConf().getBoolean("oozie.action.keep.action.dir", false)
336                        && actionFs.exists(actionDir)) {
337                    actionFs.delete(actionDir, true);
338                }
339            }
340            catch (Exception ex) {
341                throw convertException(ex);
342            }
343        }
344    
345        @SuppressWarnings("unchecked")
346        void setLibFilesArchives(Context context, Element actionXml, Path appPath, Configuration conf)
347                throws ActionExecutorException {
348            Configuration proto = context.getProtoActionConf();
349    
350            addToCache(conf, appPath, getOozieLauncherJar(context), false);
351    
352            String[] paths = proto.getStrings(WorkflowAppService.APP_LIB_PATH_LIST);
353            if (paths != null) {
354                for (String path : paths) {
355                    addToCache(conf, appPath, path, false);
356                }
357            }
358    
359            for (Element eProp : (List<Element>) actionXml.getChildren()) {
360                if (eProp.getName().equals("file")) {
361                    String path = eProp.getTextTrim();
362                    addToCache(conf, appPath, path, false);
363                }
364                else {
365                    if (eProp.getName().equals("archive")) {
366                        String path = eProp.getTextTrim();
367                        addToCache(conf, appPath, path, true);
368                    }
369                }
370            }
371        }
372    
373        protected String getLauncherMain(Configuration launcherConf, Element actionXml) {
374            Namespace ns = actionXml.getNamespace();
375            Element e = actionXml.getChild("main-class", ns);
376            return e.getTextTrim();
377        }
378    
    // Queue name key of the action job, and the launcher-specific key that takes precedence over it
    // in createLauncherConf().
    private static final String QUEUE_NAME = "mapred.job.queue.name";
    private static final String OOZIE_LAUNCHER_QUEUE_NAME = "oozie.launcher.mapred.job.queue.name";

    // Action configuration properties that createLauncherConf() copies to the launcher job.
    private static final Set<String> SPECIAL_PROPERTIES = new HashSet<String>();

    static {
        SPECIAL_PROPERTIES.add(QUEUE_NAME);
        SPECIAL_PROPERTIES.add("mapreduce.jobtracker.kerberos.principal");
        SPECIAL_PROPERTIES.add("dfs.namenode.kerberos.principal");
    }
389    
    /**
     * Builds the job configuration of the launcher job for an action.
     * <p/>
     * The configuration starts from the base Hadoop configuration plus the
     * <code>oozie.launcher.</code> overrides, gets the lib files/archives and the job name, and is
     * then completed through the <code>LauncherMapper</code> setup methods (launcher info, main
     * class, max output data, main arguments).
     *
     * @param context executor context.
     * @param action workflow action being launched.
     * @param actionXml parsed action XML.
     * @param actionConf configuration of the action job, handed to the launcher setup.
     * @return the launcher job configuration.
     * @throws ActionExecutorException if any step fails; every exception is converted.
     */
    @SuppressWarnings("unchecked")
    JobConf createLauncherConf(Context context, WorkflowAction action, Element actionXml, Configuration actionConf)
            throws ActionExecutorException {
        try {
            // relative paths are resolved against the parent of the workflow application path
            Path appPathRoot = new Path(context.getWorkflow().getAppPath()).getParent();

            // launcher job configuration
            Configuration launcherConf = createBaseHadoopConf(context, actionXml);
            setupLauncherConf(launcherConf, actionXml, appPathRoot, context);

            // we are doing init+copy because if not we are getting 'hdfs'
            // scheme not known
            // it seems that new JobConf(Conf) does not load defaults, it
            // assumes parameter Conf does.
            JobConf launcherJobConf = new JobConf();
            XConfiguration.copy(launcherConf, launcherJobConf);
            setLibFilesArchives(context, actionXml, appPathRoot, launcherJobConf);
            String jobName = XLog.format("oozie:launcher:T={0}:W={1}:A={2}:ID={3}", getType(), context.getWorkflow()
                    .getAppName(), action.getName(), context.getWorkflow().getId());
            launcherJobConf.setJobName(jobName);

            String jobId = context.getWorkflow().getId();
            String actionId = action.getId();
            Path actionDir = context.getActionDir();
            String recoveryId = context.getRecoveryId();

            LauncherMapper.setupLauncherInfo(launcherJobConf, jobId, actionId, actionDir, recoveryId, actionConf);

            LauncherMapper.setupMainClass(launcherJobConf, getLauncherMain(launcherConf, actionXml));

            LauncherMapper.setupMaxOutputData(launcherJobConf, maxActionOutputLen);

            // main arguments: trimmed text of every <arg> child, in document order
            Namespace ns = actionXml.getNamespace();
            List<Element> list = actionXml.getChildren("arg", ns);
            String[] args = new String[list.size()];
            for (int i = 0; i < list.size(); i++) {
                args[i] = list.get(i).getTextTrim();
            }
            LauncherMapper.setupMainArguments(launcherJobConf, args);

            // <java-opts> is appended to the mapred.child.java.opts value found in launcherConf
            // (empty when not set there)
            Element opt = actionXml.getChild("java-opts", ns);
            if (opt != null) {
                String opts = launcherConf.get("mapred.child.java.opts", "");
                opts = opts + " " + opt.getTextTrim();
                opts = opts.trim();
                launcherJobConf.set("mapred.child.java.opts", opts);
            }

            // properties from action that are needed by the launcher (QUEUE
            // NAME)
            // maybe we should add queue to the WF schema, below job-tracker
            for (String name : SPECIAL_PROPERTIES) {
                String value = actionConf.get(name);
                if (value != null) {
                    // the action queue name is used only when no launcher-specific queue name is set
                    if (!name.equals(QUEUE_NAME) ||
                        (name.equals(QUEUE_NAME) && launcherJobConf.get(OOZIE_LAUNCHER_QUEUE_NAME) == null)) {
                        launcherJobConf.set(name, value);
                    }
                }
            }

            // to disable cancellation of delegation token on launcher job end
            launcherJobConf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens", false);

            // setting the group owning the Oozie job to allow anybody in that
            // group to kill the jobs.
            launcherJobConf.set("mapreduce.job.acl-modify-job", context.getWorkflow().getGroup());

            return launcherJobConf;
        }
        catch (Exception ex) {
            throw convertException(ex);
        }
    }
464    
465        private void injectCallback(Context context, Configuration conf) {
466            String callback = context.getCallbackUrl("$jobStatus");
467            if (conf.get("job.end.notification.url") != null) {
468                XLog.getLog(getClass()).warn("Overriding the action job end notification URI");
469            }
470            conf.set("job.end.notification.url", callback);
471        }
472    
    /**
     * Injects the Oozie callback URL into the action job configuration.
     *
     * @param context executor context.
     * @param actionConf action job configuration to update.
     */
    void injectActionCallback(Context context, Configuration actionConf) {
        injectCallback(context, actionConf);
    }
476    
    /**
     * Injects the Oozie callback URL into the launcher job configuration.
     *
     * @param context executor context.
     * @param launcherConf launcher job configuration to update.
     */
    void injectLauncherCallback(Context context, Configuration launcherConf) {
        injectCallback(context, launcherConf);
    }
480    
    /**
     * Builds the action and launcher configurations and makes sure a launcher job exists for the
     * action: when a recovery id resolves to a launcher id that job is looked up, otherwise the
     * <code>prepare</code> block is run and a new launcher job is submitted. The launcher id, job
     * tracker and tracking URL are then stored as the start data of the action.
     *
     * @param context executor context.
     * @param action workflow action to launch.
     * @throws ActionExecutorException with code JA017 if the recovered job is unknown or the
     * submission returns no job; any other failure is converted.
     */
    void submitLauncher(Context context, WorkflowAction action) throws ActionExecutorException {
        JobClient jobClient = null;
        // remembers whether the body failed, see the finally block
        boolean exception = false;
        try {
            Path appPathRoot = new Path(context.getWorkflow().getAppPath()).getParent();
            Element actionXml = XmlUtils.parseXml(action.getConf());

            // action job configuration
            Configuration actionConf = createBaseHadoopConf(context, actionXml);
            setupActionConf(actionConf, context, actionXml, appPathRoot);
            XLog.getLog(getClass()).debug("Setting LibFilesArchives ");
            setLibFilesArchives(context, actionXml, appPathRoot, actionConf);
            String jobName = XLog.format("oozie:action:T={0}:W={1}:A={2}:ID={3}", getType(), context.getWorkflow()
                    .getAppName(), action.getName(), context.getWorkflow().getId());
            actionConf.set("mapred.job.name", jobName);
            injectActionCallback(context, actionConf);

            // setting the group owning the Oozie job to allow anybody in that
            // group to kill the jobs.
            actionConf.set("mapreduce.job.acl-modify-job", context.getWorkflow().getGroup());

            JobConf launcherJobConf = createLauncherConf(context, action, actionXml, actionConf);
            injectLauncherCallback(context, launcherJobConf);
            XLog.getLog(getClass()).debug("Creating Job Client for action " + action.getId());
            jobClient = createJobClient(context, launcherJobConf);
            // a non-null launcher id here is treated as "launcher already submitted"
            String launcherId = LauncherMapper.getRecoveryId(launcherJobConf, context.getActionDir(), context
                    .getRecoveryId());
            boolean alreadyRunning = launcherId != null;
            RunningJob runningJob;

            if (alreadyRunning) {
                // recovery: re-attach to the existing launcher job instead of submitting again
                runningJob = jobClient.getJob(JobID.forName(launcherId));
                if (runningJob == null) {
                    String jobTracker = launcherJobConf.get("mapred.job.tracker");
                    throw new ActionExecutorException(ActionExecutorException.ErrorType.ERROR, "JA017",
                                                      "unknown job [{0}@{1}], cannot recover", launcherId, jobTracker);
                }
            }
            else {
                // first submission: the prepare block runs only on this path
                prepare(context, actionXml);
                XLog.getLog(getClass()).debug("Submitting the job through Job Client for action " + action.getId());

                // setting up propagation of the delegation token.
                AuthHelper.get().set(jobClient, launcherJobConf);
                log.debug(WorkflowAppService.HADOOP_JT_KERBEROS_NAME + " = "
                        + launcherJobConf.get(WorkflowAppService.HADOOP_JT_KERBEROS_NAME));
                log.debug(WorkflowAppService.HADOOP_NN_KERBEROS_NAME + " = "
                        + launcherJobConf.get(WorkflowAppService.HADOOP_NN_KERBEROS_NAME));
                runningJob = jobClient.submitJob(launcherJobConf);
                if (runningJob == null) {
                    throw new ActionExecutorException(ActionExecutorException.ErrorType.ERROR, "JA017",
                                                      "Error submitting launcher for action [{0}]", action.getId());
                }
                launcherId = runningJob.getID().toString();
                XLog.getLog(getClass()).debug("After submission get the launcherId " + launcherId);
            }

            // record launcher id, job tracker and console URL on the action
            String jobTracker = launcherJobConf.get(HADOOP_JOB_TRACKER);
            String consoleUrl = runningJob.getTrackingURL();
            context.setStartData(launcherId, jobTracker, consoleUrl);
        }
        catch (Exception ex) {
            exception = true;
            throw convertException(ex);
        }
        finally {
            if (jobClient != null) {
                try {
                    jobClient.close();
                }
                catch (Exception e) {
                    if (exception) {
                        // the body already failed: only log, so the original error is not replaced
                        log.error("JobClient error: ", e);
                    }
                    else {
                        // the body succeeded: a close failure is reported as the error
                        throw convertException(e);
                    }
                }
            }
        }
    }
562    
563        void prepare(Context context, Element actionXml) throws ActionExecutorException {
564            Namespace ns = actionXml.getNamespace();
565            Element prepare = actionXml.getChild("prepare", ns);
566            if (prepare != null) {
567                XLog.getLog(getClass()).debug("Preparing the action with FileSystem operation");
568                FsActionExecutor fsAe = new FsActionExecutor();
569                fsAe.doOperations(context, prepare);
570                XLog.getLog(getClass()).debug("FS Operation is completed");
571            }
572        }
573    
    /**
     * Starts the action: prepares the action directory, submits (or recovers) the launcher job and
     * then performs an immediate check of the action.
     *
     * @param context executor context.
     * @param action workflow action to start.
     * @throws ActionExecutorException if any of the steps fails; every exception is converted.
     */
    @Override
    public void start(Context context, WorkflowAction action) throws ActionExecutorException {
        try {
            XLog.getLog(getClass()).debug("Starting action " + action.getId() + " getting Action File System");
            FileSystem actionFs = getActionFileSystem(context, action);
            XLog.getLog(getClass()).debug("Preparing action Dir through copying " + context.getActionDir());
            // the action directory (with the launcher jar) must exist before the launcher is submitted
            prepareActionDir(actionFs, context);
            XLog.getLog(getClass()).debug("Action Dir is ready. Submitting the action ");
            submitLauncher(context, action);
            XLog.getLog(getClass()).debug("Action submit completed. Performing check ");
            // check right away instead of waiting for a later check or callback
            check(context, action);
            XLog.getLog(getClass()).debug("Action check is done after submission");
        }
        catch (Exception ex) {
            throw convertException(ex);
        }
    }
591    
592        @Override
593        public void end(Context context, WorkflowAction action) throws ActionExecutorException {
594            try {
595                String externalStatus = action.getExternalStatus();
596                WorkflowAction.Status status = externalStatus.equals(SUCCEEDED) ? WorkflowAction.Status.OK
597                                               : WorkflowAction.Status.ERROR;
598                context.setEndData(status, getActionSignal(status));
599            }
600            catch (Exception ex) {
601                throw convertException(ex);
602            }
603            finally {
604                try {
605                    FileSystem actionFs = getActionFileSystem(context, action);
606                    cleanUpActionDir(actionFs, context);
607                }
608                catch (Exception ex) {
609                    throw convertException(ex);
610                }
611            }
612        }
613    
614        /**
615         * Create job client object
616         * @param context
617         * @param jobConf
618         * @return
619         * @throws HadoopAccessorException
620         */
621        protected JobClient createJobClient(Context context, JobConf jobConf) throws HadoopAccessorException {
622            String user = context.getWorkflow().getUser();
623            String group = context.getWorkflow().getGroup();
624            return Services.get().get(HadoopAccessorService.class).createJobClient(user, group, jobConf);
625        }
626    
627        @Override
628        public void check(Context context, WorkflowAction action) throws ActionExecutorException {
629            JobClient jobClient = null;
630            boolean exception = false;
631            try {
632                Element actionXml = XmlUtils.parseXml(action.getConf());
633                FileSystem actionFs = getActionFileSystem(context, actionXml);
634                Configuration conf = createBaseHadoopConf(context, actionXml);
635                JobConf jobConf = new JobConf();
636                XConfiguration.copy(conf, jobConf);
637                jobClient = createJobClient(context, jobConf);
638                RunningJob runningJob = jobClient.getJob(JobID.forName(action.getExternalId()));
639                if (runningJob == null) {
640                    context.setExternalStatus(FAILED);
641                    context.setExecutionData(FAILED, null);
642                    throw new ActionExecutorException(ActionExecutorException.ErrorType.FAILED, "JA017",
643                                                      "Unknown hadoop job [{0}] associated with action [{1}].  Failing this action!", action
644                            .getExternalId(), action.getId());
645                }
646                if (runningJob.isComplete()) {
647                    Path actionDir = context.getActionDir();
648    
649                    String user = context.getWorkflow().getUser();
650                    String group = context.getWorkflow().getGroup();
651                    if (LauncherMapper.hasIdSwap(runningJob, user, group, actionDir)) {
652                        String launcherId = action.getExternalId();
653                        Path idSwapPath = LauncherMapper.getIdSwapPath(context.getActionDir());
654                        InputStream is = actionFs.open(idSwapPath);
655                        BufferedReader reader = new BufferedReader(new InputStreamReader(is));
656                        Properties props = PropertiesUtils.readProperties(reader, maxActionOutputLen);
657                        reader.close();
658                        String newId = props.getProperty("id");
659                        runningJob = jobClient.getJob(JobID.forName(newId));
660                        if (runningJob == null) {
661                            context.setExternalStatus(FAILED);
662                            throw new ActionExecutorException(ActionExecutorException.ErrorType.FAILED, "JA017",
663                                                              "Unknown hadoop job [{0}] associated with action [{1}].  Failing this action!", newId,
664                                                              action.getId());
665                        }
666    
667                        context.setStartData(newId, action.getTrackerUri(), runningJob.getTrackingURL());
668                        XLog.getLog(getClass()).info(XLog.STD, "External ID swap, old ID [{0}] new ID [{1}]", launcherId,
669                                                     newId);
670                    }
671                    if (runningJob.isComplete()) {
672                        XLog.getLog(getClass()).info(XLog.STD, "action completed, external ID [{0}]",
673                                                     action.getExternalId());
674                        if (runningJob.isSuccessful() && LauncherMapper.isMainSuccessful(runningJob)) {
675                            Properties props = null;
676                            if (getCaptureOutput(action)) {
677                                props = new Properties();
678                                if (LauncherMapper.hasOutputData(runningJob)) {
679                                    Path actionOutput = LauncherMapper.getOutputDataPath(context.getActionDir());
680                                    InputStream is = actionFs.open(actionOutput);
681                                    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
682                                    props = PropertiesUtils.readProperties(reader, maxActionOutputLen);
683                                    reader.close();
684                                }
685                            }
686                            context.setExecutionData(SUCCEEDED, props);
687                            XLog.getLog(getClass()).info(XLog.STD, "action produced output");
688                        }
689                        else {
690                            XLog log = XLog.getLog(getClass());
691                            String errorReason;
692                            Path actionError = LauncherMapper.getErrorPath(context.getActionDir());
693                            if (actionFs.exists(actionError)) {
694                                InputStream is = actionFs.open(actionError);
695                                BufferedReader reader = new BufferedReader(new InputStreamReader(is));
696                                Properties props = PropertiesUtils.readProperties(reader, -1);
697                                reader.close();
698                                String errorCode = props.getProperty("error.code");
699                                if (errorCode.equals("0")) {
700                                    errorCode = "JA018";
701                                }
702                                errorReason = props.getProperty("error.reason");
703                                log.warn("Launcher ERROR, reason: {0}", errorReason);
704                                String exMsg = props.getProperty("exception.message");
705                                String errorInfo = (exMsg != null) ? exMsg : errorReason;
706                                context.setErrorInfo(errorCode, errorInfo);
707                                String exStackTrace = props.getProperty("exception.stacktrace");
708                                if (exMsg != null) {
709                                    log.warn("Launcher exception: {0}{E}{1}", exMsg, exStackTrace);
710                                }
711                            }
712                            else {
713                                errorReason = XLog.format("LauncherMapper died, check Hadoop log for job [{0}:{1}]", action
714                                        .getTrackerUri(), action.getExternalId());
715                                log.warn(errorReason);
716                            }
717                            context.setExecutionData(FAILED_KILLED, null);
718                        }
719                    }
720                    else {
721                        context.setExternalStatus(RUNNING);
722                        XLog.getLog(getClass()).info(XLog.STD, "checking action, external ID [{0}] status [{1}]",
723                                                     action.getExternalId(), action.getExternalStatus());
724                    }
725                }
726                else {
727                    context.setExternalStatus(RUNNING);
728                    XLog.getLog(getClass()).info(XLog.STD, "checking action, external ID [{0}] status [{1}]",
729                                                 action.getExternalId(), action.getExternalStatus());
730                }
731            }
732            catch (Exception ex) {
733                XLog.getLog(getClass()).warn("Exception in check(). Message[{0}]", ex.getMessage(), ex);
734                exception = true;
735                throw convertException(ex);
736            }
737            finally {
738                if (jobClient != null) {
739                    try {
740                        jobClient.close();
741                    }
742                    catch (Exception e) {
743                        if (exception) {
744                            log.error("JobClient error: ", e);
745                        }
746                        else {
747                            throw convertException(e);
748                        }
749                    }
750                }
751            }
752        }
753    
754        protected boolean getCaptureOutput(WorkflowAction action) throws JDOMException {
755            Element eConf = XmlUtils.parseXml(action.getConf());
756            Namespace ns = eConf.getNamespace();
757            Element captureOutput = eConf.getChild("capture-output", ns);
758            return captureOutput != null;
759        }
760    
761        @Override
762        public void kill(Context context, WorkflowAction action) throws ActionExecutorException {
763            JobClient jobClient = null;
764            boolean exception = false;
765            try {
766                Element actionXml = XmlUtils.parseXml(action.getConf());
767                Configuration conf = createBaseHadoopConf(context, actionXml);
768                JobConf jobConf = new JobConf();
769                XConfiguration.copy(conf, jobConf);
770                jobClient = createJobClient(context, jobConf);
771                RunningJob runningJob = jobClient.getJob(JobID.forName(action.getExternalId()));
772                if (runningJob != null) {
773                    runningJob.killJob();
774                }
775                context.setExternalStatus(KILLED);
776                context.setExecutionData(KILLED, null);
777            }
778            catch (Exception ex) {
779                exception = true;
780                throw convertException(ex);
781            }
782            finally {
783                try {
784                    FileSystem actionFs = getActionFileSystem(context, action);
785                    cleanUpActionDir(actionFs, context);
786                    if (jobClient != null) {
787                        jobClient.close();
788                    }
789                }
790                catch (Exception ex) {
791                    if (exception) {
792                        log.error("Error: ", ex);
793                    }
794                    else {
795                        throw convertException(ex);
796                    }
797                }
798            }
799        }
800    
801        private static Set<String> FINAL_STATUS = new HashSet<String>();
802    
803        static {
804            FINAL_STATUS.add(SUCCEEDED);
805            FINAL_STATUS.add(KILLED);
806            FINAL_STATUS.add(FAILED);
807            FINAL_STATUS.add(FAILED_KILLED);
808        }
809    
810        @Override
811        public boolean isCompleted(String externalStatus) {
812            return FINAL_STATUS.contains(externalStatus);
813        }
814    
815    }