001 /** 002 * Copyright (c) 2010 Yahoo! Inc. All rights reserved. 003 * Licensed under the Apache License, Version 2.0 (the "License"); 004 * you may not use this file except in compliance with the License. 005 * You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software 010 * distributed under the License is distributed on an "AS IS" BASIS, 011 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 012 * See the License for the specific language governing permissions and 013 * limitations under the License. See accompanying LICENSE file. 014 */ 015 package org.apache.oozie.command; 016 017 import java.util.ArrayList; 018 import java.util.List; 019 020 import org.apache.oozie.CoordinatorActionBean; 021 import org.apache.oozie.CoordinatorJobBean; 022 import org.apache.oozie.ErrorCode; 023 import org.apache.oozie.FaultInjection; 024 import org.apache.oozie.WorkflowActionBean; 025 import org.apache.oozie.WorkflowJobBean; 026 import org.apache.oozie.XException; 027 import org.apache.oozie.service.CallableQueueService; 028 import org.apache.oozie.service.DagXLogInfoService; 029 import org.apache.oozie.service.InstrumentationService; 030 import org.apache.oozie.service.MemoryLocksService; 031 import org.apache.oozie.service.Services; 032 import org.apache.oozie.service.StoreService; 033 import org.apache.oozie.service.XLogService; 034 import org.apache.oozie.store.Store; 035 import org.apache.oozie.store.StoreException; 036 import org.apache.oozie.store.WorkflowStore; 037 import org.apache.oozie.util.Instrumentation; 038 import org.apache.oozie.util.ParamChecker; 039 import org.apache.oozie.util.XCallable; 040 import org.apache.oozie.util.XLog; 041 import org.apache.oozie.util.MemoryLocks.LockToken; 042 043 /** 044 * Base class for all synchronous and asynchronous DagEngine commands. 045 */ 046 public abstract class Command<T, S extends Store> implements XCallable<T> { 047 /** 048 * The instrumentation group used for Commands. 049 */ 050 private static final String INSTRUMENTATION_GROUP = "commands"; 051 052 private final long createdTime; 053 054 /** 055 * The instrumentation group used for Jobs. 056 */ 057 private static final String INSTRUMENTATION_JOB_GROUP = "jobs"; 058 059 private static final long LOCK_TIMEOUT = 1000; 060 protected static final long LOCK_FAILURE_REQUEUE_INTERVAL = 30000; 061 062 protected Instrumentation instrumentation; 063 private List<XCallable<Void>> callables; 064 private List<XCallable<Void>> delayedCallables; 065 private long delay = 0; 066 private List<XCallable<Void>> exceptionCallables; 067 private String name; 068 private int priority; 069 private int logMask; 070 private boolean withStore; 071 protected boolean dryrun = false; 072 protected String type; 073 private ArrayList<LockToken> locks = null; 074 075 /** 076 * This variable is package private for testing purposes only. 077 */ 078 XLog.Info logInfo; 079 080 /** 081 * Create a command that uses a {@link WorkflowStore} instance. <p/> The current {@link XLog.Info} values are 082 * captured for execution. 083 * 084 * @param name command name. 085 * @param type command type. 086 * @param priority priority of the command, used when queuing for asynchronous execution. 087 * @param logMask log mask for the command logging calls. 088 */ 089 public Command(String name, String type, int priority, int logMask) { 090 this(name, type, priority, logMask, true); 091 } 092 093 /** 094 * Create a command. <p/> The current {@link XLog.Info} values are captured for execution. 095 * 096 * @param name command name. 097 * @param type command type. 098 * @param priority priority of the command, used when queuing for asynchronous execution. 099 * @param logMask log mask for the command logging calls. 100 * @param withStore indicates if the command needs a {@link org.apache.oozie.store.WorkflowStore} instance or not. 101 */ 102 public Command(String name, String type, int priority, int logMask, boolean withStore) { 103 this.name = ParamChecker.notEmpty(name, "name"); 104 this.type = ParamChecker.notEmpty(type, "type"); 105 this.priority = priority; 106 this.withStore = withStore; 107 this.logMask = logMask; 108 instrumentation = Services.get().get(InstrumentationService.class).get(); 109 logInfo = new XLog.Info(XLog.Info.get()); 110 createdTime = System.currentTimeMillis(); 111 locks = new ArrayList<LockToken>(); 112 } 113 114 /** 115 * Create a command. <p/> The current {@link XLog.Info} values are captured for execution. 116 * 117 * @param name command name. 118 * @param type command type. 119 * @param priority priority of the command, used when queuing for asynchronous execution. 120 * @param logMask log mask for the command logging calls. 121 * @param withStore indicates if the command needs a {@link org.apache.oozie.store.WorkflowStore} instance or not. 122 * @param dryrun indicates if dryrun option is enabled. if enabled coordinator will show a diagnostic output without 123 * really submitting the job 124 */ 125 public Command(String name, String type, int priority, int logMask, boolean withStore, boolean dryrun) { 126 this(name, type, priority, logMask, withStore); 127 this.dryrun = dryrun; 128 } 129 130 /** 131 * Return the name of the command. 132 * 133 * @return the name of the command. 134 */ 135 @Override 136 public String getName() { 137 return name; 138 } 139 140 /** 141 * Return the callable type. <p/> The callable type is used for concurrency throttling in the {@link 142 * org.apache.oozie.service.CallableQueueService}. 143 * 144 * @return the callable type. 145 */ 146 @Override 147 public String getType() { 148 return type; 149 } 150 151 /** 152 * Return the priority of the command. 153 * 154 * @return the priority of the command. 155 */ 156 @Override 157 public int getPriority() { 158 return priority; 159 } 160 161 /** 162 * Returns the createdTime of the callable in milliseconds 163 * 164 * @return the callable createdTime 165 */ 166 @Override 167 public long getCreatedTime() { 168 return createdTime; 169 } 170 171 /** 172 * Execute the command {@link #call(WorkflowStore)} setting all the necessary context. <p/> The {@link XLog.Info} is 173 * set to the values at instance creation time. <p/> The command execution is logged and instrumented. <p/> If a 174 * {@link WorkflowStore} is used, a fresh instance will be passed and it will be commited after the {@link 175 * #call(WorkflowStore)} execution. It will be closed without committing if an exception is thrown. <p/> Commands 176 * queued via the DagCommand queue methods are queued for execution after the workflow store has been committed. 177 * <p/> If an exception happends the queued commands will not be effectively queued for execution. Instead, the the 178 * commands queued for exception will be effectively queued fro execution.. 179 * 180 * @throws CommandException thrown if the command could not be executed successfully, the workflow store is closed 181 * without committing, thus doing a rollback. 182 */ 183 @SuppressWarnings({"ThrowFromFinallyBlock", "unchecked"}) 184 public final T call() throws CommandException { 185 XLog.Info.get().setParameters(logInfo); 186 XLog log = XLog.getLog(getClass()); 187 log.trace(logMask, "Start"); 188 Instrumentation.Cron cron = new Instrumentation.Cron(); 189 cron.start(); 190 callables = new ArrayList<XCallable<Void>>(); 191 delayedCallables = new ArrayList<XCallable<Void>>(); 192 exceptionCallables = new ArrayList<XCallable<Void>>(); 193 delay = 0; 194 S store = null; 195 boolean exception = false; 196 197 try { 198 if (withStore) { 199 store = (S) Services.get().get(StoreService.class).getStore(getStoreClass()); 200 store.beginTrx(); 201 } 202 T result = execute(store); 203 /* 204 * 205 * if (store != null && log != null) { log.info(XLog.STD, 206 * "connection log from store Flush Mode {0} ", 207 * store.getFlushMode()); } 208 */ 209 if (withStore) { 210 if (store == null) { 211 throw new IllegalStateException("WorkflowStore should not be null"); 212 } 213 if (FaultInjection.isActive("org.apache.oozie.command.SkipCommitFaultInjection")) { 214 throw new RuntimeException("Skipping Commit for Failover Testing"); 215 } 216 store.commitTrx(); 217 } 218 219 // TODO figure out the reject due to concurrency problems and remove 220 // the delayed queuing for callables. 221 boolean ret = Services.get().get(CallableQueueService.class).queueSerial(callables, 10); 222 if (ret == false) { 223 logQueueCallableFalse(callables); 224 } 225 226 ret = Services.get().get(CallableQueueService.class).queueSerial(delayedCallables, delay); 227 if (ret == false) { 228 logQueueCallableFalse(delayedCallables); 229 } 230 231 return result; 232 } 233 catch (XException ex) { 234 log.error(logMask | XLog.OPS, "XException, {0}", ex); 235 if (store != null) { 236 log.info(XLog.STD, "XException - connection logs from store {0}, {1}", store.getConnection(), store 237 .isClosed()); 238 } 239 exception = true; 240 if (store != null && store.isActive()) { 241 try { 242 store.rollbackTrx(); 243 } 244 catch (RuntimeException rex) { 245 log.warn(logMask | XLog.OPS, "openjpa error, {0}, {1}", name, rex.getMessage(), rex); 246 } 247 } 248 249 // TODO figure out the reject due to concurrency problems and remove 250 // the delayed queuing for callables. 251 boolean ret = Services.get().get(CallableQueueService.class).queueSerial(exceptionCallables, 10); 252 if (ret == false) { 253 logQueueCallableFalse(exceptionCallables); 254 } 255 if (ex instanceof CommandException) { 256 throw (CommandException) ex; 257 } 258 else { 259 throw new CommandException(ex); 260 } 261 } 262 catch (Exception ex) { 263 log.error(logMask | XLog.OPS, "Exception, {0}", ex); 264 exception = true; 265 if (store != null && store.isActive()) { 266 try { 267 store.rollbackTrx(); 268 } 269 catch (RuntimeException rex) { 270 log.warn(logMask | XLog.OPS, "openjpa error, {0}, {1}", name, rex.getMessage(), rex); 271 } 272 } 273 throw new CommandException(ErrorCode.E0607, ex); 274 } 275 catch (Error er) { 276 log.error(logMask | XLog.OPS, "Error, {0}", er); 277 exception = true; 278 if (store != null && store.isActive()) { 279 try { 280 store.rollbackTrx(); 281 } 282 catch (RuntimeException rex) { 283 log.warn(logMask | XLog.OPS, "openjpa error, {0}, {1}", name, rex.getMessage(), rex); 284 } 285 } 286 throw er; 287 } 288 finally { 289 FaultInjection.deactivate("org.apache.oozie.command.SkipCommitFaultInjection"); 290 cron.stop(); 291 instrumentation.addCron(INSTRUMENTATION_GROUP, name, cron); 292 incrCommandCounter(1); 293 log.trace(logMask, "End"); 294 if (locks != null) { 295 for (LockToken lock : locks) { 296 lock.release(); 297 } 298 locks.clear(); 299 } 300 if (store != null) { 301 if (!store.isActive()) { 302 try { 303 store.closeTrx(); 304 } 305 catch (RuntimeException rex) { 306 if (exception) { 307 log.warn(logMask | XLog.OPS, "openjpa error, {0}, {1}", name, rex.getMessage(), rex); 308 } 309 else { 310 throw rex; 311 } 312 } 313 } 314 else { 315 log.warn(logMask | XLog.OPS, "transaction is not committed or rolled back before closing entitymanager."); 316 } 317 } 318 } 319 } 320 321 /** 322 * Queue a callable for execution after the current callable call invocation completes and the {@link WorkflowStore} 323 * transaction commits. <p/> All queued callables, regardless of the number of queue invocations, are queued for a 324 * single serial execution. <p/> If the call invocation throws an exception all queued callables are discarded, they 325 * are not queued for execution. 326 * 327 * @param callable callable to queue for execution. 328 */ 329 protected void queueCallable(XCallable<Void> callable) { 330 callables.add(callable); 331 } 332 333 /** 334 * Queue a list of callables for execution after the current callable call invocation completes and the {@link 335 * WorkflowStore} transaction commits. <p/> All queued callables, regardless of the number of queue invocations, are 336 * queued for a single serial execution. <p/> If the call invocation throws an exception all queued callables are 337 * discarded, they are not queued for execution. 338 * 339 * @param callables list of callables to queue for execution. 340 */ 341 protected void queueCallable(List<? extends XCallable<Void>> callables) { 342 this.callables.addAll(callables); 343 } 344 345 /** 346 * Queue a callable for delayed execution after the current callable call invocation completes and the {@link 347 * WorkflowStore} transaction commits. <p/> All queued delayed callables, regardless of the number of delay queue 348 * invocations, are queued for a single serial delayed execution with the highest delay of all queued callables. 349 * <p/> If the call invocation throws an exception all queued callables are discarded, they are not queued for 350 * execution. 351 * 352 * @param callable callable to queue for delayed execution. 353 * @param delay the queue delay in milliseconds 354 */ 355 protected void queueCallable(XCallable<Void> callable, long delay) { 356 this.delayedCallables.add(callable); 357 this.delay = Math.max(this.delay, delay); 358 } 359 360 /** 361 * Queue a callable for execution only in the event of an exception being thrown during the call invocation. <p/> If 362 * an exception does not happen, all the callables queued by this method are discarded, they are not queued for 363 * execution. <p/> All queued callables, regardless of the number of queue invocations, are queued for a single 364 * serial execution. 365 * 366 * @param callable callable to queue for execution in the case of an exception. 367 */ 368 protected void queueCallableForException(XCallable<Void> callable) { 369 exceptionCallables.add(callable); 370 } 371 372 /** 373 * Logging the info if failed to queue the callables. 374 * 375 * @param callables 376 */ 377 protected void logQueueCallableFalse(List<? extends XCallable<Void>> callables) { 378 StringBuilder sb = new StringBuilder( 379 "Unable to queue the callables, delayedQueue is full or system is in SAFEMODE - failed to queue:["); 380 int size = callables.size(); 381 for (int i = 0; i < size; i++) { 382 XCallable<Void> callable = callables.get(i); 383 sb.append(callable.getName()); 384 if (i < size - 1) { 385 sb.append(", "); 386 } 387 else { 388 sb.append("]"); 389 } 390 } 391 XLog.getLog(getClass()).warn(sb.toString()); 392 } 393 394 /** 395 * DagCallable subclasses must implement this method to perform their task. <p/> The workflow store works in 396 * transactional mode. The transaction is committed only if this method ends successfully. Otherwise the transaction 397 * is rolledback. 398 * 399 * @param store the workflow store instance for the callable, <code>null</code> if the callable does not use a 400 * store. 401 * @return the return value of the callable. 402 * @throws StoreException thrown if the workflow store could not perform an operation. 403 * @throws CommandException thrown if the command could not perform its operation. 404 */ 405 protected abstract T call(S store) throws StoreException, CommandException; 406 407 // to do 408 // need to implement on all sub commands and break down the transactions 409 410 // protected abstract T execute(String id) throws CommandException; 411 412 /** 413 * Command subclasses must implement this method correct Store can be passed to call(store); 414 * 415 * @return the Store class for use by Callable 416 * @throws CommandException thrown if the command could not perform its operation. 417 */ 418 protected abstract Class<? extends Store> getStoreClass(); 419 420 /** 421 * Set the log info with the context of the given coordinator bean. 422 * 423 * @param cBean coordinator bean. 424 */ 425 protected void setLogInfo(CoordinatorJobBean cBean) { 426 if (logInfo.getParameter(XLogService.GROUP) == null) { 427 logInfo.setParameter(XLogService.GROUP, cBean.getGroup()); 428 } 429 if (logInfo.getParameter(XLogService.USER) == null) { 430 logInfo.setParameter(XLogService.USER, cBean.getUser()); 431 } 432 logInfo.setParameter(DagXLogInfoService.JOB, cBean.getId()); 433 logInfo.setParameter(DagXLogInfoService.TOKEN, ""); 434 logInfo.setParameter(DagXLogInfoService.APP, cBean.getAppName()); 435 XLog.Info.get().setParameters(logInfo); 436 } 437 438 /** 439 * Set the log info with the context of the given coordinator action bean. 440 * 441 * @param action action bean. 442 */ 443 protected void setLogInfo(CoordinatorActionBean action) { 444 logInfo.setParameter(DagXLogInfoService.JOB, action.getJobId()); 445 // logInfo.setParameter(DagXLogInfoService.TOKEN, action.getLogToken()); 446 logInfo.setParameter(DagXLogInfoService.ACTION, action.getId()); 447 XLog.Info.get().setParameters(logInfo); 448 } 449 450 /** 451 * Set the log info with the context of the given workflow bean. 452 * 453 * @param workflow workflow bean. 454 */ 455 protected void setLogInfo(WorkflowJobBean workflow) { 456 if (logInfo.getParameter(XLogService.GROUP) == null) { 457 logInfo.setParameter(XLogService.GROUP, workflow.getGroup()); 458 } 459 if (logInfo.getParameter(XLogService.USER) == null) { 460 logInfo.setParameter(XLogService.USER, workflow.getUser()); 461 } 462 logInfo.setParameter(DagXLogInfoService.JOB, workflow.getId()); 463 logInfo.setParameter(DagXLogInfoService.TOKEN, workflow.getLogToken()); 464 logInfo.setParameter(DagXLogInfoService.APP, workflow.getAppName()); 465 XLog.Info.get().setParameters(logInfo); 466 } 467 468 /** 469 * Set the log info with the context of the given action bean. 470 * 471 * @param action action bean. 472 */ 473 protected void setLogInfo(WorkflowActionBean action) { 474 logInfo.setParameter(DagXLogInfoService.JOB, action.getJobId()); 475 logInfo.setParameter(DagXLogInfoService.TOKEN, action.getLogToken()); 476 logInfo.setParameter(DagXLogInfoService.ACTION, action.getId()); 477 XLog.Info.get().setParameters(logInfo); 478 } 479 480 /** 481 * Reset the action bean information from the log info. 482 */ 483 // TODO check if they are used, else delete 484 protected void resetLogInfoAction() { 485 logInfo.clearParameter(DagXLogInfoService.ACTION); 486 XLog.Info.get().clearParameter(DagXLogInfoService.ACTION); 487 } 488 489 /** 490 * Reset the workflow bean information from the log info. 491 */ 492 // TODO check if they are used, else delete 493 protected void resetLogInfoWorkflow() { 494 logInfo.clearParameter(DagXLogInfoService.JOB); 495 logInfo.clearParameter(DagXLogInfoService.APP); 496 logInfo.clearParameter(DagXLogInfoService.TOKEN); 497 XLog.Info.get().clearParameter(DagXLogInfoService.JOB); 498 XLog.Info.get().clearParameter(DagXLogInfoService.APP); 499 XLog.Info.get().clearParameter(DagXLogInfoService.TOKEN); 500 } 501 502 /** 503 * Convenience method to increment counters. 504 * 505 * @param group the group name. 506 * @param name the counter name. 507 * @param count increment count. 508 */ 509 private void incrCounter(String group, String name, int count) { 510 if (instrumentation != null) { 511 instrumentation.incr(group, name, count); 512 } 513 } 514 515 /** 516 * Used to increment command counters. 517 * 518 * @param count the increment count. 519 */ 520 protected void incrCommandCounter(int count) { 521 incrCounter(INSTRUMENTATION_GROUP, name, count); 522 } 523 524 /** 525 * Used to increment job counters. The counter name s the same as the command name. 526 * 527 * @param count the increment count. 528 */ 529 protected void incrJobCounter(int count) { 530 incrJobCounter(name, count); 531 } 532 533 /** 534 * Used to increment job counters. 535 * 536 * @param name the job name. 537 * @param count the increment count. 538 */ 539 protected void incrJobCounter(String name, int count) { 540 incrCounter(INSTRUMENTATION_JOB_GROUP, name, count); 541 } 542 543 /** 544 * Return the {@link Instrumentation} instance in use. 545 * 546 * @return the {@link Instrumentation} instance in use. 547 */ 548 protected Instrumentation getInstrumentation() { 549 return instrumentation; 550 } 551 552 /** 553 * Return the identity. 554 * 555 * @return the identity. 556 */ 557 @Override 558 public String toString() { 559 StringBuilder sb = new StringBuilder(); 560 sb.append(getType()); 561 sb.append(",").append(getPriority()); 562 return sb.toString(); 563 } 564 565 protected boolean lock(String id) throws InterruptedException { 566 if (id == null || id.length() == 0) { 567 XLog.getLog(getClass()).warn("lock(): Id is null or empty :" + id + ":"); 568 return false; 569 } 570 LockToken token = Services.get().get(MemoryLocksService.class).getWriteLock(id, LOCK_TIMEOUT); 571 if (token != null) { 572 locks.add(token); 573 return true; 574 } 575 else { 576 return false; 577 } 578 } 579 580 /* 581 * TODO - remove store coupling to EM. Store will only contain queries 582 * protected EntityManager getEntityManager() { return 583 * store.getEntityManager(); } 584 */ 585 protected T execute(S store) throws CommandException, StoreException { 586 T result = call(store); 587 return result; 588 } 589 }