/**
 * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. See accompanying LICENSE file.
 */
package org.apache.oozie.action.hadoop;

import org.apache.pig.Main;
import org.apache.pig.PigRunner;
import org.apache.pig.tools.pigstats.PigStats;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobClient;

import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.FileOutputStream;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Map;
import java.util.List;
import java.util.ArrayList;
import java.util.Properties;
import java.util.Set;
import java.net.URL;

/**
 * Launcher main class for the Oozie Pig action.
 * <p>
 * Reads the action configuration XML written by Oozie, materializes it as
 * {@code pig.properties}, builds the Pig command line, invokes Pig (via
 * {@code PigRunner} when available, else {@code Main.main}), and finally
 * harvests the Hadoop job IDs started by the Pig script into the action
 * output properties file so Oozie can track them.
 */
public class PigMain extends LauncherMain {

    /**
     * Pig command-line options the user may not pass through the action's
     * {@code oozie.pig.args}: Oozie itself controls the script file, the
     * log4j configuration, the log file, the execution mode and the
     * properties file, so user-supplied values would conflict.
     */
    private static final Set<String> DISALLOWED_PIG_OPTIONS = new HashSet<String>();

    static {
        DISALLOWED_PIG_OPTIONS.add("-4");
        DISALLOWED_PIG_OPTIONS.add("-log4jconf");
        DISALLOWED_PIG_OPTIONS.add("-e");
        DISALLOWED_PIG_OPTIONS.add("-execute");
        DISALLOWED_PIG_OPTIONS.add("-f");
        DISALLOWED_PIG_OPTIONS.add("-file");
        DISALLOWED_PIG_OPTIONS.add("-l");
        DISALLOWED_PIG_OPTIONS.add("-logfile");
        DISALLOWED_PIG_OPTIONS.add("-r");
        DISALLOWED_PIG_OPTIONS.add("-dryrun");
        DISALLOWED_PIG_OPTIONS.add("-x");
        DISALLOWED_PIG_OPTIONS.add("-exectype");
        DISALLOWED_PIG_OPTIONS.add("-P");
        DISALLOWED_PIG_OPTIONS.add("-propertyFile");
    }

    public static void main(String[] args) throws Exception {
        run(PigMain.class, args);
    }

    /**
     * Entry point invoked by {@link LauncherMain#run(Class, String[])}.
     * Prepares the Pig invocation from the Oozie action configuration and
     * runs the Pig script.
     *
     * @param args launcher arguments (unused; everything is driven by system
     *             properties and the action configuration XML)
     * @throws RuntimeException if required system properties or files are
     *                          missing, or a disallowed Pig option is used
     * @throws Exception if the Pig execution fails
     */
    protected void run(String[] args) throws Exception {
        System.out.println();
        System.out.println("Oozie Pig action configuration");
        System.out.println("=================================================================");

        // loading action conf prepared by Oozie
        Configuration actionConf = new Configuration(false);

        String actionXml = System.getProperty("oozie.action.conf.xml");

        if (actionXml == null) {
            throw new RuntimeException("Missing Java System Property [oozie.action.conf.xml]");
        }
        if (!new File(actionXml).exists()) {
            throw new RuntimeException("Action Configuration XML file [" + actionXml + "] does not exist");
        }

        actionConf.addResource(new Path("file:///", actionXml));

        Properties pigProperties = new Properties();
        for (Map.Entry<String, String> entry : actionConf) {
            pigProperties.setProperty(entry.getKey(), entry.getValue());
        }

        //propagate delegation related props from launcher job to Pig job
        if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
            pigProperties.setProperty("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
            System.out.println("------------------------");
            System.out.println("Setting env property for mapreduce.job.credentials.binary to:"
                    + System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
            System.out.println("------------------------");
            System.setProperty("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
        }
        else {
            System.out.println("Non-kerberoes execution");
        }

        // materialize the action configuration so Pig picks it up
        // (close in finally so the stream is not leaked on store() failure)
        OutputStream os = new FileOutputStream("pig.properties");
        try {
            pigProperties.store(os, "");
        }
        finally {
            os.close();
        }

        logMasking("pig.properties:", Arrays.asList("password"), pigProperties.entrySet());

        List<String> arguments = new ArrayList<String>();
        String script = actionConf.get("oozie.pig.script");

        if (script == null) {
            throw new RuntimeException("Action Configuration does not have [oozie.pig.script] property");
        }

        if (!new File(script).exists()) {
            throw new RuntimeException("Error: Pig script file [" + script + "] does not exist");
        }

        // echo the script to stdout for debuggability of the launcher job
        System.out.println("Pig script [" + script + "] content: ");
        System.out.println("------------------------");
        BufferedReader br = new BufferedReader(new FileReader(script));
        try {
            String line = br.readLine();
            while (line != null) {
                System.out.println(line);
                line = br.readLine();
            }
        }
        finally {
            br.close();
        }
        System.out.println("------------------------");
        System.out.println();

        arguments.add("-file");
        arguments.add(script);
        String[] params = MapReduceMain.getStrings(actionConf, "oozie.pig.params");
        for (String param : params) {
            arguments.add("-param");
            arguments.add(param);
        }

        String hadoopJobId = System.getProperty("oozie.launcher.job.id");
        if (hadoopJobId == null) {
            throw new RuntimeException("Launcher Hadoop Job ID system property not set");
        }

        // file appender target; later scanned by getHadoopJobIds()
        String logFile = new File("pig-oozie-" + hadoopJobId + ".log").getAbsolutePath();

        URL log4jFile = Thread.currentThread().getContextClassLoader().getResource("log4j.properties");
        if (log4jFile != null) {

            String pigLogLevel = actionConf.get("oozie.pig.log.level", "INFO");

            // append required PIG properties to the default hadoop log4j file
            Properties hadoopProps = new Properties();
            InputStream log4jStream = log4jFile.openStream();
            try {
                hadoopProps.load(log4jStream);
            }
            finally {
                log4jStream.close();
            }
            // A: console appender (launcher stdout), B: file appender (job-ID harvesting)
            hadoopProps.setProperty("log4j.logger.org.apache.pig", pigLogLevel + ", A, B");
            hadoopProps.setProperty("log4j.appender.A", "org.apache.log4j.ConsoleAppender");
            hadoopProps.setProperty("log4j.appender.A.layout", "org.apache.log4j.PatternLayout");
            hadoopProps.setProperty("log4j.appender.A.layout.ConversionPattern", "%-4r [%t] %-5p %c %x - %m%n");
            hadoopProps.setProperty("log4j.appender.B", "org.apache.log4j.FileAppender");
            hadoopProps.setProperty("log4j.appender.B.file", logFile);
            hadoopProps.setProperty("log4j.appender.B.layout", "org.apache.log4j.PatternLayout");
            hadoopProps.setProperty("log4j.appender.B.layout.ConversionPattern", "%-4r [%t] %-5p %c %x - %m%n");

            String localProps = new File("piglog4j.properties").getAbsolutePath();
            OutputStream os1 = new FileOutputStream(localProps);
            try {
                hadoopProps.store(os1, "");
            }
            finally {
                os1.close();
            }

            arguments.add("-log4jconf");
            arguments.add(localProps);

            // print out current directory
            File localDir = new File(localProps).getParentFile();
            System.out.println("Current (local) dir = " + localDir.getAbsolutePath());
        }
        else {
            System.out.println("log4jfile is null");
        }

        String pigLog = "pig-" + hadoopJobId + ".log";
        arguments.add("-logfile");
        arguments.add(pigLog);

        String[] pigArgs = MapReduceMain.getStrings(actionConf, "oozie.pig.args");
        for (String pigArg : pigArgs) {
            if (DISALLOWED_PIG_OPTIONS.contains(pigArg)) {
                throw new RuntimeException("Error: Pig argument " + pigArg + " is not supported");
            }
            arguments.add(pigArg);
        }

        System.out.println("Pig command arguments :");
        for (String arg : arguments) {
            System.out.println("             " + arg);
        }

        System.out.println("=================================================================");
        System.out.println();
        System.out.println(">>> Invoking Pig command line now >>>");
        System.out.println();
        System.out.flush();

        // print the Pig version first; resetSecurityManager=true because
        // pre-0.8 Pig Main.main() calls System.exit() even on success
        System.out.println();
        runPigJob(new String[] { "-version" }, null, true);
        System.out.println();
        System.out.flush();

        runPigJob(arguments.toArray(new String[arguments.size()]), pigLog, false);

        System.out.println();
        System.out.println("<<< Invocation of Pig command completed <<<");
        System.out.println();

        // harvesting and recording Hadoop Job IDs
        Properties jobIds = getHadoopJobIds(logFile);
        File file = new File(System.getProperty("oozie.action.output.properties"));
        os = new FileOutputStream(file);
        try {
            jobIds.store(os, "");
        }
        finally {
            os.close();
        }
        System.out.println(" Hadoop Job IDs executed by Pig: " + jobIds.getProperty("hadoopJobs"));
        System.out.println();
    }

    /**
     * Dumps the Pig log file to stderr so the failure cause is visible in the
     * launcher job's output. A missing log file is reported but not fatal.
     *
     * @param pigLog path of the Pig log file to dump
     * @throws Exception if reading the existing log file fails
     */
    private void handleError(String pigLog) throws Exception {
        System.err.println();
        System.err.println("Pig logfile dump:");
        System.err.println();
        try {
            BufferedReader reader = new BufferedReader(new FileReader(pigLog));
            try {
                String line = reader.readLine();
                while (line != null) {
                    System.err.println(line);
                    line = reader.readLine();
                }
            }
            finally {
                reader.close();
            }
        }
        catch (FileNotFoundException e) {
            System.err.println("pig log file: " + pigLog + " not found.");
        }
    }

    /**
     * Runs Pig with the given command line. Uses {@code PigRunner.run()} when
     * it is on the classpath (Pig 0.8+); otherwise falls back to
     * {@code Main.main()} (pre-0.8), which exits via {@code System.exit} and
     * is therefore intercepted by the {@code LauncherSecurityManager}.
     *
     * @param args pig command line arguments
     * @param pigLog pig log file; dumped on failure when non-null
     * @param resetSecurityManager specify if need to reset security manager
     * @throws LauncherMainException if PigRunner reports a non-zero return code
     * @throws Exception if the Pig execution fails
     */
    protected void runPigJob(String[] args, String pigLog, boolean resetSecurityManager) throws Exception {
        // running as from the command line
        boolean pigRunnerExists = true;
        try {
            // existence check only; the loaded Class object is not needed
            Class.forName("org.apache.pig.PigRunner");
        }
        catch (ClassNotFoundException ex) {
            pigRunnerExists = false;
        }

        if (pigRunnerExists) {
            System.out.println("Run pig script using PigRunner.run() for Pig version 0.8+");
            PigStats stats = PigRunner.run(args, null);
            int code = stats.getReturnCode();
            if (code != 0) {
                if (pigLog != null) {
                    handleError(pigLog);
                }
                throw new LauncherMainException(code);
            }
        }
        else {
            try {
                System.out.println("Run pig script using Main.main() for Pig version before 0.8");
                Main.main(args);
            }
            catch (SecurityException ex) {
                // Main.main() calls System.exit(); the security manager turns
                // that into a SecurityException we inspect here
                if (resetSecurityManager) {
                    LauncherSecurityManager.reset();
                }
                else {
                    if (LauncherSecurityManager.getExitInvoked()) {
                        if (LauncherSecurityManager.getExitCode() != 0) {
                            if (pigLog != null) {
                                handleError(pigLog);
                            }
                            throw ex;
                        }
                    }
                }
            }
        }
    }

    /**
     * Stores the Pig script, parameters and arguments in the given action
     * configuration for later retrieval by {@link #run(String[])}.
     *
     * @param conf action configuration to populate
     * @param script path of the Pig script
     * @param params Pig parameter definitions ({@code -param} values)
     * @param args additional Pig command-line arguments
     */
    public static void setPigScript(Configuration conf, String script, String[] params, String[] args) {
        conf.set("oozie.pig.script", script);
        MapReduceMain.setStrings(conf, "oozie.pig.params", params);
        MapReduceMain.setStrings(conf, "oozie.pig.args", args);
    }

    // marker Pig writes before each Hadoop job id in its log output
    private static final String JOB_ID_LOG_PREFIX = "HadoopJobId: ";

    /**
     * Scans the Pig log file for Hadoop job IDs and returns them as a
     * comma-separated list in the {@code hadoopJobs} property. If the log
     * file is missing, the property is set to the empty string.
     *
     * @param logFile Pig log file to scan
     * @return properties holding the {@code hadoopJobs} id list
     * @throws IOException if reading the log file fails
     */
    protected Properties getHadoopJobIds(String logFile) throws IOException {
        Properties props = new Properties();
        StringBuilder sb = new StringBuilder(100);
        if (!new File(logFile).exists()) {
            System.err.println("pig log file: " + logFile + "  not present. Therefore no Hadoop jobids found");
            props.setProperty("hadoopJobs", "");
        }
        else {
            BufferedReader br = new BufferedReader(new FileReader(logFile));
            try {
                String line = br.readLine();
                String separator = "";
                while (line != null) {
                    if (line.contains(JOB_ID_LOG_PREFIX)) {
                        int jobIdStarts = line.indexOf(JOB_ID_LOG_PREFIX) + JOB_ID_LOG_PREFIX.length();
                        String jobId = line.substring(jobIdStarts);
                        int jobIdEnds = jobId.indexOf(" ");
                        if (jobIdEnds > -1) {
                            // trim anything after the id on the same line
                            jobId = jobId.substring(0, jobIdEnds);
                        }
                        sb.append(separator).append(jobId);
                        separator = ",";
                    }
                    line = br.readLine();
                }
            }
            finally {
                br.close();
            }
            props.setProperty("hadoopJobs", sb.toString());
        }
        return props;
    }

}