001    /**
002     * Copyright (c) 2010 Yahoo! Inc. All rights reserved.
003     * Licensed under the Apache License, Version 2.0 (the "License");
004     * you may not use this file except in compliance with the License.
005     * You may obtain a copy of the License at
006     *
007     *   http://www.apache.org/licenses/LICENSE-2.0
008     *
009     *  Unless required by applicable law or agreed to in writing, software
010     *  distributed under the License is distributed on an "AS IS" BASIS,
011     *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012     *  See the License for the specific language governing permissions and
013     *  limitations under the License. See accompanying LICENSE file.
014     */
015    package org.apache.oozie.action.hadoop;
016    
import org.apache.pig.Main;
import org.apache.pig.PigRunner;
import org.apache.pig.tools.pigstats.PigStats;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobClient;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
039    
040    public class PigMain extends LauncherMain {
041        private static final Set<String> DISALLOWED_PIG_OPTIONS = new HashSet<String>();
042    
043        static {
044            DISALLOWED_PIG_OPTIONS.add("-4");
045            DISALLOWED_PIG_OPTIONS.add("-log4jconf");
046            DISALLOWED_PIG_OPTIONS.add("-e");
047            DISALLOWED_PIG_OPTIONS.add("-execute");
048            DISALLOWED_PIG_OPTIONS.add("-f");
049            DISALLOWED_PIG_OPTIONS.add("-file");
050            DISALLOWED_PIG_OPTIONS.add("-l");
051            DISALLOWED_PIG_OPTIONS.add("-logfile");
052            DISALLOWED_PIG_OPTIONS.add("-r");
053            DISALLOWED_PIG_OPTIONS.add("-dryrun");
054            DISALLOWED_PIG_OPTIONS.add("-x");
055            DISALLOWED_PIG_OPTIONS.add("-exectype");
056            DISALLOWED_PIG_OPTIONS.add("-P");
057            DISALLOWED_PIG_OPTIONS.add("-propertyFile");
058        }
059    
060        public static void main(String[] args) throws Exception {
061            run(PigMain.class, args);
062        }
063    
064        protected void run(String[] args) throws Exception {
065            System.out.println();
066            System.out.println("Oozie Pig action configuration");
067            System.out.println("=================================================================");
068    
069            // loading action conf prepared by Oozie
070            Configuration actionConf = new Configuration(false);
071    
072            String actionXml = System.getProperty("oozie.action.conf.xml");
073    
074            if (actionXml == null) {
075                throw new RuntimeException("Missing Java System Property [oozie.action.conf.xml]");
076            }
077            if (!new File(actionXml).exists()) {
078                throw new RuntimeException("Action Configuration XML file [" + actionXml + "] does not exist");
079            }
080    
081            actionConf.addResource(new Path("file:///", actionXml));
082    
083            Properties pigProperties = new Properties();
084            for (Map.Entry<String, String> entry : actionConf) {
085                pigProperties.setProperty(entry.getKey(), entry.getValue());
086            }
087    
088            //propagate delegation related props from launcher job to Pig job
089            if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
090                pigProperties.setProperty("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
091                System.out.println("------------------------");
092                System.out.println("Setting env property for mapreduce.job.credentials.binary to:"
093                        + System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
094                System.out.println("------------------------");
095                System.setProperty("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
096            }
097            else {
098                System.out.println("Non-kerberoes execution");
099            }
100    
101            OutputStream os = new FileOutputStream("pig.properties");
102            pigProperties.store(os, "");
103            os.close();
104    
105            logMasking("pig.properties:", Arrays.asList("password"), pigProperties.entrySet());
106    
107            List<String> arguments = new ArrayList<String>();
108            String script = actionConf.get("oozie.pig.script");
109    
110            if (script == null) {
111                throw new RuntimeException("Action Configuration does not have [oozie.pig.script] property");
112            }
113    
114            if (!new File(script).exists()) {
115                throw new RuntimeException("Error: Pig script file [" + script + "] does not exist");
116            }
117    
118            System.out.println("Pig script [" + script + "] content: ");
119            System.out.println("------------------------");
120            BufferedReader br = new BufferedReader(new FileReader(script));
121            String line = br.readLine();
122            while (line != null) {
123                System.out.println(line);
124                line = br.readLine();
125            }
126            br.close();
127            System.out.println("------------------------");
128            System.out.println();
129    
130            arguments.add("-file");
131            arguments.add(script);
132            String[] params = MapReduceMain.getStrings(actionConf, "oozie.pig.params");
133            for (String param : params) {
134                arguments.add("-param");
135                arguments.add(param);
136            }
137    
138            String hadoopJobId = System.getProperty("oozie.launcher.job.id");
139            if (hadoopJobId == null) {
140                throw new RuntimeException("Launcher Hadoop Job ID system property not set");
141            }
142    
143            String logFile = new File("pig-oozie-" + hadoopJobId + ".log").getAbsolutePath();
144    
145            URL log4jFile = Thread.currentThread().getContextClassLoader().getResource("log4j.properties");
146            if (log4jFile != null) {
147    
148                String pigLogLevel = actionConf.get("oozie.pig.log.level", "INFO");
149    
150                // append required PIG properties to the default hadoop log4j file
151                Properties hadoopProps = new Properties();
152                hadoopProps.load(log4jFile.openStream());
153                hadoopProps.setProperty("log4j.logger.org.apache.pig", pigLogLevel + ", A, B");
154                hadoopProps.setProperty("log4j.appender.A", "org.apache.log4j.ConsoleAppender");
155                hadoopProps.setProperty("log4j.appender.A.layout", "org.apache.log4j.PatternLayout");
156                hadoopProps.setProperty("log4j.appender.A.layout.ConversionPattern", "%-4r [%t] %-5p %c %x - %m%n");
157                hadoopProps.setProperty("log4j.appender.B", "org.apache.log4j.FileAppender");
158                hadoopProps.setProperty("log4j.appender.B.file", logFile);
159                hadoopProps.setProperty("log4j.appender.B.layout", "org.apache.log4j.PatternLayout");
160                hadoopProps.setProperty("log4j.appender.B.layout.ConversionPattern", "%-4r [%t] %-5p %c %x - %m%n");
161    
162                String localProps = new File("piglog4j.properties").getAbsolutePath();
163                OutputStream os1 = new FileOutputStream(localProps);
164                hadoopProps.store(os1, "");
165                os1.close();
166    
167                arguments.add("-log4jconf");
168                arguments.add(localProps);
169    
170                // print out current directory
171                File localDir = new File(localProps).getParentFile();
172                System.out.println("Current (local) dir = " + localDir.getAbsolutePath());
173            }
174            else {
175                System.out.println("log4jfile is null");
176            }
177    
178            String pigLog = "pig-" + hadoopJobId + ".log";
179            arguments.add("-logfile");
180            arguments.add(pigLog);
181    
182            String[] pigArgs = MapReduceMain.getStrings(actionConf, "oozie.pig.args");
183            for (String pigArg : pigArgs) {
184                if (DISALLOWED_PIG_OPTIONS.contains(pigArg)) {
185                    throw new RuntimeException("Error: Pig argument " + pigArg + " is not supported");
186                }
187                arguments.add(pigArg);
188            }
189    
190            System.out.println("Pig command arguments :");
191            for (String arg : arguments) {
192                System.out.println("             " + arg);
193            }
194    
195            System.out.println("=================================================================");
196            System.out.println();
197            System.out.println(">>> Invoking Pig command line now >>>");
198            System.out.println();
199            System.out.flush();
200    
201            System.out.println();
202            runPigJob(new String[] { "-version" }, null, true);
203            System.out.println();
204            System.out.flush();
205    
206            runPigJob(arguments.toArray(new String[arguments.size()]), pigLog, false);
207    
208            System.out.println();
209            System.out.println("<<< Invocation of Pig command completed <<<");
210            System.out.println();
211    
212            // harvesting and recording Hadoop Job IDs
213            Properties jobIds = getHadoopJobIds(logFile);
214            File file = new File(System.getProperty("oozie.action.output.properties"));
215            os = new FileOutputStream(file);
216            jobIds.store(os, "");
217            os.close();
218            System.out.println(" Hadoop Job IDs executed by Pig: " + jobIds.getProperty("hadoopJobs"));
219            System.out.println();
220        }
221    
222        private void handleError(String pigLog) throws Exception {
223            System.err.println();
224            System.err.println("Pig logfile dump:");
225            System.err.println();
226            try {
227                BufferedReader reader = new BufferedReader(new FileReader(pigLog));
228                String line = reader.readLine();
229                while (line != null) {
230                    System.err.println(line);
231                    line = reader.readLine();
232                }
233                reader.close();
234            }
235            catch (FileNotFoundException e) {
236                System.err.println("pig log file: " + pigLog + "  not found.");
237            }
238        }
239    
240        /**
241         * @param args pig command line arguments
242         * @param pigLog pig log file
243         * @param resetSecurityManager specify if need to reset security manager
244         * @throws Exception
245         */
246        protected void runPigJob(String[] args, String pigLog, boolean resetSecurityManager) throws Exception {
247            // running as from the command line
248            boolean pigRunnerExists = true;
249            Class klass;
250            try {
251                klass = Class.forName("org.apache.pig.PigRunner");
252            }
253            catch (ClassNotFoundException ex) {
254                pigRunnerExists = false;
255            }
256    
257            if (pigRunnerExists) {
258                System.out.println("Run pig script using PigRunner.run() for Pig version 0.8+");
259                PigStats stats = PigRunner.run(args, null);
260                int code = stats.getReturnCode();
261                if (code != 0) {
262                    if (pigLog != null) {
263                        handleError(pigLog);
264                    }
265                    throw new LauncherMainException(code);
266                }
267            }
268            else {
269                try {
270                    System.out.println("Run pig script using Main.main() for Pig version before 0.8");
271                    Main.main(args);
272                }
273                catch (SecurityException ex) {
274                    if (resetSecurityManager) {
275                        LauncherSecurityManager.reset();
276                    }
277                    else {
278                        if (LauncherSecurityManager.getExitInvoked()) {
279                            if (LauncherSecurityManager.getExitCode() != 0) {
280                                if (pigLog != null) {
281                                    handleError(pigLog);
282                                }
283                                throw ex;
284                            }
285                        }
286                    }
287                }
288            }
289        }
290    
291        public static void setPigScript(Configuration conf, String script, String[] params, String[] args) {
292            conf.set("oozie.pig.script", script);
293            MapReduceMain.setStrings(conf, "oozie.pig.params", params);
294            MapReduceMain.setStrings(conf, "oozie.pig.args", args);
295        }
296    
297        private static final String JOB_ID_LOG_PREFIX = "HadoopJobId: ";
298    
299        protected Properties getHadoopJobIds(String logFile) throws IOException {
300            int jobCount = 0;
301            Properties props = new Properties();
302            StringBuffer sb = new StringBuffer(100);
303            if (new File(logFile).exists() == false) {
304                System.err.println("pig log file: " + logFile + "  not present. Therefore no Hadoop jobids found");
305                props.setProperty("hadoopJobs", "");
306            }
307            else {
308                BufferedReader br = new BufferedReader(new FileReader(logFile));
309                String line = br.readLine();
310                String separator = "";
311                while (line != null) {
312                    if (line.contains(JOB_ID_LOG_PREFIX)) {
313                        int jobIdStarts = line.indexOf(JOB_ID_LOG_PREFIX) + JOB_ID_LOG_PREFIX.length();
314                        String jobId = line.substring(jobIdStarts);
315                        int jobIdEnds = jobId.indexOf(" ");
316                        if (jobIdEnds > -1) {
317                            jobId = jobId.substring(0, jobId.indexOf(" "));
318                        }
319                        sb.append(separator).append(jobId);
320                        separator = ",";
321                    }
322                    line = br.readLine();
323                }
324                br.close();
325                props.setProperty("hadoopJobs", sb.toString());
326            }
327            return props;
328        }
329    
330    }