/*
 * Decompiled with CFR 0.152.
 */
package org.apache.orc.tools;

import java.util.ArrayList;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;
import org.apache.orc.StripeInformation;
import org.apache.orc.TypeDescription;
import org.apache.orc.tools.FileDump;

/**
 * Command-line tool that reads every row batch of one or more ORC files and
 * reports the batches that cannot be read, together with the position of each
 * failure and the row at which reading is resumed.
 */
public class ScanData {
    /** Batch size used when the file has no usable row index stride; also the per-batch upper bound. */
    private static final int DEFAULT_BATCH_SIZE = 1024;

    private static final Options OPTIONS = new Options()
            .addOption("v", "verbose", false, "Print exceptions")
            .addOption("s", "schema", false, "Print schema")
            .addOption("h", "help", false, "Provide help");

    /**
     * Parses the command-line arguments against the options supported by this tool.
     *
     * @param args the raw command-line arguments
     * @return the parsed command line
     * @throws ParseException if the arguments do not match the declared options
     */
    static CommandLine parseCommandLine(String[] args) throws ParseException {
        return new DefaultParser().parse(OPTIONS, args);
    }

    /**
     * Chooses a row batch size for a given row index stride.
     *
     * <p>The stride is split into the smallest number of batches of at most
     * {@value #DEFAULT_BATCH_SIZE} rows, and the (floored) number of rows per batch is returned.
     *
     * @param indexStride the row index stride of the file; a non-positive value is treated as
     *                    "no row index"
     * @return {@value #DEFAULT_BATCH_SIZE} when {@code indexStride} is not positive, otherwise
     *         a value between 1 and {@value #DEFAULT_BATCH_SIZE}
     */
    static int calculateBestVectorSize(int indexStride) {
        // A non-positive stride would otherwise produce a zero (or negative) batch count
        // and either divide by zero or return a meaningless size.
        if (indexStride <= 0) {
            return DEFAULT_BATCH_SIZE;
        }
        // Ceiling division: number of batches of at most DEFAULT_BATCH_SIZE rows per stride.
        int batchCount = (indexStride + DEFAULT_BATCH_SIZE - 1) / DEFAULT_BATCH_SIZE;
        return indexStride / batchCount;
    }

    /**
     * Locates the stripe that contains the given row by accumulating the row counts of the
     * reader's stripes in order.
     *
     * @param reader the file reader whose stripes are searched
     * @param row    the absolute row number to locate
     * @return the location of {@code row}; if no stripe contains it, a location whose first and
     *         following rows both equal the reader's row count and whose stripe id equals the
     *         number of stripes
     */
    static LocationInfo findStripeInfo(Reader reader, long row) {
        long firstRow = 0L;
        int stripeId = 0;
        for (StripeInformation stripe : reader.getStripes()) {
            long lastRow = firstRow + stripe.getNumberOfRows();
            if (firstRow <= row && row < lastRow) {
                return new LocationInfo(firstRow, lastRow, stripeId, row);
            }
            firstRow = lastRow;
            ++stripeId;
        }
        // The row lies outside every stripe: report an "end of file" position.
        long totalRows = reader.getNumberOfRows();
        return new LocationInfo(totalRows, totalRows, reader.getStripes().size(), row);
    }

    /**
     * Computes the row at which reading is resumed after a batch starting at {@code current}
     * could not be read.
     *
     * <p>When the reader reports a stride of 0, or the failed batch reaches the end of its
     * stripe, the recovery row is the first row after the stripe. Otherwise it is the first
     * stride boundary (relative to the start of the stripe) at or after the end of the failed
     * batch, capped at the end of the stripe.
     *
     * @param reader    the file reader
     * @param current   the location of the first row of the failed batch
     * @param batchSize the number of rows in a batch
     * @return the location of the recovery row
     */
    static LocationInfo findRecoveryPoint(Reader reader, LocationInfo current, int batchSize) {
        int stride = reader.getRowIndexStride();
        long batchEnd = current.row + batchSize;
        long result;
        if (stride == 0 || batchEnd >= current.followingRow) {
            result = current.followingRow;
        } else {
            long rowInStripe = batchEnd - current.firstRow;
            // Round the in-stripe offset up to the next multiple of the stride.
            long alignedOffset = (rowInStripe + stride - 1L) / stride * stride;
            result = Math.min(current.followingRow, current.firstRow + alignedOffset);
        }
        return findStripeInfo(reader, result);
    }

    /**
     * Recursively probes the leaf columns below {@code column} to find which of them fail to
     * read in the batch starting at {@code current}.
     *
     * <p>For a leaf column (one whose children are {@code null}), a record reader restricted by
     * {@code include} is opened, positioned at {@code current.row}, and read one row at a time
     * for up to {@code batchSize} rows. Any failure is reported on standard output. For other
     * columns every child is probed in turn.
     *
     * @param reader    the file reader
     * @param current   the location of the first row of the failed batch
     * @param batchSize the number of rows to probe
     * @param column    the column (sub)tree to probe
     * @param include   scratch array indexed by column id; the entry for {@code column} is set
     *                  while this call runs and cleared again before it returns
     * @return {@code true} if at least one leaf column below {@code column} failed
     */
    static boolean findBadColumns(Reader reader, LocationInfo current, int batchSize, TypeDescription column, boolean[] include) {
        include[column.getId()] = true;
        TypeDescription schema = reader.getSchema();
        boolean result = false;
        if (column.getChildren() == null) {
            // Declared outside the try block so the catch clause can report how far we got.
            int row = 0;
            try (RecordReader rows = reader.rows(reader.options().include(include))) {
                rows.seekToRow(current.row);
                VectorizedRowBatch batch = schema.createRowBatch(TypeDescription.RowBatchVersion.USE_DECIMAL64, 1);
                for (row = 0; row < batchSize; ++row) {
                    // Stop as soon as the reader reports that no rows are left instead of
                    // repeatedly calling it past the end of the data.
                    if (!rows.nextBatch(batch)) {
                        break;
                    }
                }
            } catch (Throwable t) {
                // Any failure, including errors, is counted as a bad column.
                System.out.printf("Column %d failed at row %d%n", column.getId(), current.row + (long) row);
                result = true;
            }
        } else {
            for (TypeDescription child : column.getChildren()) {
                result |= findBadColumns(reader, current, batchSize, child, include);
            }
        }
        include[column.getId()] = false;
        return result;
    }

    /**
     * Entry point of the scan tool: reads each file named on the command line batch by batch
     * and prints a per-file summary of bad batches and successfully read rows.
     *
     * <p>The JVM is terminated with exit status 1 after printing the help text (when {@code -h}
     * is given or no file is named) and when at least one file was recorded as bad.
     *
     * @param conf the Hadoop configuration used to open the files
     * @param args the command-line arguments: options followed by file paths
     * @throws ParseException if the arguments cannot be parsed
     */
    static void main(Configuration conf, String[] args) throws ParseException {
        CommandLine cli = parseCommandLine(args);
        if (cli.hasOption('h') || cli.getArgs().length == 0) {
            new HelpFormatter().printHelp("java -jar orc-tools-*.jar scan", OPTIONS);
            System.exit(1);
        } else {
            boolean printSchema = cli.hasOption('s');
            boolean printExceptions = cli.hasOption('v');
            ArrayList<String> badFiles = new ArrayList<>();
            for (String file : cli.getArgs()) {
                try (Reader reader = FileDump.getReader(new Path(file), conf, badFiles)) {
                    // NOTE(review): a null reader is skipped here; presumably FileDump.getReader
                    // records the file in badFiles itself in that case - confirm against FileDump.
                    if (reader == null) {
                        continue;
                    }
                    TypeDescription schema = reader.getSchema();
                    if (printSchema) {
                        System.out.println(schema.toJson());
                    }
                    VectorizedRowBatch batch = schema.createRowBatch(
                            TypeDescription.RowBatchVersion.USE_DECIMAL64,
                            calculateBestVectorSize(reader.getRowIndexStride()));
                    int batchSize = batch.getMaxSize();
                    long badBatches = 0L;
                    long currentRow = 0L;
                    long goodRows = 0L;
                    try (RecordReader rows = reader.rows()) {
                        while (currentRow < reader.getNumberOfRows()) {
                            // Remember where this batch starts so a failure can be located.
                            currentRow = rows.getRowNumber();
                            try {
                                if (!rows.nextBatch(batch)) {
                                    break;
                                }
                                goodRows += batch.size;
                            } catch (Exception e) {
                                ++badBatches;
                                LocationInfo current = findStripeInfo(reader, currentRow);
                                LocationInfo recover = findRecoveryPoint(reader, current, batchSize);
                                System.out.println("Unable to read batch at " + current + ", recovery at " + recover);
                                if (printExceptions) {
                                    e.printStackTrace();
                                }
                                // Called for its diagnostic output; the boolean result is not needed here.
                                findBadColumns(reader, current, batchSize, reader.getSchema(),
                                        new boolean[reader.getSchema().getMaximumId() + 1]);
                                if (recover.row >= reader.getNumberOfRows()) {
                                    break;
                                }
                                rows.seekToRow(recover.row);
                            }
                        }
                    }
                    if (badBatches != 0L) {
                        badFiles.add(file);
                    }
                    System.out.printf("File: %s, bad batches: %d, rows: %d/%d%n", file, badBatches, goodRows, reader.getNumberOfRows());
                } catch (Exception e) {
                    badFiles.add(file);
                    System.err.println("Unable to open file: " + file);
                    if (printExceptions) {
                        e.printStackTrace();
                    }
                }
            }
            if (!badFiles.isEmpty()) {
                System.err.println(badFiles + " bad ORC files found.");
                System.exit(1);
            }
        }
    }

    /**
     * Immutable description of where a row sits in a file: the row itself, the id of its stripe,
     * and the stripe's row range.
     */
    static class LocationInfo {
        /** Absolute number of the first row of the stripe. */
        final long firstRow;
        /** Absolute number of the first row after the stripe. */
        final long followingRow;
        /** Zero-based position of the stripe in the reader's stripe list. */
        final int stripeId;
        /** Absolute number of the row being described. */
        final long row;

        LocationInfo(long firstRow, long followingRow, int stripeId, long row) {
            this.firstRow = firstRow;
            this.followingRow = followingRow;
            this.stripeId = stripeId;
            this.row = row;
        }

        @Override
        public String toString() {
            return String.format("row %d in stripe %d (rows %d-%d)", row, stripeId, firstRow, followingRow);
        }
    }
}

