/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.segment;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.io.Writer;
import java.lang.invoke.MethodHandles;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.NutchWritable;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.parse.ParseData;
import org.apache.nutch.parse.ParseText;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.util.HadoopFSUtil;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
import org.apache.nutch.util.SegmentReaderUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class SegmentReader
extends Configured
implements Tool {
    // Supplies the random suffix of the temporary job output directory used by dump().
    private static final Random RANDOM = new Random();
    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
    // Per-part switches, all enabled by default. run() clears one for each matching
    // "-no..." option: co = content, fe = crawl_fetch, ge = crawl_generate,
    // pa = crawl_parse, pd = parse_data, pt = parse_text.
    private boolean co = true;
    private boolean fe = true;
    private boolean ge = true;
    private boolean pa = true;
    private boolean pd = true;
    private boolean pt = true;
    // Set by the "-recode" option; when true, content is rendered with the charset
    // obtained from the record's parse metadata (see getCharset()).
    private boolean recodeContent = false;
    // Pairs of {part id, heading}; get() walks this array in order, looking up each
    // part id in its result map and printing the heading before every record.
    private static final String[][] keys = new String[][]{{"co", "Content::\n"}, {"ge", "Crawl Generate::\n"}, {"fe", "Crawl Fetch::\n"}, {"pa", "Crawl Parse::\n"}, {"pd", "ParseData::\n"}, {"pt", "ParseText::\n"}};
    // Formats the fetch start/end timestamps printed by list().
    // NOTE(review): SimpleDateFormat is not thread-safe; only list() uses it in this file.
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
    // Operation modes; the values match the ones run() assigns for -dump, -list and -get.
    private static final int MODE_DUMP = 0;
    private static final int MODE_LIST = 1;
    private static final int MODE_GET = 2;

    /**
     * Dumps the enabled parts of a segment into one UTF-8 text file.
     *
     * <p>A MapReduce job reads every enabled segment part and writes its textual
     * form to a temporary directory below {@code hadoop.tmp.dir}. The job's part
     * files are then concatenated into {@code <output>/<segment.dump.dir>}
     * (default name {@code dump}), renumbering the {@code Recno::} lines so they
     * are consecutive across part files. The temporary directory is removed
     * afterwards.
     *
     * @param segment segment directory to read
     * @param output  directory that receives the dump file
     * @throws IOException            if the job or a file system operation fails
     * @throws InterruptedException   if waiting for the job is interrupted
     * @throws ClassNotFoundException if a job class cannot be loaded
     * @throws RuntimeException       if the job completes unsuccessfully
     */
    public void dump(Path segment, Path output) throws IOException, InterruptedException, ClassNotFoundException {
        LOG.info("SegmentReader: dump segment: {}", segment);
        Job job = Job.getInstance(this.getConf(), "Nutch SegmentReader: " + segment);
        Configuration conf = job.getConfiguration();
        if (this.ge) {
            FileInputFormat.addInputPath(job, new Path(segment, "crawl_generate"));
        }
        if (this.fe) {
            FileInputFormat.addInputPath(job, new Path(segment, "crawl_fetch"));
        }
        if (this.pa) {
            FileInputFormat.addInputPath(job, new Path(segment, "crawl_parse"));
        }
        if (this.co) {
            FileInputFormat.addInputPath(job, new Path(segment, "content"));
        }
        if (this.pd) {
            FileInputFormat.addInputPath(job, new Path(segment, "parse_data"));
        }
        if (this.pt) {
            FileInputFormat.addInputPath(job, new Path(segment, "parse_text"));
        }
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(InputCompatMapper.class);
        job.setReducerClass(InputCompatReducer.class);
        job.setJarByClass(SegmentReader.class);

        Path tempDir = new Path(conf.get("hadoop.tmp.dir", "/tmp") + "/segread-" + RANDOM.nextInt());
        FileSystem fs = tempDir.getFileSystem(conf);
        // Clear any stale directory of the same name before using it as job output.
        fs.delete(tempDir, true);
        FileOutputFormat.setOutputPath(job, tempDir);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NutchWritable.class);

        try {
            if (!job.waitForCompletion(true)) {
                String message = NutchJob.getJobFailureLogMessage("SegmentReader", job);
                LOG.error(message);
                throw new RuntimeException(message);
            }
        } catch (IOException | ClassNotFoundException | InterruptedException e) {
            LOG.error(StringUtils.stringifyException(e));
            throw e;
        }

        // Concatenate the job's part files into the final dump file.
        Path dumpFile = new Path(output, conf.get("segment.dump.dir", "dump"));
        FileSystem outFs = dumpFile.getFileSystem(conf);
        outFs.delete(dumpFile, true);
        FileStatus[] fstats = fs.listStatus(tempDir, HadoopFSUtil.getPassAllFilter());
        Path[] files = HadoopFSUtil.getPaths(fstats);
        int currentRecordNumber = 0;
        // getPaths() is null-checked in run() as well, so guard against null here too.
        if (files != null && files.length > 0) {
            try (PrintWriter writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(outFs.create(dumpFile), StandardCharsets.UTF_8)))) {
                for (Path partFile : files) {
                    try {
                        currentRecordNumber = this.append(fs, conf, partFile, writer, currentRecordNumber);
                    } catch (IOException exception) {
                        // Best effort: skip the unreadable part file, but keep the full
                        // cause in the log (getMessage() alone may be null).
                        LOG.warn("Couldn't copy the content of {} into {}", partFile, dumpFile, exception);
                    }
                }
            }
        }
        fs.delete(tempDir, true);
        LOG.info("SegmentReader: done");
    }

    /**
     * Copies one job part file to the dump writer line by line, replacing every
     * line that starts with {@code "Recno:: "} by a freshly numbered one.
     *
     * @param fs                  file system holding {@code src}
     * @param conf                job configuration (not used by this method)
     * @param src                 part file to copy, read as UTF-8
     * @param writer              destination of the copied lines
     * @param currentRecordNumber number to assign to the first record found
     * @return the number to assign to the next record after this file
     * @throws IOException if {@code src} cannot be opened or read
     */
    private int append(FileSystem fs, Configuration conf, Path src, PrintWriter writer, int currentRecordNumber) throws IOException {
        int nextRecno = currentRecordNumber;
        try (BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(src), StandardCharsets.UTF_8))) {
            String text;
            while ((text = in.readLine()) != null) {
                if (text.startsWith("Recno:: ")) {
                    writer.println("Recno:: " + nextRecno++);
                } else {
                    writer.println(text);
                }
            }
        }
        return nextRecno;
    }

    /**
     * Looks up all records stored under {@code key} in the enabled parts of a
     * segment and writes them to {@code writer}.
     *
     * <p>Each enabled part is searched on its own thread. A lookup failure is
     * logged and leaves that part out of {@code results}. Once every lookup has
     * finished, the parts are printed in the order of {@link #keys}, each record
     * preceded by the part's heading.
     *
     * @param segment segment directory to search
     * @param key     key (URL) to look up
     * @param writer  destination of the textual output; flushed after every part
     * @param results map filled with the records found, keyed by part id
     *                ("co", "fe", "ge", "pa", "pd", "pt")
     * @throws Exception if writing to {@code writer} fails
     */
    public void get(final Path segment, final Text key, Writer writer, final Map<String, List<Writable>> results) throws Exception {
        LOG.info("SegmentReader: get '{}'", key);
        List<Thread> threads = new ArrayList<Thread>();
        if (this.co) {
            threads.add(newLookupThread("co", new Path(segment, "content"), key, true, results));
        }
        if (this.fe) {
            threads.add(newLookupThread("fe", new Path(segment, "crawl_fetch"), key, true, results));
        }
        if (this.ge) {
            threads.add(newLookupThread("ge", new Path(segment, "crawl_generate"), key, false, results));
        }
        if (this.pa) {
            threads.add(newLookupThread("pa", new Path(segment, "crawl_parse"), key, false, results));
        }
        if (this.pd) {
            threads.add(newLookupThread("pd", new Path(segment, "parse_data"), key, true, results));
        }
        if (this.pt) {
            threads.add(newLookupThread("pt", new Path(segment, "parse_text"), key, true, results));
        }
        for (Thread thread : threads) {
            thread.start();
        }

        // Wait for every lookup with join() rather than polling on a fixed sleep.
        // An interrupt does not abort the wait; it is remembered and restored below.
        boolean interrupted = false;
        for (Thread thread : threads) {
            boolean joined = false;
            while (!joined) {
                try {
                    thread.join();
                    joined = true;
                } catch (InterruptedException e) {
                    interrupted = true;
                }
            }
        }
        if (interrupted) {
            Thread.currentThread().interrupt();
        }

        List<Writable> parseData = results.get("pd");
        for (String[] entry : keys) {
            List<Writable> res = results.get(entry[0]);
            if (res != null) {
                boolean recode = this.recodeContent && "co".equals(entry[0]);
                for (int k = 0; k < res.size(); ++k) {
                    writer.write(entry[1]);
                    if (recode) {
                        // Parse data may be disabled or missing for this record;
                        // fall back to UTF-8 instead of failing.
                        Charset charset = StandardCharsets.UTF_8;
                        if (parseData != null && k < parseData.size()) {
                            charset = SegmentReader.getCharset(((ParseData) parseData.get(k)).getParseMeta());
                        }
                        writer.write(((Content) res.get(k)).toString(charset));
                    } else {
                        writer.write(res.get(k).toString());
                    }
                    writer.write('\n');
                }
            }
            writer.flush();
        }
    }

    /**
     * Creates (but does not start) a thread that looks up {@code key} in one
     * segment part and stores the records found in {@code results} under {@code id}.
     *
     * @param mapFile {@code true} to use the map-file lookup, {@code false} for
     *                the sequence-file scan
     */
    private Thread newLookupThread(final String id, final Path dir, final Text key, final boolean mapFile, final Map<String, List<Writable>> results) {
        return new Thread() {

            @Override
            public void run() {
                try {
                    List<Writable> res = mapFile
                        ? SegmentReader.this.getMapRecords(dir, key)
                        : SegmentReader.this.getSeqRecords(dir, key);
                    // The map is shared by all lookup threads and may be a plain HashMap.
                    synchronized (results) {
                        results.put(id, res);
                    }
                } catch (Exception e) {
                    LOG.error("Exception:", e);
                }
            }
        };
    }

    /**
     * Collects every value stored under {@code key} in the map files of {@code dir}.
     *
     * <p>For each reader the key is looked up directly; on a hit, the following
     * entries are read as long as they carry the same key.
     *
     * @param dir directory containing the map file parts
     * @param key key to look up
     * @return the values found, possibly empty; empty as well when {@code dir}
     *         yields no readers
     * @throws IOException if the stored key class is not {@code org.apache.hadoop.io.Text}
     * @throws Exception   if reading fails or a key/value instance cannot be created
     */
    private List<Writable> getMapRecords(Path dir, Text key) throws Exception {
        MapFile.Reader[] readers = MapFileOutputFormat.getReaders(dir, this.getConf());
        List<Writable> res = new ArrayList<Writable>();
        if (readers == null || readers.length == 0) {
            return res;
        }
        try {
            Class<?> keyClass = readers[0].getKeyClass();
            Class<?> valueClass = readers[0].getValueClass();
            if (!keyClass.getName().equals("org.apache.hadoop.io.Text")) {
                throw new IOException("Incompatible key (" + keyClass.getName() + ")");
            }
            for (MapFile.Reader reader : readers) {
                Writable value = (Writable) valueClass.getConstructor().newInstance();
                if (reader.get(key, value) == null) {
                    continue;
                }
                res.add(value);
                // Pick up further entries that follow directly under the same key.
                Text aKey = (Text) keyClass.getConstructor().newInstance();
                value = (Writable) valueClass.getConstructor().newInstance();
                while (reader.next(aKey, value) && aKey.equals(key)) {
                    res.add(value);
                    value = (Writable) valueClass.getConstructor().newInstance();
                }
            }
        } finally {
            // Close every reader, also when a lookup failed part-way through.
            for (MapFile.Reader reader : readers) {
                try {
                    reader.close();
                } catch (IOException e) {
                    LOG.warn("Failed to close a reader of {}", dir, e);
                }
            }
        }
        return res;
    }

    /**
     * Collects every value stored under {@code key} in the sequence files of
     * {@code dir} by scanning all entries of all parts.
     *
     * @param dir directory containing the sequence file parts
     * @param key key to look for
     * @return the values found, possibly empty; empty as well when {@code dir}
     *         yields no readers
     * @throws IOException if the stored key class is not {@code org.apache.hadoop.io.Text}
     * @throws Exception   if reading fails or a key/value instance cannot be created
     */
    private List<Writable> getSeqRecords(Path dir, Text key) throws Exception {
        SequenceFile.Reader[] readers = SequenceFileOutputFormat.getReaders(this.getConf(), dir);
        List<Writable> res = new ArrayList<Writable>();
        if (readers == null || readers.length == 0) {
            return res;
        }
        try {
            Class<?> keyClass = readers[0].getKeyClass();
            Class<?> valueClass = readers[0].getValueClass();
            if (!keyClass.getName().equals("org.apache.hadoop.io.Text")) {
                throw new IOException("Incompatible key (" + keyClass.getName() + ")");
            }
            Writable aKey = (Writable) keyClass.getConstructor().newInstance();
            Writable value = (Writable) valueClass.getConstructor().newInstance();
            for (SequenceFile.Reader reader : readers) {
                while (reader.next(aKey, value)) {
                    if (aKey.equals(key)) {
                        res.add(value);
                        // The stored instance must not be overwritten by the next read.
                        value = (Writable) valueClass.getConstructor().newInstance();
                    }
                }
            }
        } finally {
            // Close every reader, also when the scan failed part-way through.
            for (SequenceFile.Reader reader : readers) {
                try {
                    reader.close();
                } catch (IOException e) {
                    LOG.warn("Failed to close a reader of {}", dir, e);
                }
            }
        }
        return res;
    }

    /**
     * Determines the charset to use when rendering content as text.
     *
     * <p>The charset name is taken from the {@code CharEncodingForConversion}
     * entry of the parse metadata, or from {@code Content-Encoding} when the
     * former is absent.
     *
     * @param parseMeta parse metadata of the record; may be {@code null}
     * @return the named charset, or UTF-8 when no name is available or the name
     *         is illegal or unsupported
     */
    public static Charset getCharset(Metadata parseMeta) {
        if (parseMeta == null) {
            return StandardCharsets.UTF_8;
        }
        String charset = parseMeta.get("CharEncodingForConversion");
        if (charset == null) {
            charset = parseMeta.get("Content-Encoding");
        }
        if (charset == null) {
            return StandardCharsets.UTF_8;
        }
        try {
            return Charset.forName(charset);
        } catch (IllegalArgumentException e) {
            // Covers IllegalCharsetNameException and UnsupportedCharsetException.
            LOG.debug("Unusable charset '{}', falling back to UTF-8", charset);
            return StandardCharsets.UTF_8;
        }
    }

    /**
     * Writes a tab-separated synopsis of the given segments: a header line
     * followed by one line per segment with its name, generated count, fetch
     * start and end time, fetched count and parsed count. A value that is still
     * {@code -1} after {@link #getStats} is printed as {@code ?}.
     *
     * @param dirs   segment directories to summarise
     * @param writer destination of the table; flushed after every segment line
     * @throws Exception if collecting the statistics or writing fails
     */
    public void list(List<Path> dirs, Writer writer) throws Exception {
        writer.write("NAME\t\tGENERATED\tFETCHER START\t\tFETCHER END\t\tFETCHED\tPARSED\n");
        for (Path dir : dirs) {
            SegmentReaderStats stats = new SegmentReaderStats();
            this.getStats(dir, stats);

            StringBuilder row = new StringBuilder();
            row.append(dir.getName()).append("\t");
            row.append(stats.generated == -1L ? "?" : String.valueOf(stats.generated));
            row.append("\t\t");
            // The unknown start marker carries an extra tab to pad the wide date column.
            row.append(stats.start == -1L ? "?\t" : this.sdf.format(new Date(stats.start)));
            row.append("\t");
            row.append(stats.end == -1L ? "?" : this.sdf.format(new Date(stats.end)));
            row.append("\t");
            row.append(stats.fetched == -1L ? "?" : String.valueOf(stats.fetched));
            row.append("\t");
            row.append(stats.parsed == -1L ? "?" : String.valueOf(stats.parsed));
            row.append("\n");

            writer.write(row.toString());
            writer.flush();
        }
    }

    /**
     * Fills {@code stats} with record counts and the fetch time range of a segment.
     *
     * <ul>
     * <li>{@code generated}: number of entries in {@code crawl_generate}
     *     (only when that part is enabled);</li>
     * <li>{@code fetched}, {@code start}, {@code end}: number of entries in
     *     {@code crawl_fetch} and their smallest/largest fetch time (only when the
     *     part is enabled and the directory exists);</li>
     * <li>{@code parsed}, {@code parseErrors}: number of entries in
     *     {@code parse_data} and how many of them have an unsuccessful status
     *     (only when the part is enabled and the directory exists).</li>
     * </ul>
     * Fields that are not computed keep their previous value. {@code start} and
     * {@code end} are also left untouched when {@code crawl_fetch} holds no entries.
     *
     * @param segment segment directory to inspect
     * @param stats   receives the statistics
     * @throws Exception if a segment part cannot be read
     */
    public void getStats(Path segment, SegmentReaderStats stats) throws Exception {
        Text key = new Text();
        FileSystem fs = segment.getFileSystem(this.getConf());

        if (this.ge) {
            long generated = 0L;
            CrawlDatum val = new CrawlDatum();
            SequenceFile.Reader[] readers = SegmentReaderUtil.getReaders(new Path(segment, "crawl_generate"), this.getConf());
            for (SequenceFile.Reader reader : readers) {
                while (reader.next(key, val)) {
                    ++generated;
                }
                reader.close();
            }
            stats.generated = generated;
        }

        Path fetchDir = new Path(segment, "crawl_fetch");
        if (this.fe && fs.exists(fetchDir) && fs.getFileStatus(fetchDir).isDirectory()) {
            long fetched = 0L;
            long start = Long.MAX_VALUE;
            long end = Long.MIN_VALUE;
            CrawlDatum value = new CrawlDatum();
            MapFile.Reader[] mreaders = MapFileOutputFormat.getReaders(fetchDir, this.getConf());
            for (MapFile.Reader reader : mreaders) {
                while (reader.next(key, value)) {
                    ++fetched;
                    long fetchTime = value.getFetchTime();
                    start = Math.min(start, fetchTime);
                    end = Math.max(end, fetchTime);
                }
                reader.close();
            }
            // Without any entry the range is undefined: keep the previous values
            // rather than publishing the Long.MAX_VALUE / Long.MIN_VALUE seeds.
            if (fetched > 0L) {
                stats.start = start;
                stats.end = end;
            }
            stats.fetched = fetched;
        }

        Path parseDir = new Path(segment, "parse_data");
        if (this.pd && fs.exists(parseDir) && fs.getFileStatus(parseDir).isDirectory()) {
            long parsed = 0L;
            long errors = 0L;
            ParseData value = new ParseData();
            MapFile.Reader[] mreaders = MapFileOutputFormat.getReaders(parseDir, this.getConf());
            for (MapFile.Reader reader : mreaders) {
                while (reader.next(key, value)) {
                    ++parsed;
                    if (!value.getStatus().isSuccess()) {
                        ++errors;
                    }
                }
                reader.close();
            }
            stats.parsed = parsed;
            stats.parseErrors = errors;
        }
    }

    /**
     * Command-line entry point: parses the operation ({@code -dump},
     * {@code -list} or {@code -get}) and the general options, then runs the
     * operation.
     *
     * <p>General options may appear anywhere after the operation; each one found
     * is applied and its slot in {@code args} is set to {@code null}. The
     * positional arguments of {@code -dump} and {@code -get} are read from
     * {@code args[1]} and {@code args[2]}.
     *
     * @param args command-line arguments; option slots are overwritten with {@code null}
     * @return 0 on success, -1 after printing the usage text for invalid arguments
     * @throws Exception if the selected operation fails
     */
    public int run(String[] args) throws Exception {
        if (args.length < 2) {
            SegmentReader.usage();
            return -1;
        }
        int mode = -1;
        if (args[0].equals("-dump")) {
            mode = MODE_DUMP;
        } else if (args[0].equals("-list")) {
            mode = MODE_LIST;
        } else if (args[0].equals("-get")) {
            mode = MODE_GET;
        }

        // Apply the general options and blank them out of the argument list.
        for (int i = 1; i < args.length; ++i) {
            if (args[i].equals("-nocontent")) {
                this.co = false;
            } else if (args[i].equals("-nofetch")) {
                this.fe = false;
            } else if (args[i].equals("-nogenerate")) {
                this.ge = false;
            } else if (args[i].equals("-noparse")) {
                this.pa = false;
            } else if (args[i].equals("-noparsedata")) {
                this.pd = false;
            } else if (args[i].equals("-noparsetext")) {
                this.pt = false;
            } else if (args[i].equals("-recode")) {
                this.recodeContent = true;
            } else {
                continue;
            }
            args[i] = null;
        }
        if (this.recodeContent) {
            LOG.info("Recoding charset of HTML content");
            // Picked up by InputCompatReducer.setup() inside the dump job.
            this.getConf().setBoolean("segment.reader.content.recode", true);
        }

        switch (mode) {
            case MODE_DUMP: {
                String input = args[1];
                if (input == null) {
                    System.err.println("Missing required argument: <segment_dir>");
                    SegmentReader.usage();
                    return -1;
                }
                String output = args.length > 2 ? args[2] : null;
                if (output == null) {
                    System.err.println("Missing required argument: <output>");
                    SegmentReader.usage();
                    return -1;
                }
                this.dump(new Path(input), new Path(output));
                return 0;
            }
            case MODE_LIST: {
                List<Path> dirs = new ArrayList<Path>();
                for (int i = 1; i < args.length; ++i) {
                    if (args[i] == null) {
                        continue;
                    }
                    if (args[i].equals("-dir")) {
                        // "-dir" needs a value; the next slot may be absent or an
                        // option that was blanked out above.
                        if (i + 1 >= args.length || args[i + 1] == null) {
                            System.err.println("Missing required argument: <segments>");
                            SegmentReader.usage();
                            return -1;
                        }
                        Path dir = new Path(args[++i]);
                        FileSystem fs = dir.getFileSystem(this.getConf());
                        FileStatus[] fstats = fs.listStatus(dir, HadoopFSUtil.getPassDirectoriesFilter(fs));
                        Path[] files = HadoopFSUtil.getPaths(fstats);
                        if (files != null && files.length > 0) {
                            dirs.addAll(Arrays.asList(files));
                        }
                        continue;
                    }
                    dirs.add(new Path(args[i]));
                }
                this.list(dirs, new OutputStreamWriter(System.out, StandardCharsets.UTF_8));
                return 0;
            }
            case MODE_GET: {
                String input = args[1];
                if (input == null) {
                    System.err.println("Missing required argument: <segment_dir>");
                    SegmentReader.usage();
                    return -1;
                }
                String key = args.length > 2 ? args[2] : null;
                if (key == null) {
                    System.err.println("Missing required argument: <keyValue>");
                    SegmentReader.usage();
                    return -1;
                }
                this.get(new Path(input), new Text(key), new OutputStreamWriter(System.out, StandardCharsets.UTF_8), new HashMap<String, List<Writable>>());
                return 0;
            }
            default:
                break;
        }
        System.err.println("Invalid operation: " + args[0]);
        SegmentReader.usage();
        return -1;
    }

    /** Prints the command-line help for all three operations to standard error. */
    private static void usage() {
        final String[] helpLines = {
            "Usage: SegmentReader (-dump ... | -list ... | -get ...) [general options]\n",
            "* General options:",
            "\t-nocontent\tignore content directory",
            "\t-nofetch\tignore crawl_fetch directory",
            "\t-nogenerate\tignore crawl_generate directory",
            "\t-noparse\tignore crawl_parse directory",
            "\t-noparsedata\tignore parse_data directory",
            "\t-noparsetext\tignore parse_text directory",
            "\t-recode \ttry to recode HTML content from the page's\n\t        \toriginal charset to UTF-8\n",
            "",
            "* SegmentReader -dump <segment_dir> <output> [general options]",
            "  Dumps content of a <segment_dir> as a text file to <output>.\n",
            "\t<segment_dir>\tname of the segment directory.",
            "\t<output>\tname of the (non-existent) output directory.",
            "",
            "* SegmentReader -list (<segment_dir1> ... | -dir <segments>) [general options]",
            "  List a synopsis of segments in specified directories, or all segments in",
            "  a directory <segments>, and print it on System.out\n",
            "\t<segment_dir1> ...\tlist of segment directories to process",
            "\t-dir <segments>\t\tdirectory that contains multiple segments",
            "",
            "* SegmentReader -get <segment_dir> <keyValue> [general options]",
            "  Get a specified record from a segment, and print it on System.out.\n",
            "\t<segment_dir>\tname of the segment directory.",
            "\t<keyValue>\tvalue of the key (url).",
            "\t\tNote: put double-quotes around strings with spaces.",
        };
        // An empty entry produces the same bare line break as println() without arguments.
        for (String helpLine : helpLines) {
            System.err.println(helpLine);
        }
    }

    /**
     * Runs the tool through {@link ToolRunner} with a Nutch configuration and
     * exits the JVM with the tool's return code.
     *
     * @param args command-line arguments, see {@link #usage()}
     * @throws Exception if the tool fails
     */
    public static void main(String[] args) throws Exception {
        Configuration nutchConf = NutchConfiguration.create();
        System.exit(ToolRunner.run(nutchConf, new SegmentReader(), args));
    }

    /**
     * Plain holder for the per-segment figures filled in by getStats().
     * Every field starts at -1; list() prints "?" for a field that is still -1.
     */
    public static class SegmentReaderStats {
        // Smallest fetch time seen in crawl_fetch (passed to java.util.Date by list()).
        public long start = -1L;
        // Largest fetch time seen in crawl_fetch (passed to java.util.Date by list()).
        public long end = -1L;
        // Number of entries in crawl_generate.
        public long generated = -1L;
        // Number of entries in crawl_fetch.
        public long fetched = -1L;
        // Not assigned anywhere in this class; stays at -1 unless a caller sets it.
        public long fetchErrors = -1L;
        // Number of entries in parse_data.
        public long parsed = -1L;
        // Number of parse_data entries whose status is not successful.
        public long parseErrors = -1L;
    }

    public static class InputCompatReducer
    extends Reducer<Text, NutchWritable, Text, Text> {
        private long recNo = 0L;
        private boolean recodeContent = false;

        public void setup(Reducer.Context context) {
            this.recodeContent = context.getConfiguration().getBoolean("segment.reader.content.recode", false);
        }

        public void reduce(Text key, Iterable<NutchWritable> values, Reducer.Context context) throws IOException, InterruptedException {
            StringBuffer dump = new StringBuffer();
            dump.append("\nRecno:: ").append(this.recNo++).append("\n");
            dump.append("URL:: " + key.toString() + "\n");
            Content content = null;
            Charset charset = StandardCharsets.UTF_8;
            for (NutchWritable val : values) {
                Writable value = val.get();
                if (value instanceof CrawlDatum) {
                    dump.append("\nCrawlDatum::\n").append(((CrawlDatum)value).toString());
                    continue;
                }
                if (value instanceof Content) {
                    if (this.recodeContent) {
                        content = (Content)value;
                        continue;
                    }
                    dump.append("\nContent::\n").append(((Content)value).toString());
                    continue;
                }
                if (value instanceof ParseData) {
                    dump.append("\nParseData::\n").append(((ParseData)value).toString());
                    if (!this.recodeContent) continue;
                    charset = SegmentReader.getCharset(((ParseData)value).getParseMeta());
                    continue;
                }
                if (value instanceof ParseText) {
                    dump.append("\nParseText::\n").append(((ParseText)value).toString());
                    continue;
                }
                if (!LOG.isWarnEnabled()) continue;
                LOG.warn("Unrecognized type: " + value.getClass());
            }
            if (this.recodeContent && content != null) {
                dump.append("\nContent::\n").append(content.toString(charset));
            }
            context.write((Object)key, (Object)new Text(dump.toString()));
        }
    }

    public static class TextOutputFormat
    extends FileOutputFormat<WritableComparable<?>, Writable> {
        public RecordWriter<WritableComparable<?>, Writable> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
            Path segmentDumpFile;
            String name = TextOutputFormat.getUniqueFile((TaskAttemptContext)context, (String)"part", (String)"");
            Path dir = FileOutputFormat.getOutputPath((JobContext)context);
            FileSystem fs = dir.getFileSystem(context.getConfiguration());
            if (fs.exists(segmentDumpFile = new Path(FileOutputFormat.getOutputPath((JobContext)context), name))) {
                fs.delete(segmentDumpFile, true);
            }
            final PrintStream printStream = new PrintStream((OutputStream)fs.create(segmentDumpFile), false, StandardCharsets.UTF_8.name());
            return new RecordWriter<WritableComparable<?>, Writable>(){

                public synchronized void write(WritableComparable<?> key, Writable value) throws IOException {
                    printStream.println(value);
                }

                public synchronized void close(TaskAttemptContext context) throws IOException {
                    printStream.close();
                }
            };
        }
    }

    public static class InputCompatMapper
    extends Mapper<WritableComparable<?>, Writable, Text, NutchWritable> {
        private Text newKey = new Text();

        public void map(WritableComparable<?> key, Writable value, Mapper.Context context) throws IOException, InterruptedException {
            if (key instanceof Text) {
                this.newKey.set(key.toString());
                key = this.newKey;
            }
            context.write((Object)key, (Object)new NutchWritable(value));
        }
    }
}

