package co.cask.cdap.examples.wikipedia;

import co.cask.cdap.api.Resources;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.mapreduce.AbstractMapReduce;
import co.cask.cdap.api.mapreduce.MapReduceContext;
import co.cask.cdap.api.workflow.Value;
import co.cask.cdap.api.workflow.WorkflowToken;
import com.google.gson.Gson;
import com.google.gson.JsonSyntaxException;
import com.google.gson.annotations.SerializedName;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import javax.annotation.Nullable;
import javax.ws.rs.core.MediaType;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.sweble.wikitext.engine.EngineException;
import org.sweble.wikitext.engine.ExpansionCallback;
import org.sweble.wikitext.engine.PageId;
import org.sweble.wikitext.engine.PageTitle;
import org.sweble.wikitext.engine.WtEngineImpl;
import org.sweble.wikitext.engine.config.WikiConfigImpl;
import org.sweble.wikitext.engine.utils.DefaultConfigEnWp;
import org.sweble.wikitext.example.TextConverter;
import org.sweble.wikitext.parser.parser.LinkTargetException;

/* loaded from: input_file:co/cask/cdap/examples/wikipedia/WikiContentValidatorAndNormalizer.class */
public class WikiContentValidatorAndNormalizer extends AbstractMapReduce {
    /** Name of this program: the simple class name, which {@code configure()} passes to {@code setName}. */
    public static final String NAME = WikiContentValidatorAndNormalizer.class.getSimpleName();

    /* loaded from: input_file:co/cask/cdap/examples/wikipedia/WikiContentValidatorAndNormalizer$FilterNormalizerMapper.class */
    public static class FilterNormalizerMapper extends Mapper<byte[], byte[], byte[], byte[]> {
        /** Logger for this mapper; every skipped record is reported through it at debug level. */
        private static final Logger LOG = LoggerFactory.getLogger(FilterNormalizerMapper.class);
        /** Shared Gson instance used to deserialize record values into {@code WikiContents}. */
        private static final Gson GSON = new Gson();

        /**
         * Gson binding for the JSON document carried in each record value.
         *
         * <p>Field names mirror the JSON keys so that Gson can bind them. Within this mapper only
         * {@code query.pages}, a page's {@code title} and {@code revisions}, and a revision's
         * {@code contents} are read; the remaining fields exist solely for binding.
         *
         * <p>NOTE(review): the decompiler reported that the access modifiers of these nested classes
         * were widened from {@code private}; they are kept as found so existing references still compile.
         */
        public static final class WikiContents {
            private String batchcomplete;
            private Query query;

            /** Binding for the {@code query} object of the document. */
            public static final class Query {
                private List<Normalized> normalized;

                /** Pages keyed by a string (presumably the page id; confirm against the API response). */
                private Map<String, Page> pages;

                /** Binding for one {@code from}/{@code to} entry of the {@code normalized} list. */
                private static final class Normalized {
                    private String from;
                    private String to;

                    private Normalized() {
                    }
                }

                /** Binding for one page entry together with its list of revisions. */
                public static final class Page {
                    private long pageid;
                    private long ns;
                    private String title;
                    private List<Content> revisions;

                    /** Binding for one revision entry of a page. */
                    public static final class Content {
                        private String contentformat;
                        private String contentmodel;

                        // Bound to the JSON key "*". The key is written as a literal instead of
                        // javax.ws.rs.core.MediaType.MEDIA_TYPE_WILDCARD: that constant has the same
                        // value, but a JSON field name has nothing to do with JAX-RS media types.
                        @SerializedName("*")
                        private String contents;

                        private Content() {
                        }
                    }

                    private Page() {
                    }
                }

                private Query() {
                }
            }

            private WikiContents() {
            }
        }

        /**
         * Immutable holder pairing a page title with the contents of one of its revisions; it is
         * produced by {@code parse} and consumed by {@code toPlainText}.
         */
        public static final class WikiTitleAndText {
            private final String title;
            private final String contents;

            /**
             * @param title the page title
             * @param contents the revision contents belonging to that page
             */
            private WikiTitleAndText(String title, String contents) {
                this.title = title;
                this.contents = contents;
            }
        }

        /**
         * Parses one raw record, converts its wikitext to plain text and writes the result to the
         * output under the same key, incrementing the {@code custom/num.records} counter.
         *
         * <p>Records are skipped (with a debug log) when the key or value is null or empty, when the
         * value cannot be parsed, when no revision is found, or when the wikitext cannot be converted.
         * Failures of the write itself are not swallowed: they propagate to the framework.
         *
         * @param bArr the record key; written back unchanged as the output key
         * @param bArr2 the record value holding the JSON document to parse
         * @param context the Hadoop mapper context used for output and counters
         * @throws IOException if writing the output record fails
         * @throws InterruptedException if writing the output record is interrupted
         */
        @Override
        protected void map(byte[] bArr, byte[] bArr2, Mapper<byte[], byte[], byte[], byte[]>.Context context) throws IOException, InterruptedException {
            if (bArr == null) {
                LOG.debug("Found null key. Skipping record.");
                return;
            }
            if (bArr.length == 0) {
                LOG.debug("Found empty key. Skipping record.");
                return;
            }
            if (bArr2 == null) {
                LOG.debug("Found null value. Skipping record.");
                return;
            }
            if (bArr2.length == 0) {
                LOG.debug("Found empty value. Skipping record.");
                return;
            }

            // Phase 1: JSON parsing. JsonSyntaxException is a RuntimeException subtype, so its handler
            // has to come before the general one; the other order makes it unreachable.
            final WikiTitleAndText titleAndText;
            try {
                titleAndText = parse(bArr2);
            } catch (JsonSyntaxException e) {
                LOG.debug("Malformed JSON found as value. Wikipedia may not have an entry for the page '{}'", Bytes.toString(bArr));
                return;
            } catch (RuntimeException e) {
                LOG.debug("Unable to parse the provided Wikipedia data. Skipping record.", e);
                return;
            }
            if (titleAndText == null) {
                LOG.debug("No revisions found for page in Wikipedia. Skipping record.");
                return;
            }

            // Phase 2: wikitext conversion. Any failure here only affects this record, so it is
            // logged and the record is skipped.
            final byte[] plainText;
            try {
                plainText = Bytes.toBytes(toPlainText(titleAndText));
            } catch (EngineException | LinkTargetException e) {
                LOG.debug("Error while parsing wikitext for '{}': '{}'. Skipping record.", Bytes.toString(bArr), e.getMessage());
                return;
            } catch (RuntimeException e) {
                LOG.debug("Unable to parse the provided Wikipedia data. Skipping record.", e);
                return;
            }

            // Phase 3: output. Deliberately outside any catch block so that IOException and
            // InterruptedException reach the caller as declared instead of being logged and dropped.
            context.write(bArr, plainText);
            context.getCounter("custom", "num.records").increment(1L);
        }

        /**
         * Deserializes a raw record value and extracts the title of the first page entry together
         * with the contents of the last element of that page's revision list.
         *
         * @param bArr the bytes of the JSON document
         * @return the title and contents, or {@code null} when the document has no query, no pages,
         *     or the first page has no revisions
         * @throws JsonSyntaxException if the value is not valid JSON for {@code WikiContents}
         */
        @Nullable
        private WikiTitleAndText parse(byte[] bArr) {
            WikiContents wikiContents = GSON.fromJson(Bytes.toString(bArr), WikiContents.class);
            if (wikiContents == null || wikiContents.query == null) {
                return null;
            }

            Map<String, WikiContents.Query.Page> pages = wikiContents.query.pages;
            if (pages == null || pages.isEmpty()) {
                return null;
            }

            // Only the first page entry is considered.
            WikiContents.Query.Page page = pages.values().iterator().next();
            if (page == null) {
                return null;
            }

            // Gson leaves the field null when the JSON has no "revisions" key, so check for null
            // as well as for an empty list.
            List<WikiContents.Query.Page.Content> revisions = page.revisions;
            if (revisions == null || revisions.isEmpty()) {
                return null;
            }

            WikiContents.Query.Page.Content lastRevision = revisions.get(revisions.size() - 1);
            if (lastRevision == null) {
                return null;
            }
            return new WikiTitleAndText(page.title, lastRevision.contents);
        }

        /**
         * Runs the page contents through the Sweble engine's postprocessing and renders the resulting
         * page with a {@code TextConverter}.
         *
         * @param wikiTitleAndText the page title and contents to convert
         * @return the converter's output, cast to {@code String}
         * @throws EngineException if the Sweble engine fails to process the contents
         * @throws LinkTargetException if the page title cannot be turned into a {@code PageTitle}
         */
        private String toPlainText(WikiTitleAndText wikiTitleAndText) throws EngineException, LinkTargetException {
            WikiConfigImpl wikiConfig = DefaultConfigEnWp.generate();

            // 120 is the converter's second constructor argument (presumably the wrap column; confirm).
            TextConverter converter = new TextConverter(wikiConfig, 120);
            WtEngineImpl engine = new WtEngineImpl(wikiConfig);

            PageTitle pageTitle = PageTitle.make(wikiConfig, wikiTitleAndText.title);
            PageId pageId = new PageId(pageTitle, 0L);

            // No expansion callback is supplied to the engine.
            return (String) converter.go(
                    engine.postprocess(pageId, wikiTitleAndText.contents, (ExpansionCallback) null).getPage());
        }

        /**
         * Type-erased overload of {@code map}: casts both arguments to {@code byte[]} and the context
         * to the typed mapper context, then delegates to {@code map(byte[], byte[], Context)}.
         *
         * <p>NOTE(review): the bridge/synthetic markers suggest a decompiler recovered this from a
         * compiler-generated method. javac normally emits such a bridge on its own, so declaring it
         * in source is presumably redundant and may cause a name clash; confirm and consider removing it.
         */
        protected /* bridge */ /* synthetic */ void map(Object obj, Object obj2, Mapper.Context context) throws IOException, InterruptedException {
            map((byte[]) obj, (byte[]) obj2, (Mapper<byte[], byte[], byte[], byte[]>.Context) context);
        }
    }

    /**
     * Declares the static configuration of this program: its name and description, 512 as the
     * mapper resources argument, {@code wikidata} as the input dataset and {@code normalized} as the
     * output dataset.
     */
    @Override
    protected void configure() {
        setName(NAME);
        // The previous text ("dumps page titles to a dataset") did not describe this program, whose
        // mapper writes the plain-text form of each page's contents.
        setDescription("A MapReduce program that validates raw Wikipedia data and normalizes it to plain text.");
        setMapperResources(new Resources(512));
        setInputDataset("wikidata");
        setOutputDataset("normalized");
    }

    /**
     * Prepares the Hadoop job before submission: registers {@link FilterNormalizerMapper} as the
     * mapper and sets the number of reduce tasks to zero, making this a map-only job.
     *
     * @param mapReduceContext the context that exposes the underlying Hadoop job
     * @throws Exception declared by the overridden method
     */
    @Override
    public void beforeSubmit(MapReduceContext mapReduceContext) throws Exception {
        final Job hadoopJob = (Job) mapReduceContext.getHadoopJob();
        hadoopJob.setMapperClass(FilterNormalizerMapper.class);
        hadoopJob.setNumReduceTasks(0);
    }

    /**
     * Records the completion flag in the workflow token under the key {@code result}. Does nothing
     * when the context has no workflow token.
     *
     * @param z the completion flag handed in by the framework (presumably whether the run
     *     succeeded; confirm against the MapReduce API)
     * @param mapReduceContext the context from which the workflow token is obtained
     * @throws Exception declared by the overridden method
     */
    @Override
    public void onFinish(boolean z, MapReduceContext mapReduceContext) throws Exception {
        final WorkflowToken token = mapReduceContext.getWorkflowToken();
        if (token == null) {
            return;
        }
        token.put("result", Value.of(z));
    }
}
