/*
 * Decompiled with CFR 0.152.
 */
package com.jsbsoft.jtf.textsearch.sitesdistants;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import org.apache.oro.text.perl.Perl5Util;
import org.apache.oro.text.regex.MalformedPatternException;
import org.apache.oro.text.regex.Pattern;
import org.apache.oro.text.regex.PatternMatcherInput;
import org.apache.oro.text.regex.Perl5Compiler;
import org.apache.oro.text.regex.Perl5Matcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Singleton helper that downloads HTML pages, extracts the links they contain
 * and reads {@code Disallow} rules from a robots.txt style stream.
 *
 * <p>Obtain the shared instance through {@link #GetInstance()}.
 *
 * <p>NOTE(review): the compiled ORO patterns are shared by every caller. If this
 * class is used from several threads, confirm whether the patterns should be
 * compiled with ORO's read-only option.
 */
public class TraitementPageHTML {
    /** Matches {@code src="..."} and {@code href="..."} attributes (double-quoted values only). */
    public static final String REGEXP_HREF = "(([Ss][Rr][Cc])|([Hh][Rr][Ee][Ff]))[ \\t\\n\\x0B\\f\\r]*=[ \\t\\n\\x0B\\f\\r]*\"[^\"]+\"";
    /** Matches link targets that are skipped by {@link #extraireLiens(String, String)}. */
    public static final String REGEXP_NO_CSS_JSCRIPT_MAILTO = "\\.*(\\.css)|(javascript)|(mailto)|(gif)|(jpg)|(swf)";
    private static final Logger LOG = LoggerFactory.getLogger(TraitementPageHTML.class);
    private static final Pattern _regexpPattern;
    private static final Pattern _regexpPatternNoCSSJScriptMailTo;
    private static final TraitementPageHTML _instance;

    static {
        // Compile the patterns BEFORE the instance becomes reachable, so that no
        // caller can ever obtain the singleton while the patterns are still unset.
        Pattern href = null;
        Pattern exclusions = null;
        try {
            Perl5Compiler compiler = new Perl5Compiler();
            href = compiler.compile(REGEXP_HREF);
            exclusions = compiler.compile(REGEXP_NO_CSS_JSCRIPT_MAILTO, Perl5Compiler.CASE_INSENSITIVE_MASK);
        }
        catch (MalformedPatternException e) {
            // Both expressions are compile-time constants; this only fires if one of them is edited incorrectly.
            LOG.error("mauvais pattern", e);
        }
        _regexpPattern = href;
        _regexpPatternNoCSSJScriptMailTo = exclusions;
        _instance = new TraitementPageHTML();
    }

    /**
     * Returns the unique instance of this class.
     *
     * @return the shared instance, never {@code null}
     */
    public static TraitementPageHTML GetInstance() {
        return _instance;
    }

    /**
     * Downloads the resource located at {@code url} and returns it as a string.
     *
     * @param url location of the page to download
     * @return the downloaded content; every byte is mapped to the character with the same code
     * @throws IOException if the resource cannot be opened or read
     */
    public String getPageHTML(URL url) throws IOException {
        StringWriter sw = new StringWriter();
        this.sauvegardePage(url, sw);
        return sw.toString();
    }

    /**
     * Extracts the targets of all {@code src} and {@code href} attributes found in a page.
     *
     * <p>Fragments ({@code #...}) are removed, targets matching
     * {@link #REGEXP_NO_CSS_JSCRIPT_MAILTO} are skipped, and targets that do not start
     * with {@code http} (case-insensitive) are resolved against the URL of the page.
     *
     * @param szPage            HTML content to scan; {@code null} yields an empty list
     * @param szUrlPremierePage URL of the page the content came from, used to resolve relative
     *                          links; when it is {@code null}, contains no usable {@code '/'} or is
     *                          malformed, relative links are returned unresolved
     * @return the extracted links, in document order (duplicates are kept)
     */
    public List<String> extraireLiens(String szPage, String szUrlPremierePage) {
        List<String> lHref = new ArrayList<String>();
        if (szPage == null) {
            return lHref;
        }
        URL urlBase = null;
        if (szUrlPremierePage != null && szUrlPremierePage.lastIndexOf('/') > 0) {
            try {
                // Use the page URL itself as resolution context: java.net.URL already drops the
                // last path segment when resolving a relative spec. Truncating the string at the
                // last '/' beforehand made the resolution lose one more directory level.
                urlBase = new URL(szUrlPremierePage);
            }
            catch (MalformedURLException e) {
                LOG.error("mauvaise URL", e);
            }
        }
        PatternMatcherInput pmiLigne = new PatternMatcherInput(szPage);
        Perl5Matcher matcher = new Perl5Matcher();
        while (matcher.contains(pmiLigne, _regexpPattern)) {
            String szResultat = matcher.getMatch().toString();
            // The attribute value is whatever sits between the first and the last double quote of the match.
            String szHref = szResultat.substring(szResultat.indexOf('"') + 1, szResultat.lastIndexOf('"'));
            int nIndexDiese = szHref.indexOf('#');
            if (nIndexDiese != -1) {
                szHref = szHref.substring(0, nIndexDiese);
            }
            if (matcher.contains(szHref, _regexpPatternNoCSSJScriptMailTo)) {
                continue;
            }
            if (!szHref.toLowerCase().startsWith("http") && urlBase != null) {
                try {
                    lHref.add(new URL(urlBase, szHref).toString());
                }
                catch (MalformedURLException e) {
                    // An unresolvable link is logged and dropped; the scan goes on.
                    LOG.error("mauvaise URL", e);
                }
                continue;
            }
            lHref.add(szHref);
        }
        return lHref;
    }

    /**
     * Collects the values of every {@code Disallow:} line read from {@code r}.
     *
     * <p>Lines starting with {@code #} and blank lines are ignored, and trailing
     * {@code # ...} comments are stripped. {@code User-agent} lines are not interpreted:
     * the rules of all sections are returned together.
     *
     * @param r reader positioned at the beginning of the rules; it is not closed by this method
     * @return the disallowed paths in reading order; a {@code Disallow:} line without value yields {@code "/"}
     * @throws IOException if reading from {@code r} fails
     */
    public String[] parse(BufferedReader r) throws IOException {
        Perl5Util p = new Perl5Util();
        List<String> disallowed = new ArrayList<String>();
        String line;
        while ((line = r.readLine()) != null) {
            if (p.match("/^#.*/", line)) {
                continue;
            }
            // Strip a trailing comment. The previous expression required a literal space at the
            // end of the match, which left the last word of the comment glued to the rule.
            line = p.substitute("s/\\s*\\#.*//", line);
            if (p.match("/^\\s*$/", line)) {
                continue;
            }
            if (!p.match("/^Disallow:\\s*(.*)/i", line)) {
                continue;
            }
            String disallow = p.group(1);
            if (disallow == null || disallow.length() == 0) {
                // NOTE(review): an empty Disallow is mapped to "/" here; confirm this is the intended meaning.
                disallow = "/";
            }
            disallowed.add(disallow);
        }
        return disallowed.toArray(new String[disallowed.size()]);
    }

    /**
     * Copies the content located at {@code url} into {@code writer}.
     *
     * <p>Each byte read is written as the character with the same code; no other charset
     * decoding is performed. The stream opened on the URL is always closed; the writer is
     * left open for the caller.
     *
     * @param url    location of the content to read
     * @param writer destination of the content
     * @throws IOException if the content cannot be opened, read or written
     */
    protected void sauvegardePage(URL url, Writer writer) throws IOException {
        BufferedInputStream in = new BufferedInputStream(url.openStream());
        try {
            int c;
            while ((c = in.read()) != -1) {
                writer.write(c);
            }
        }
        finally {
            // Release the underlying connection even when reading or writing fails.
            in.close();
        }
    }

    /** Not instantiable from outside; use {@link #GetInstance()}. */
    private TraitementPageHTML() {
    }
}

