/*
 * Decompiled with CFR 0.152.
 */
package com.univ.xhtml;

import com.univ.xhtml.HTMLLinkExtractor;
import com.univ.xhtml.HTMLStringExtractor;
import com.univ.xhtml.tags.HeadLinkTag;
import java.io.BufferedReader;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.OrFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.nodes.TagNode;
import org.htmlparser.tags.ImageTag;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.MetaTag;
import org.htmlparser.tags.ScriptTag;
import org.htmlparser.tags.TitleTag;
import org.htmlparser.util.NodeIterator;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import org.htmlparser.util.SimpleNodeIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link Parser} extension that keeps an HTML document as a string and offers
 * helpers to query it (title, canonical link, meta tags, links, text) and to
 * rewrite the URLs it contains as absolute URLs.
 *
 * <p>Most helpers work on the string held in {@code inputHtml}; it is set either
 * explicitly through {@link #setInputHtml(String)} or by {@link #parse()}.
 */
public class HTMLParser extends Parser {
    private static final long serialVersionUID = 241127507423866036L;
    private static final Logger LOG = LoggerFactory.getLogger(HTMLParser.class);

    /** Matches a {@code Disallow:} directive and captures everything after the colon. */
    private static final Pattern DISALLOW_PATTERN = Pattern.compile("Disallow:(.*)");

    /** Matches a {@code User-agent:} directive whose value starts with the {@code *} wildcard. */
    private static final Pattern WILDCARD_AGENT_PATTERN = Pattern.compile("User-agent:\\*");

    /** Attributes inspected by the URL rewriting, in lookup order. */
    private static final String[] URL_ATTRIBUTES = {"src", "href", "action", "background"};

    private String inputHtml = "";

    public HTMLParser() throws ParserException {
    }

    public HTMLParser(String url) throws ParserException {
        super(url);
    }

    public HTMLParser(URLConnection urlConnection) throws ParserException {
        super(urlConnection);
    }

    /**
     * Replaces the HTML that the helper methods of this class operate on.
     *
     * @param input the HTML source to store
     */
    public void setInputHtml(String input) {
        this.inputHtml = input;
    }

    /**
     * @return the HTML currently stored in this parser
     */
    public String getInputHtml() {
        return this.inputHtml;
    }

    /**
     * Parses the underlying source without any filter and stores the resulting
     * HTML as the input of the helper methods.
     *
     * @throws ParserException if the underlying parse fails
     */
    public void parse() throws ParserException {
        this.inputHtml = this.parse(null).toHtml();
    }

    /**
     * Extracts the links of the stored HTML by delegating to {@link HTMLLinkExtractor}.
     *
     * @param urlBase base URL handed to the extractor
     * @return the links returned by the extractor
     */
    public List<String> extractLinks(String urlBase) {
        HTMLLinkExtractor htmlLinkExtractor = HTMLLinkExtractor.GetInstance();
        return htmlLinkExtractor.extractLinksHtmlFromInput(urlBase, this.inputHtml);
    }

    /**
     * Extracts the text of the stored HTML by delegating to {@link HTMLStringExtractor}.
     *
     * @param links flag forwarded unchanged to the extractor
     * @return the string returned by the extractor
     */
    public String extractString(boolean links) {
        HTMLStringExtractor stringExtractor = HTMLStringExtractor.GetInstance();
        return stringExtractor.extractAllStringsFromInput(this.inputHtml, links);
    }

    /**
     * Returns the text of the title tag of the stored HTML.
     *
     * @return the title text when exactly one title tag is found, otherwise an
     *         empty string (this includes documents with several title tags)
     */
    public String getTitle() {
        String res = "";
        HTMLStringExtractor stringExtractor = HTMLStringExtractor.GetInstance();
        NodeList nodeList = stringExtractor.extractTagFromInput(this.inputHtml, new TagNameFilter(new TitleTag().getTagName()));
        if (nodeList.size() == 1) {
            res = ((TitleTag) nodeList.elementAt(0)).getStringText();
        }
        return res;
    }

    /**
     * Returns the {@code href} of the first link tag declared with {@code rel="canonical"}.
     *
     * @return the canonical URL, or an empty string when no such tag with an
     *         {@code href} value is found
     */
    public String getCanonicalLink() {
        String res = "";
        HTMLStringExtractor stringExtractor = HTMLStringExtractor.GetInstance();
        NodeList nodeList = stringExtractor.extractTagFromInput(this.inputHtml, new TagNameFilter(new HeadLinkTag().getTagName()));
        for (int i = 0; i < nodeList.size(); ++i) {
            // The nodes are only known to be TagNode instances here, so the variable
            // must be typed as TagNode (a HeadLinkTag-typed variable does not compile).
            TagNode linkTag = (TagNode) nodeList.elementAt(i);
            String href = linkTag.getAttribute("href");
            if ("canonical".equals(linkTag.getAttribute("rel")) && href != null) {
                res = href;
                break;
            }
        }
        return res;
    }

    /**
     * Returns the content of the first meta tag whose name or http-equiv matches
     * {@code properties}, ignoring case.
     *
     * @param properties the meta name / http-equiv to look for; {@code null}
     *                   matches nothing
     * @return the value of {@code getMetaContent()} for the first matching tag,
     *         or an empty string when no tag matches
     */
    public String getMetaTag(String properties) {
        String res = "";
        if (properties == null) {
            // Nothing can match a null key; avoids a NullPointerException below.
            return res;
        }
        HTMLStringExtractor stringExtractor = HTMLStringExtractor.GetInstance();
        NodeList nodeList = stringExtractor.extractTagFromInput(this.inputHtml, new TagNameFilter(new MetaTag().getTagName()));
        for (int i = 0; i < nodeList.size(); ++i) {
            MetaTag metaTag = (MetaTag) nodeList.elementAt(i);
            if (properties.equalsIgnoreCase(metaTag.getMetaTagName()) || properties.equalsIgnoreCase(metaTag.getHttpEquiv())) {
                res = metaTag.getMetaContent();
                break;
            }
        }
        return res;
    }

    /**
     * Collects the {@code Disallow:} values that apply to every user agent.
     *
     * <p>All spaces are removed from each line before it is examined. A line
     * starting with {@code User-agent:} opens a section; the section is taken into
     * account only when the agent value starts with {@code *}. Inside such a
     * section, the text following {@code Disallow:} is collected as is, so a
     * directive with no value contributes an empty string. Directive names are
     * matched case-sensitively.
     *
     * @param r reader positioned on the robots file content; it is not closed here
     * @return the collected values in file order, possibly empty
     * @throws IOException if reading from {@code r} fails
     */
    public static String[] parseRobots(BufferedReader r) throws IOException {
        List<String> disallowed = new ArrayList<String>();
        boolean appliesToAllAgents = false;
        String line;
        while ((line = r.readLine()) != null) {
            line = StringUtils.remove(line, " ");
            if (line.startsWith("User-agent:")) {
                appliesToAllAgents = WILDCARD_AGENT_PATTERN.matcher(line).find();
                continue;
            }
            if (!appliesToAllAgents || !line.startsWith("Disallow:")) {
                continue;
            }
            Matcher matcher = DISALLOW_PATTERN.matcher(line);
            while (matcher.find()) {
                disallowed.add(matcher.group(1));
            }
        }
        return disallowed.toArray(new String[disallowed.size()]);
    }

    /**
     * Rewrites the stored HTML so that the URL attributes of its tags are absolute.
     *
     * <p>The stored HTML is parsed with a fresh {@link Parser}, every top-level
     * node is processed and serialised again, and the result replaces the stored
     * HTML. Any exception raised on the way is logged and not rethrown; in that
     * case the stored HTML is replaced by whatever had been serialised before the
     * failure.
     *
     * @param urlBase base URL used to resolve relative links; when {@code null}
     *                links are not resolved
     * @throws ParserException declared for callers; failures are currently logged
     *                         rather than propagated
     */
    public void processAbsoluteUrl(String urlBase) throws ParserException {
        StringBuilder outputHtml = new StringBuilder();
        try {
            Parser parser = new Parser();
            parser.setInputHTML(this.inputHtml);
            NodeIterator e = parser.elements();
            while (e.hasMoreNodes()) {
                Node node = e.nextNode();
                this.processNodeAbsoluteUrl(node, urlBase);
                outputHtml.append(node.toHtml());
            }
        }
        catch (Exception e) {
            LOG.error("erreur de parsing de l'html", (Throwable) e);
        }
        this.setInputHtml(outputHtml.toString());
    }

    /**
     * Makes the URL attribute of {@code node}, and of all its descendants, absolute.
     *
     * <p>Script tags and non-tag nodes are left untouched. For any other tag the
     * first attribute of {@link #URL_ATTRIBUTES} that has a value is rewritten;
     * the value is read from and written back to that same attribute.
     *
     * @param node    node to process
     * @param urlBase base URL used for resolution; when {@code null} the value is
     *                written back without being resolved
     */
    private void processNodeAbsoluteUrl(Node node, String urlBase) throws ParserException {
        if (!(node instanceof TagNode) || node instanceof ScriptTag) {
            return;
        }
        TagNode tag = (TagNode) node;

        // Children first, so that nested tags are rewritten as well.
        NodeList children = tag.getChildren();
        if (children != null) {
            SimpleNodeIterator it = children.elements();
            while (it.hasMoreNodes()) {
                this.processNodeAbsoluteUrl(it.nextNode(), urlBase);
            }
        }

        String attribute = findUrlAttribute(tag);
        if (attribute == null) {
            return;
        }

        // Use the dedicated extractors only when they read the attribute that will
        // be written back; otherwise an unrelated attribute would be overwritten.
        String link;
        if (tag instanceof LinkTag && "href".equals(attribute)) {
            link = ((LinkTag) tag).extractLink();
        } else if (tag instanceof ImageTag && "src".equals(attribute)) {
            link = ((ImageTag) tag).extractImageLocn();
        } else {
            link = tag.getAttribute(attribute);
        }
        if (link == null) {
            // Nothing usable was extracted: leave the attribute as it is.
            return;
        }

        tag.setAttribute(attribute, toAbsoluteUrl(link, urlBase));
    }

    /**
     * Returns the first name of {@link #URL_ATTRIBUTES} for which {@code tag} has
     * a non-null value, or {@code null} when it has none.
     */
    private static String findUrlAttribute(TagNode tag) {
        for (int i = 0; i < URL_ATTRIBUTES.length; ++i) {
            if (tag.getAttribute(URL_ATTRIBUTES[i]) != null) {
                return URL_ATTRIBUTES[i];
            }
        }
        return null;
    }

    /**
     * Resolves {@code link} against {@code urlBase}.
     *
     * <p>The link is returned unchanged when {@code urlBase} is {@code null}, when
     * it already starts with {@code http}, when it starts with {@code javascript},
     * or when it cannot be resolved. A link starting with {@code www} is prefixed
     * with {@code http://}.
     */
    private static String toAbsoluteUrl(String link, String urlBase) {
        if (urlBase == null || startsWithIgnoreCase(link, "http")) {
            return link;
        }
        if (startsWithIgnoreCase(link, "www")) {
            return "http://" + link;
        }
        if (startsWithIgnoreCase(link, "javascript")) {
            return link;
        }
        try {
            return new URL(new URL(urlBase), link).toString();
        }
        catch (MalformedURLException e) {
            // Best effort: an unresolvable link is kept as written in the document.
            LOG.debug("Cannot resolve link '{}' against base '{}'; leaving it unchanged", link, urlBase, e);
            return link;
        }
    }

    /** Locale-independent, case-insensitive variant of {@link String#startsWith(String)}. */
    private static boolean startsWithIgnoreCase(String value, String prefix) {
        return value.regionMatches(true, 0, prefix, 0, prefix.length());
    }
}

