/*
 * Decompiled with CFR 0.152.
 */
package com.jsbsoft.jtf.textsearch.sitesdistants;

import com.jsbsoft.jtf.textsearch.sitesdistants.IndexeurSitesDistants;
import com.jsbsoft.jtf.textsearch.sitesdistants.QueueFluxHTML;
import com.jsbsoft.jtf.textsearch.sitesdistants.URLQueue;
import com.univ.xhtml.HTMLParser;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.List;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.lang3.StringUtils;
import org.apache.oro.text.regex.Pattern;
import org.apache.oro.text.regex.Perl5Matcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class ThreadAspirateur
extends Thread {
    private final Logger LOG = LoggerFactory.getLogger(ThreadAspirateur.class);
    private final int nNiveauCourantProfondeur;
    private final int nIdThread;
    private final int maxSizeQueue;
    private final URLQueue queueURL;
    private final QueueFluxHTML queueHTML;
    private final IndexeurSitesDistants gThreadsAspi;
    private final long timeToSleep;

    public ThreadAspirateur(IndexeurSitesDistants gThreadsAspi, String szUrlBase, int nIdThread, int nNiveauCourantProfondeur, URLQueue queueURL, QueueFluxHTML queueHTML, int maxSizeQueue, long timeToSleep) {
        this.gThreadsAspi = gThreadsAspi;
        this.nIdThread = nIdThread;
        this.nNiveauCourantProfondeur = nNiveauCourantProfondeur;
        this.queueURL = queueURL;
        this.queueHTML = queueHTML;
        this.maxSizeQueue = maxSizeQueue;
        this.timeToSleep = timeToSleep;
    }

    @Override
    public void run() {
        URL url = this.queueURL.pop(this.nNiveauCourantProfondeur);
        while (url != null) {
            while (this.queueHTML.getTaille() >= this.maxSizeQueue) {
                try {
                    Thread.sleep(this.timeToSleep);
                }
                catch (Exception exception) {}
            }
            this.analysePage(url);
            if (this.gThreadsAspi.getNbMaxThreads() > this.gThreadsAspi.getNbThreadsCourants()) {
                try {
                    this.gThreadsAspi.lanceThreadsAspiration();
                }
                catch (Exception e) {
                    this.LOG.error("[" + this.nIdThread + "] " + e.toString());
                }
            }
            url = this.queueURL.pop(this.nNiveauCourantProfondeur);
        }
        this.gThreadsAspi.finTraitementThreadAspiration(this.nIdThread);
    }

    protected void analysePage(URL pageURL) {
        try {
            int nNiveauMaxProfondeur;
            String szUrlPage = pageURL.toString();
            URLConnection urlConnection = pageURL.openConnection();
            int responseCode = ((HttpURLConnection)urlConnection).getResponseCode();
            if (responseCode != 200 && responseCode != 301 && responseCode != 302) {
                this.LOG.error("Code " + responseCode + " (" + HttpStatus.getStatusText((int)responseCode) + ") : " + szUrlPage);
                return;
            }
            String szMimeType = urlConnection.getContentType();
            if (!szMimeType.startsWith("text")) {
                return;
            }
            HTMLParser htmlParser = new HTMLParser(urlConnection);
            htmlParser.parse();
            String szPage = htmlParser.getInputHtml();
            this.queueHTML.push(szPage, (String)StringUtils.defaultIfEmpty((CharSequence)htmlParser.getCanonicalLink(), (CharSequence)szUrlPage));
            String szUrlBase = szUrlPage;
            int nIndex = szUrlPage.lastIndexOf(47);
            if (nIndex > 0) {
                try {
                    szUrlBase = new URL(szUrlPage.substring(0, nIndex + 1)).toString();
                }
                catch (MalformedURLException e) {
                    this.LOG.error("mauvaise URL", (Throwable)e);
                }
            }
            if (this.nNiveauCourantProfondeur <= (nNiveauMaxProfondeur = this.gThreadsAspi.getNiveauMaxProfondeur()) || nNiveauMaxProfondeur == -1) {
                Pattern patternAcceptation = this.gThreadsAspi.getPatternURLAcceptation();
                Pattern patternRefus = this.gThreadsAspi.getPatternURLRefus();
                Perl5Matcher matcher = null;
                if (patternAcceptation != null || patternRefus != null) {
                    matcher = new Perl5Matcher();
                }
                String szLien = null;
                List<String> lLiens = htmlParser.extractLinks(szUrlBase);
                for (String string : lLiens) {
                    try {
                        szLien = string;
                        this.ajouteUrl(szLien, matcher, pageURL, patternAcceptation, patternRefus);
                    }
                    catch (MalformedURLException e) {
                        this.LOG.error("URL invalide : " + szLien);
                    }
                }
            }
        }
        catch (Exception e) {
            this.LOG.warn(e.getMessage());
        }
    }

    protected void ajouteUrl(String szLien, Perl5Matcher matcher, URL pageURL, Pattern patternAcceptation, Pattern patternRefus) throws MalformedURLException {
        boolean bAjouteLien = false;
        URL urlLien = new URL(pageURL, szLien);
        String szUrlAbsolue = urlLien.toString();
        if (!this.gThreadsAspi.accepteUrlRobots(urlLien.toString())) {
            return;
        }
        if (matcher != null) {
            if (patternAcceptation != null) {
                bAjouteLien = matcher.matches(szUrlAbsolue, patternAcceptation);
            }
            if (bAjouteLien && patternRefus != null) {
                bAjouteLien = !matcher.matches(szUrlAbsolue, patternRefus);
            }
        } else {
            bAjouteLien = true;
        }
        if (bAjouteLien) {
            if (this.gThreadsAspi.getNiveauMaxProfondeur() == -1) {
                this.queueURL.push(urlLien, this.nNiveauCourantProfondeur);
            } else {
                this.queueURL.push(urlLien, this.nNiveauCourantProfondeur + 1);
            }
        } else {
            this.LOG.debug("URL refusee : " + urlLien);
        }
    }
}

