/*
 * Decompiled with CFR 0.152.
 */
package com.jsbsoft.jtf.textsearch.sitesdistants;

import com.jsbsoft.jtf.textsearch.Index;
import com.jsbsoft.jtf.textsearch.Indexer;
import com.jsbsoft.jtf.textsearch.RechercheFmt;
import com.jsbsoft.jtf.textsearch.Searcher;
import com.jsbsoft.jtf.textsearch.sitesdistants.QueueFluxHTML;
import com.jsbsoft.jtf.textsearch.sitesdistants.QueueSiteAIndexer;
import com.jsbsoft.jtf.textsearch.sitesdistants.RechercheSitesDistants;
import com.jsbsoft.jtf.textsearch.sitesdistants.ThreadAspirateur;
import com.jsbsoft.jtf.textsearch.sitesdistants.URLQueue;
import com.jsbsoft.jtf.webutils.ContextePage;
import com.kportal.core.cluster.ClusterHelper;
import com.kportal.core.config.PropertyHelper;
import com.univ.objetspartages.om.EtatFiche;
import com.univ.objetspartages.om.Site;
import com.univ.utils.Chaine;
import com.univ.xhtml.HTMLParser;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Map;
import java.util.Vector;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;
import org.apache.oro.text.regex.MalformedPatternException;
import org.apache.oro.text.regex.Pattern;
import org.apache.oro.text.regex.Perl5Compiler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Singleton that crawls remote sites and indexes their HTML pages with Lucene.
 *
 * <p>Sites to index are queued in a {@link QueueSiteAIndexer}. A single worker thread pops
 * them one at a time; for each site it starts {@link ThreadAspirateur} download threads that
 * feed a {@link QueueFluxHTML}, while the worker itself turns every downloaded page into a
 * Lucene document.
 *
 * <p>Thread-safety: the crawl state (current depth, running-thread counter, end flags) is shared
 * between the worker and the download threads. Counter and depth updates happen in
 * {@code synchronized} methods; the fields polled outside those methods are {@code volatile}.
 */
public class IndexeurSitesDistants {
    /** Core property key: maximum number of download threads ({@code -1} = one per queued URL). */
    public static String PARAM_JTF_NBTHREADS = "lucene.nb_threads_aspiration";
    /** Core property key: number of idle one-second polls after which a crawl is considered over. */
    public static String PARAM_JTF_TIMEOUT = "lucene.timeout_threads";
    /** Core property key: size limit handed to the download threads for the HTML queue. */
    public static String PARAM_JTF_MAXSIZE_HTMLQUEUE = "lucene.max_size_queue";
    /** Core property key: sleep duration handed to the download threads. */
    public static String PARAM_JTF_TIME_SLEEP = "lucene.time_sleep";
    /** Default for {@link #PARAM_JTF_NBTHREADS}. */
    public static int NB_MAX_THREADS_DEFAUT = 4;
    /** Default for {@link #PARAM_JTF_TIMEOUT}. */
    public static int TIMEOUT_DEFAUT = 10;
    /** Default for {@link #PARAM_JTF_MAXSIZE_HTMLQUEUE}. */
    private static final int MAX_SIZE_QUEUE_DEFAUT = 1000;
    /** Default for {@link #PARAM_JTF_TIME_SLEEP}. */
    private static final long TIME_SLEEP_DEFAUT = 30000L;

    private Logger logger = LoggerFactory.getLogger(IndexeurSitesDistants.class);
    private static IndexeurSitesDistants _instance;
    private final QueueSiteAIndexer queueSites = new QueueSiteAIndexer();
    // Written by download threads (synchronized), polled by the worker and by the getters.
    private volatile int nNiveauCourantProfondeur;
    private int nNiveauMaxProfondeur;
    private volatile int nNbThreadsCourants;
    private int nNbMaxThreads;
    private int nTimeout;
    private int maxSizeQueue;
    private long timeToSleep;
    // Set by the last download thread, polled by the worker in indexePagesHTML.
    private volatile boolean bFinAspiration;
    // true when no worker thread is running; guarded by "this" for check-then-set sequences.
    private volatile boolean bFinIndexation = true;
    private URLQueue queueURL;
    private QueueFluxHTML queueHTML;
    private Pattern patternURLAcceptation;
    private Pattern patternURLRefus;
    private String szUrlSite;
    private String[] aszUrlDisallows;

    /**
     * Replaces the logger used by this indexer.
     *
     * @param logger the logger to use from now on
     */
    public void setLogger(Logger logger) {
        this.logger = logger;
    }

    /**
     * Returns the unique instance, creating it on first use.
     *
     * @return the shared indexer
     */
    public static synchronized IndexeurSitesDistants getInstance() {
        if (_instance == null) {
            _instance = new IndexeurSitesDistants();
        }
        return _instance;
    }

    /**
     * Queues a site for indexing and makes sure a worker thread is running. The searcher is
     * refreshed across the cluster whatever the outcome.
     *
     * @param site the site to index
     * @param join when {@code true} and this call starts the worker, wait for it to finish
     * @throws Exception declared for callers; propagated from the queue or the cluster refresh
     */
    public void indexeSite(Site site, boolean join) throws Exception {
        boolean interrompu = false;
        try {
            this.queueSites.push(site);
            this.lanceIndexations(join);
        }
        catch (InterruptedException e) {
            interrompu = true;
            this.logger.error(e.getMessage(), e);
        }
        finally {
            try {
                ClusterHelper.refresh(Searcher.getInstance(), null);
            }
            finally {
                // Restore the interrupt status only after the refresh so that the refresh itself
                // is not disturbed, while still letting the caller see the interruption.
                if (interrompu) {
                    Thread.currentThread().interrupt();
                }
            }
        }
    }

    /**
     * Returns the indexing states tracked by the site queue.
     *
     * @return the map returned by the site queue
     */
    public Map<Long, String> getEtatsSites() {
        return this.queueSites.getEtats();
    }

    /**
     * Indexes every site returned by an unfiltered {@code select}, one after the other, waiting
     * for each worker run started by this call.
     *
     * @param ctx the page context given to the {@link Site} used for the selection
     * @throws Exception propagated from the selection or from {@link #indexeSite(Site, boolean)}
     */
    public void indexeSites(ContextePage ctx) throws Exception {
        Site curseur = new Site();
        curseur.init();
        curseur.setCtx(ctx);
        curseur.select("");
        while (curseur.nextItem()) {
            // The cursor object is reused by nextItem(), hence the clone placed in the queue.
            this.indexeSite((Site)curseur.clone(), true);
        }
    }

    /**
     * Tells whether a worker thread is currently running.
     *
     * @return {@code true} while an indexing run is in progress
     */
    public boolean indexationEnCours() {
        return !this.bFinIndexation;
    }

    /** @return the number of download threads currently accounted for */
    public int getNbThreadsCourants() {
        return this.nNbThreadsCourants;
    }

    /** @return the configured maximum number of download threads */
    public int getNbMaxThreads() {
        return this.nNbMaxThreads;
    }

    /** @return the depth level currently being crawled */
    public int getNiveauCourantProfondeur() {
        return this.nNiveauCourantProfondeur;
    }

    /** @return the maximum depth level of the site being crawled */
    public int getNiveauMaxProfondeur() {
        return this.nNiveauMaxProfondeur;
    }

    /**
     * Checks a URL against the disallow entries read from robots.txt.
     *
     * @param szUrl the URL to check
     * @return {@code false} when the URL contains one of the disallowed fragments,
     *         {@code true} otherwise or when no robots.txt rules are loaded
     */
    public boolean accepteUrlRobots(String szUrl) {
        String[] disallows = this.aszUrlDisallows;
        if (disallows == null) {
            return true;
        }
        for (String fragment : disallows) {
            // An empty "Disallow:" means "nothing is disallowed"; without this guard an empty
            // entry would match every URL. Null entries are skipped for the same reason.
            if (fragment == null || fragment.length() == 0) {
                continue;
            }
            if (szUrl.contains(fragment)) {
                this.logger.debug("\t!!URL refusee (robots.txt) : " + szUrl);
                return false;
            }
        }
        return true;
    }

    /** @return the compiled acceptance pattern of the current site, or {@code null} if none */
    public Pattern getPatternURLAcceptation() {
        return this.patternURLAcceptation;
    }

    /** @return the compiled refusal pattern of the current site, or {@code null} if none */
    public Pattern getPatternURLRefus() {
        return this.patternURLRefus;
    }

    /**
     * Starts the worker thread that empties the site queue, unless one is already running.
     *
     * @param join when {@code true}, block until the worker started by this call has finished;
     *             has no effect when a worker was already running
     * @throws InterruptedException if the calling thread is interrupted while joining
     */
    protected void lanceIndexations(boolean join) throws InterruptedException {
        // Atomic check-then-set so that two callers can never both start a worker. The monitor
        // is released before start()/join(): download threads need it to report completion.
        synchronized (this) {
            if (!this.bFinIndexation) {
                return;
            }
            this.bFinIndexation = false;
        }
        Thread threadIndexation = new Thread(){

            @Override
            public void run() {
                boolean fileVidee = false;
                try {
                    Site site = IndexeurSitesDistants.this.prochainSite();
                    while (site != null) {
                        IndexeurSitesDistants.this.traiteSite(site);
                        site = IndexeurSitesDistants.this.prochainSite();
                    }
                    // prochainSite() already marked the run as finished under the monitor.
                    fileVidee = true;
                }
                finally {
                    // Abnormal exit (unexpected runtime failure): release the "running" state,
                    // otherwise no indexing could ever be started again.
                    if (!fileVidee) {
                        IndexeurSitesDistants.this.bFinIndexation = true;
                    }
                }
            }
        };
        boolean demarre = false;
        try {
            threadIndexation.start();
            demarre = true;
        }
        finally {
            if (!demarre) {
                this.bFinIndexation = true;
            }
        }
        if (join) {
            threadIndexation.join();
        }
    }

    /**
     * Pops the next site to index. When the queue is empty the run is marked as finished in the
     * same critical section, so a site pushed concurrently is either returned here or triggers a
     * new worker in {@link #lanceIndexations(boolean)} -- it can no longer be stranded.
     */
    private synchronized Site prochainSite() {
        Site site = this.queueSites.pop();
        if (site == null) {
            this.bFinIndexation = true;
        }
        return site;
    }

    /** Indexes one queued site, logs timing, reports completion and re-inits the remote search. */
    private void traiteSite(Site site) {
        Date dateDeb = new Date();
        this.logger.info("%%%% D\u00e9but de l'indexation de " + site.getUrl() + " \u00e0 " + dateDeb);
        int nbPages;
        try {
            nbPages = this.indexe(site);
        }
        catch (Exception e) {
            this.logger.error("erreur lors de l'indexation", e);
            nbPages = 0;
        }
        this.queueSites.finIndexation(site);
        Date dateFin = new Date();
        this.logger.info("%%%% Fin de l'indexation de " + site.getUrl() + " en " + (dateFin.getTime() - dateDeb.getTime()) / 1000L + "s - " + nbPages + " pages index\u00e9es.");
        RechercheSitesDistants.init();
    }

    /**
     * Rebuilds the index of one site: empties it, crawls from the site URL and indexes every
     * downloaded page, then merges and closes the writer.
     *
     * @param siteDistant the site to crawl
     * @return the number of pages indexed
     * @throws MalformedURLException if the site URL cannot be parsed
     * @throws MalformedPatternException if an accept/refuse expression of the site is invalid
     * @throws IOException on index access failure
     */
    protected int indexe(Site siteDistant) throws MalformedURLException, MalformedPatternException, IOException {
        IndexWriter writer = this.prepareIndexation(siteDistant);
        try {
            // Parse the start URL before wiping the index: a malformed URL must not cost the
            // existing index content.
            URL urlDepart = new URL(this.szUrlSite);
            writer.deleteAll();
            writer.commit();
            this.queueURL.push(urlDepart, 0);
            this.lanceThreadsAspiration();
            return this.indexePagesHTML(writer, siteDistant);
        }
        finally {
            try {
                writer.forceMerge(1, true);
            }
            finally {
                // Always close, even when the merge fails, so the index lock is released.
                writer.close();
            }
        }
    }

    /**
     * Resets the crawl state for a site (depth, queues, robots.txt rules, URL patterns) and opens
     * an {@link IndexWriter} on the site's index directory, creating the directory if needed.
     *
     * @param siteDistant the site about to be crawled
     * @return a writer opened in create-or-append mode
     * @throws MalformedPatternException if an accept/refuse expression of the site is invalid
     * @throws IOException if the index directory cannot be created or opened
     */
    protected IndexWriter prepareIndexation(Site siteDistant) throws MalformedPatternException, IOException {
        this.nNiveauCourantProfondeur = 0;
        this.nNiveauMaxProfondeur = siteDistant.getNiveauProfondeur();
        this.szUrlSite = siteDistant.getUrl();
        this.bFinAspiration = false;
        this.queueHTML = new QueueFluxHTML();
        this.queueURL = new URLQueue();
        this.queueURL.setMaxElements(-1);
        this.litRobotsTxt();
        this.patternURLAcceptation = IndexeurSitesDistants.compileRegExp(siteDistant.getRegExpAccepte());
        this.patternURLRefus = IndexeurSitesDistants.compileRegExp(siteDistant.getRegExpRefuse());
        File repertoireIndexation = siteDistant.getRepertoireIndexation();
        // mkdirs() also creates missing parents; the isDirectory() re-check tolerates a
        // concurrent creation of the same directory.
        if (!repertoireIndexation.exists() && !repertoireIndexation.mkdirs() && !repertoireIndexation.isDirectory()) {
            throw new IOException("Impossible de creer le repertoire d'indexation : " + repertoireIndexation);
        }
        Directory directory = Searcher.getInstance().getDirectory(repertoireIndexation);
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36, Indexer.analyzer);
        conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        return new IndexWriter(directory, conf);
    }

    /** Compiles a Perl5 expression; a null or empty expression yields {@code null} (no filter). */
    private static Pattern compileRegExp(String szRegExp) throws MalformedPatternException {
        if (szRegExp == null || szRegExp.equals("")) {
            return null;
        }
        return new Perl5Compiler().compile(szRegExp);
    }

    /**
     * Starts download threads for the current depth level: as many as there are free thread
     * slots, capped by the number of URLs queued at that level (or exactly that number when the
     * maximum is {@code -1}).
     */
    protected synchronized void lanceThreadsAspiration() {
        int nbUrlsEnAttente = this.queueURL.getQueueSize(this.nNiveauCourantProfondeur);
        int nbALancer = this.nNbMaxThreads - this.nNbThreadsCourants;
        if (this.nNbMaxThreads == -1 || nbUrlsEnAttente < nbALancer) {
            nbALancer = nbUrlsEnAttente;
        }
        for (int i = 0; i < nbALancer; ++i) {
            // The counter value before increment is handed to the thread as its identifier.
            ThreadAspirateur threadAspi = new ThreadAspirateur(this, this.szUrlSite, this.nNbThreadsCourants++, this.nNiveauCourantProfondeur, this.queueURL, this.queueHTML, this.maxSizeQueue, this.timeToSleep);
            threadAspi.start();
        }
    }

    /**
     * Called when a download thread has finished. When the last thread of a level ends, the crawl
     * moves on to the next level, or is flagged as finished if that level has no URL queued.
     *
     * @param threadId identifier of the finishing thread (not used here)
     */
    protected synchronized void finTraitementThreadAspiration(int threadId) {
        --this.nNbThreadsCourants;
        if (this.nNbThreadsCourants != 0) {
            return;
        }
        ++this.nNiveauCourantProfondeur;
        if (this.nNiveauCourantProfondeur > this.nNiveauMaxProfondeur) {
            // Maximum depth reached: no flag is raised here, the indexing loop stops by itself
            // once it has been idle for the configured timeout.
            return;
        }
        if (this.queueURL.getQueueSize(this.nNiveauCourantProfondeur) == 0) {
            this.bFinAspiration = true;
            return;
        }
        this.lanceThreadsAspiration();
    }

    /**
     * Consumes the HTML queue and adds one Lucene document per downloaded page, until the crawl
     * is flagged as finished and the queue is empty. When the queue is empty and no download
     * thread is running for {@code nTimeout} consecutive one-second polls, the crawl is flagged
     * as finished from here.
     *
     * @param writer the writer receiving the documents
     * @param indexation the site being crawled
     * @return the number of pages successfully indexed
     * @throws IOException declared for callers; per-page failures are logged and skipped
     */
    protected int indexePagesHTML(IndexWriter writer, Site indexation) throws IOException {
        int nIterationVide = 0;
        int nbPage = 0;
        while (true) {
            QueueFluxHTML.FluxHTML fluxHTML = this.queueHTML.pop();
            if (fluxHTML != null) {
                try {
                    Index index = this.creeIndex(fluxHTML, indexation);
                    Document docPageHTML = index.creerDocument();
                    writer.addDocument(docPageHTML);
                    this.logger.debug("==Indexation de " + fluxHTML.getUrl() + " terminee.");
                    ++nbPage;
                }
                catch (Exception e) {
                    this.logger.error("Exception lors de l'indexation de : " + fluxHTML.getUrl(), e);
                }
                continue;
            }
            // Only leave once the queue has been drained: pages downloaded just before the end
            // flag was raised must still be indexed.
            if (this.bFinAspiration) {
                break;
            }
            try {
                Thread.sleep(1000L);
            }
            catch (InterruptedException e) {
                // The worker is not designed to be cancelled: keep polling as before. The
                // interrupt status is deliberately not restored, since the index writer is
                // still to be merged and closed on this thread.
                // NOTE(review): confirm whether cancellation support is wanted here.
                this.logger.error("Erreur los de l'indexation des pages HTML", e);
                continue;
            }
            if (this.queueHTML.getTaille() != 0 || this.nNbThreadsCourants != 0) {
                nIterationVide = 0;
            } else if (++nIterationVide >= this.nTimeout) {
                // ">=" so that a timeout configured to 0 or less still terminates the loop.
                this.bFinAspiration = true;
            }
        }
        return nbPage;
    }

    /**
     * Builds the index entry of one downloaded page: text, title (falling back to the site label
     * when the page has none), keywords and description, all passed through
     * {@code Chaine.encode(..., "UTF-8")}.
     *
     * @param fluxHTML the downloaded page
     * @param indexation the site the page belongs to
     * @return the populated index entry
     * @throws Exception propagated from HTML parsing or encoding
     */
    private Index creeIndex(QueueFluxHTML.FluxHTML fluxHTML, Site indexation) throws Exception {
        HTMLParser htmlParser = new HTMLParser();
        htmlParser.setInputHtml(fluxHTML.getPage());
        String contenu = htmlParser.extractString(false);
        String titre = htmlParser.getTitle();
        // Guard against a null title as well as a blank one.
        if (titre == null || titre.trim().length() == 0) {
            titre = indexation.getLibelle();
        }
        String motsCles = htmlParser.getMetaTag("keywords");
        String resume = htmlParser.getMetaTag("description");
        contenu = Chaine.encode(contenu, "UTF-8");
        titre = Chaine.encode(titre, "UTF-8");
        motsCles = Chaine.encode(motsCles, "UTF-8");
        resume = Chaine.encode(resume, "UTF-8");

        String url = fluxHTML.getUrl();
        // Formatted once so that both date fields always carry the same day.
        String aujourdhui = new SimpleDateFormat("yyyyMMdd").format(new Date());
        Vector<String> codeRubrique = new Vector<String>();
        codeRubrique.add(indexation.getCode());

        Index index = new Index();
        index.setCodeRubrique(codeRubrique);
        index.setLangue("0");
        index.setContent(RechercheFmt.formaterTexteRecherche(contenu, Boolean.FALSE, Boolean.FALSE));
        index.setContentFile("");
        index.setTitle(titre);
        index.setIdentifiantUnique(url);
        index.setUrl(url);
        index.setEtatFiche(EtatFiche.EN_LIGNE.getEtat());
        index.setLastModified(aujourdhui);
        index.setMiseEnLigne(aujourdhui);
        index.setKeywords(motsCles);
        index.setDescription(resume);
        return index;
    }

    /**
     * Loads the disallow entries from the robots.txt of the current site. Any failure (missing
     * file, unreachable host, malformed site URL) leaves the indexer without robots rules.
     */
    protected void litRobotsTxt() {
        this.aszUrlDisallows = null;
        BufferedReader bReader = null;
        try {
            // robots.txt lives at the root of the host; resolving "/robots.txt" against the site
            // URL also copes with URLs that have no trailing slash or point deep into the site.
            URL urlRobotsTxt = new URL(new URL(this.szUrlSite), "/robots.txt");
            bReader = new BufferedReader(new InputStreamReader(urlRobotsTxt.openConnection().getInputStream(), "UTF-8"));
            this.aszUrlDisallows = HTMLParser.parseRobots(bReader);
        }
        catch (Exception e) {
            // Best effort: crawling goes on without robots rules.
            this.aszUrlDisallows = null;
            this.logger.debug("robots.txt non lu pour " + this.szUrlSite, e);
        }
        finally {
            if (bReader != null) {
                try {
                    bReader.close();
                }
                catch (IOException ignored) {
                    // Nothing useful can be done if closing the stream fails.
                }
            }
        }
    }

    /** Reads the crawl settings from the core properties, falling back to defaults. */
    private IndexeurSitesDistants() {
        this.nNbMaxThreads = this.litEntier(PARAM_JTF_NBTHREADS, NB_MAX_THREADS_DEFAUT);
        this.nTimeout = this.litEntier(PARAM_JTF_TIMEOUT, TIMEOUT_DEFAUT);
        this.maxSizeQueue = this.litEntier(PARAM_JTF_MAXSIZE_HTMLQUEUE, MAX_SIZE_QUEUE_DEFAUT);
        this.timeToSleep = this.litLong(PARAM_JTF_TIME_SLEEP, TIME_SLEEP_DEFAUT);
        this.logger.debug("%%%% Nombre de threads affectes a l'aspiration: " + this.nNbMaxThreads + " - Timeout: " + this.nTimeout + "s");
    }

    /** Reads an int core property; a missing or non-numeric value yields {@code defaut}. */
    private int litEntier(String cle, int defaut) {
        String valeur = PropertyHelper.getCoreProperty(cle);
        if (valeur == null) {
            return defaut;
        }
        try {
            return Integer.parseInt(valeur);
        }
        catch (NumberFormatException e) {
            this.logger.warn("Valeur invalide pour " + cle + " : " + valeur + " - valeur par defaut utilisee : " + defaut);
            return defaut;
        }
    }

    /** Reads a long core property; a missing or non-numeric value yields {@code defaut}. */
    private long litLong(String cle, long defaut) {
        String valeur = PropertyHelper.getCoreProperty(cle);
        if (valeur == null) {
            return defaut;
        }
        try {
            return Long.parseLong(valeur);
        }
        catch (NumberFormatException e) {
            this.logger.warn("Valeur invalide pour " + cle + " : " + valeur + " - valeur par defaut utilisee : " + defaut);
            return defaut;
        }
    }
}

