/**
 * Copyright (C) 2015 - 2018 Kosmos contact@kosmos.fr
 *
 * Projet: core
 * Version: 6.02.48
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * Created on 6 avr. 2006
 *
 * To change the template for this generated file go to
 * Window - Preferences - Java - Code Generation - Code and Comments
 */
package com.univ.xhtml;

import org.apache.commons.lang3.StringUtils;
import org.htmlparser.Parser;
import org.htmlparser.beans.StringBean;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import org.slf4j.LoggerFactory;

// TODO: Auto-generated Javadoc
/**
 * Helper, exposed as a single shared instance, that extracts plain text or matching tags from HTML content (either fetched from a URL or supplied as a string). Uses a
 * {@link org.htmlparser.beans.StringBean StringBean} to accumulate the user visible text (what a browser would display) into a single string.
 */
public class HTMLStringExtractor {

	private static final org.slf4j.Logger LOG = LoggerFactory.getLogger(HTMLStringExtractor.class);

	/**
	 * The single shared instance. It is created once, during class initialization, so no explicit locking is needed in {@link #GetInstance()}.
	 */
	private static final HTMLStringExtractor INSTANCE = new HTMLStringExtractor();

	/**
	 * Private constructor: instances are only obtained through {@link #GetInstance()}.
	 */
	private HTMLStringExtractor() {}

	/**
	 * Gets the shared instance.
	 * 
	 * @return the shared HTML string extractor, always the same object
	 */
	public static HTMLStringExtractor GetInstance() {
		return INSTANCE;
	}

	/**
	 * Extracts the text of the page located at the given URL.
	 * <p>
	 * This method does not throw: any exception raised during extraction is logged at debug level and an empty string is returned instead. When no exception occurs, the value
	 * produced by {@link StringBean#getStrings()} is returned unchanged.
	 * 
	 * @param url
	 *            the URL of the page to read
	 * @param links
	 *            if <code>true</code> include hyperlinks in output.
	 * 
	 * @return the textual contents of the page, or an empty string if an exception occurred
	 */
	public String extractAllStrings(final String url, final boolean links) {
		String res = "";
		try {
			final StringBean sb = new StringBean();
			sb.setLinks(links);
			sb.setURL(url);
			res = sb.getStrings();
		} catch (final Exception e) {
			LOG.debug("Problème d'extraction du contenu", e);
		}
		return res;
	}

	/**
	 * Extracts from an HTML string all the nodes that match the given tag name filter.
	 * <p>
	 * This method does not throw: any exception raised while parsing is logged at debug level and an empty list is returned instead.
	 * 
	 * @param input
	 *            the HTML content to parse
	 * @param tagNameFilter
	 *            the filter selecting the nodes to keep
	 * 
	 * @return the matching nodes, or an empty {@link NodeList} if an exception occurred
	 */
	public NodeList extractTagFromInput(final String input, final TagNameFilter tagNameFilter) {
		NodeList res = new NodeList();
		try {
			final Parser parser = new Parser();
			parser.setInputHTML(input);
			res = parser.extractAllNodesThatMatch(tagNameFilter);
		} catch (final Exception e) {
			LOG.debug("Problème d'extraction du contenu", e);
		}
		return res;
	}

	/**
	 * Extracts the text of an HTML string.
	 * <p>
	 * A <code>null</code> or empty input yields an empty string without invoking the parser. A {@link ParserException} raised while parsing is logged at debug level and an
	 * empty string is returned.
	 * 
	 * @param input
	 *            the HTML content to parse, may be <code>null</code>
	 * @param links
	 *            if <code>true</code> include hyperlinks in output.
	 * 
	 * @return the extracted text, never <code>null</code>
	 */
	public String extractAllStringsFromInput(final String input, final boolean links) {
		// Nothing to parse: also guards against a NullPointerException on null input.
		if (StringUtils.isEmpty(input)) {
			return StringUtils.EMPTY;
		}
		String res = StringUtils.EMPTY;
		try {
			final StringBean sb = new StringBean();
			sb.setLinks(links);
			final Parser parser = new Parser();
			parser.setInputHTML(input);
			parser.visitAllNodesWith(sb);
			res = sb.getStrings();
		} catch (final ParserException e) {
			LOG.debug("Problème d'extraction du contenu", e);
		}
		// The bean's result is normalised so callers never receive null.
		return StringUtils.defaultString(res);
	}
}
