/**
 * Copyright (C) 2015 - 2018 Kosmos contact@kosmos.fr
 *
 * Projet: core
 * Version: 6.02.48
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 *
 */
package com.univ.utils;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;

// TODO: Auto-generated Javadoc
/**
 * The Class HTMLtoTexteTransformer.
 *
 * @author malice
 */
public class HTMLtoTexteTransformer {

	// tag ouvert
	/** The Constant REGEXP_HTML_TAG. */
	private final static String REGEXP_HTML_TAG = "<([a-zA-Z0-9]+)[^>]*>";

	// tag fermé
	/** The Constant REGEXP_HTML_TAG. */
	private final static String REGEXP_HTML_END_TAG = "</([a-zA-Z0-9]+)>";

	//lien (on le recupere dans le texte)
	/** The Constant REGEXP_HTML_LINK. */
	private final static String REGEXP_HTML_LINK = "<[aA].*?[Hh][Rr][Ee][Ff]=\"?([a-zA-Z0-9\\:\\.\\/_\\-@,&;=\\?%$]*)\"?.*?>(.*?)</[aA]>";

	/**
	 * Instantiates a new hTM lto texte transformer.
	 */
	private HTMLtoTexteTransformer() {}

	/**
	 * Transformer html.
	 *
	 * @param _texteHtml
	 *            the _texte html
	 * @param apercu
	 *            the apercu
	 *
	 * @return the string
	 *
	 * @throws Exception
	 *             the exception
	 */
	public static String transformerHtml(final String _texteHtml, final boolean apercu) throws Exception {
		if (_texteHtml.trim().equals("")) {
			return _texteHtml;
		}
		String newTexte = _texteHtml;
		//on annule le remplacement du br car les \n ne sont pas interprétés par les clients mails
		final String cr = "<br />";
		StringBuffer sb = new StringBuffer();
		java.util.regex.Pattern p = java.util.regex.Pattern.compile(REGEXP_HTML_LINK);
		Matcher m = p.matcher(newTexte);
		while (m.find()) {
			final String text = m.group(2) + " : " + m.group(1);
			m.appendReplacement(sb, Matcher.quoteReplacement(text));
		}
		m.appendTail(sb);
		newTexte = sb.toString();
		p = java.util.regex.Pattern.compile(REGEXP_HTML_TAG);
		m = p.matcher(newTexte);
		sb = new StringBuffer();
		while (m.find()) {
			final String tag = m.group(1);
			if (tag.equalsIgnoreCase("br")) {
				m.appendReplacement(sb, "\n");
			} else {
				m.appendReplacement(sb, "");
			}
		}
		m.appendTail(sb);
		newTexte = sb.toString();
		p = java.util.regex.Pattern.compile(REGEXP_HTML_END_TAG);
		m = p.matcher(newTexte);
		sb = new StringBuffer();
		while (m.find()) {
			final String tag = m.group(1);
			if (tag.equalsIgnoreCase("li") || tag.equalsIgnoreCase("div") || tag.equalsIgnoreCase("p") || tag.equalsIgnoreCase("h")) {
				m.appendReplacement(sb, "\n");
			} else {
				m.appendReplacement(sb, "");
			}
		}
		m.appendTail(sb);
		newTexte = sb.toString();
		final BufferedReader br = new BufferedReader(new StringReader(newTexte));
		String line = null;
		sb = new StringBuffer();
		while ((line = br.readLine()) != null) {
			if (!StringUtils.isEmpty(line)) {
				sb.append(line + cr);
			}
		}
		newTexte = sb.toString();
		newTexte = newTexte.replaceAll("&rsquo;", "&#39;");
		newTexte = newTexte.replaceAll("&oelig;", "oe");
		newTexte = newTexte.replaceAll("&euro;", "e");
		return EscapeString.unescapeHtml(newTexte);
	}
}
