package org.frs.html;

import java.util.ArrayList;



/**
 * Crude, tag-counting splitter that turns an HTML-like string into a tree of
 * {@link CrudeHtmlElement}s.
 *
 * <p>Known limitations of the scanning rules (they are intentional
 * simplifications, not a real HTML tokenizer):
 * <ul>
 *   <li>Every tag that is neither a closing tag ({@code </...>}) nor ends with
 *       {@code />} opens a new nesting level, so unclosed tags such as
 *       {@code <br>} or {@code <!DOCTYPE ...>} extend the current element to
 *       the end of the input.</li>
 *   <li>Every double quote toggles a "string literal" state, inside or outside
 *       a tag; {@code <} and {@code >} are ignored while that state is on.
 *       Single quotes are not treated specially.</li>
 * </ul>
 */
public class ExtendedHtmlParser {

	/**
	 * Splits {@code document} into its top-level elements, in document order.
	 *
	 * <p>Each element found is handed to a fresh {@link CrudeHtmlElement} via
	 * {@code setRawContent(...)} followed by {@code deserialize()}. The raw
	 * content is the exact text from the {@code <} of the opening tag through
	 * the {@code >} of the matching closing tag (or of the tag itself when it
	 * ends with {@code />}). If the matching closing tag is missing, the
	 * element runs to the end of the input. Text outside of elements and
	 * closing tags that have no opening tag are skipped.
	 *
	 * @param document the text to split; {@code null} is treated like an empty
	 *                 document
	 * @return the top-level elements; empty (never {@code null}) when none exist
	 */
	public static ArrayList<CrudeHtmlElement> getRootElements(String document){
		ArrayList<CrudeHtmlElement> elements = new ArrayList<CrudeHtmlElement>();
		if(document == null) {
			return elements;
		}
		// Walk forward with a cursor instead of cutting each element out of a
		// copy of the document: the element is located by position, so an
		// identical piece of text elsewhere can never be removed by mistake.
		int cursor = 0;
		while(cursor < document.length()) {
			int[] span = findNextElement(document, cursor);
			if(span == null) {
				break;
			}
			CrudeHtmlElement element = new CrudeHtmlElement();
			element.setRawContent(document.substring(span[0], span[1]));
			element.deserialize();
			elements.add(element);
			// span[1] > span[0] >= cursor, so every pass makes progress.
			cursor = span[1];
		}
		return elements;
	}

	/**
	 * Locates the first complete element in {@code text} at or after {@code from}.
	 *
	 * @param text the text being scanned
	 * @param from index at which scanning starts
	 * @return {@code {start, endExclusive}} of the element, or {@code null} when
	 *         no opening tag is found
	 */
	private static int[] findNextElement(String text, int from) {
		int tagStart = -1;      // index of the '<' of the tag being read, -1 outside a tag
		int elementStart = -1;  // index of the '<' of the element's opening tag
		int depth = 0;          // number of currently open tags, counted from the opening tag
		boolean insideStringLiteral = false;
		for(int i = from; i < text.length(); i++) {
			char c = text.charAt(i);
			if(c == '"') {
				insideStringLiteral = !insideStringLiteral;
			}
			if(insideStringLiteral) {
				// '<' and '>' inside a quoted run are plain characters.
				continue;
			}
			if(c == '<' && tagStart < 0) {
				tagStart = i;
			}
			if(c != '>' || tagStart < 0) {
				continue;
			}

			// A complete tag text[tagStart..i] has just been read.
			String tag = text.substring(tagStart, i + 1);
			int start = tagStart;
			tagStart = -1;

			if(tag.startsWith("</")) {
				if(elementStart < 0) {
					// Closing tag without an opening tag: ignore it so it cannot
					// unbalance the depth counter of the element that follows.
					continue;
				}
				depth--;
				if(depth == 0) {
					return new int[] { elementStart, i + 1 };
				}
			}else if(elementStart < 0) {
				elementStart = start;
				// endsWith rather than contains: "/>" inside an attribute value
				// does not make the tag self-closing.
				if(tag.endsWith("/>")) {
					return new int[] { elementStart, i + 1 };
				}
				depth = 1;
			}else if(!tag.endsWith("/>")) {
				depth++;
			}
		}
		if(elementStart < 0) {
			return null;
		}
		// Opening tag was never balanced: the element extends to the end.
		return new int[] { elementStart, text.length() };
	}

	/**
	 * Recursively builds the element tree below {@code root} from {@code document}.
	 *
	 * <p>When {@code document} contains no elements, it is stored as the
	 * content of {@code root}. Otherwise the content and raw content of
	 * {@code root} are reset to the empty string, every top-level element is
	 * added as a child, and each child is parsed in turn using its own
	 * {@code getContent()} as the document. Text located between the
	 * top-level elements is therefore not stored on {@code root}.
	 *
	 * @param document the text to parse; when {@code null} the method returns
	 *                 without modifying {@code root}
	 * @param root     the element that receives the parsed children or content
	 */
	public void parse(String document, CrudeHtmlElement root) {
		if(document == null) {
			// Nothing to parse (e.g. a child that reports no content).
			return;
		}
		ArrayList<CrudeHtmlElement> rootElements = getRootElements(document);
		if(rootElements.isEmpty()) {
			root.setContent(document);
			return;
		}
		root.setContent("");
		root.setRawContent("");
		for(CrudeHtmlElement element: rootElements) {
			root.addChild(element);
			parse(element.getContent(), element);
		}
	}

}
