package gov.noaa.pmel.tmap.addxml;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.jdom.Element;

import thredds.catalog.InvAccess;
import thredds.catalog.InvCatalog;
import thredds.catalog.InvCatalogImpl;
import thredds.catalog.InvCatalogRef;
import thredds.catalog.InvDataset;
import thredds.catalog.InvDatasetImpl;
import thredds.catalog.InvProperty;
import thredds.catalog.InvService;
import thredds.catalog.ServiceType;
import ucar.nc2.constants.FeatureType;
import ucar.nc2.dataset.CoordinateAxis;
import ucar.nc2.dataset.CoordinateAxis1D;
import ucar.nc2.dataset.NetcdfDataset;
import ucar.nc2.dt.GridCoordSystem;
import ucar.nc2.dt.GridDataset;
import ucar.nc2.dt.GridDatatype;
import ucar.nc2.dt.TypedDatasetFactory;

/**
 * Builds a "clean" THREDDS catalog from a source catalog.
 * <p>
 * The source catalog is walked recursively. Datasets that have an access
 * method are handed to {@link Aggregates} for analysis and, depending on the
 * result, are added to the clean catalog as NcML aggregations, as a container
 * flagged with a <code>needsAggregation</code> property, or as catalog
 * references. Top-level datasets with a usable grid are copied directly.
 * <p>
 * Instances keep running counters and a <code>done</code> flag as mutable
 * state; no synchronization is performed in this class.
 */
public class CatalogCleaner {

	/** The catalog being cleaned. */
	private InvCatalogImpl sourceCatalog;
	/** The catalog being built. */
	private InvCatalogImpl cleanCatalog;
	/** MD5 encoding of the source catalog URI; used to build unique service names and dataset IDs. */
	private String key;
	/** Service pointing at the remote OPeNDAP server; created lazily once a base URL is known. */
	private InvService remoteService;
	/** Service used for aggregations defined in the clean catalog ("/thredds/dodsC/"). */
	private InvService localService;
	/** Whether NcML aggregations should actually be written. */
	private boolean aggregate = false;
	/** Number of access datasets examined so far. */
	private int total;
	private int total_aggregations = 0;
	private int total_files = 0;
	private int total_catalogs = 0;
	/** Set when the walk must stop (stop string matched or the give-up limit was hit). */
	private boolean done;
	/** Largest number of access datasets under one parent for which aggregate analysis is attempted. */
	private static final int MAX_ACCESS_POINTS = 100;
	private static final int MIN_AGGS = 10;
	private static final int MIN_FILES = 100;
	/** Once more than this many files were examined, the MIN_* thresholds decide whether to give up. */
	private static final int MAX_TOTAL_FILES = 1000;
	private static final int MIN_CATALOGS = 10;
	/** The requested output type as passed to the constructor; not read anywhere in this class. */
	private String refs;
	/** Dataset-name fragment that stops the walk; may be empty or null to disable. */
	private String stop_string;
	/** Dataset-name fragments whose datasets are skipped; may be null. */
	private Set<String> skip;
	// "yyyy-MM-dd HH:mm:ss,S"  	2001-07-04 12:08:56,831
	// Not referenced in this class. SimpleDateFormat is not thread-safe, so do not share it across threads.
	private static SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,S");

	/**
	 * Creates a cleaner for the given catalog and prepares an empty clean catalog
	 * that carries the same URI and a local OPeNDAP service.
	 *
	 * @param catalog     the source catalog; cast to {@link InvCatalogImpl}
	 * @param aggregate   true to write NcML aggregations, false to only flag parents that need one
	 * @param output_type stored but not otherwise used by this class
	 * @param skip        name fragments of datasets to skip
	 * @param stop_string name fragment that stops the walk when seen
	 * @throws URISyntaxException if the catalog URI string is not a valid URI
	 * @throws UnsupportedEncodingException propagated from {@code JDOMUtils.MD5Encode}
	 */
	public CatalogCleaner (InvCatalog catalog, boolean aggregate, String output_type, Set<String> skip, String stop_string) throws URISyntaxException, UnsupportedEncodingException {
		this.aggregate = aggregate;
		this.refs = output_type;
		this.stop_string = stop_string;
		this.skip = skip;
		sourceCatalog = (InvCatalogImpl) catalog;
		key = JDOMUtils.MD5Encode(catalog.getUriString());
		cleanCatalog = new InvCatalogImpl("Clean Catalog for "+sourceCatalog.getUriString(), "1.0.1", new URI(catalog.getUriString()));
		localService = new InvService("localOPeNDAP_"+key, "OPeNDAP", "/thredds/dodsC/", null, null);
		cleanCatalog.addService(localService);
	}

	/**
	 * Walks the top-level datasets of the source catalog and fills the clean catalog.
	 * <p>
	 * The walk ends early as soon as the <code>done</code> flag has been raised by
	 * {@link #clean(InvDatasetImpl)}; datasets after that point are not opened.
	 *
	 * @return the finished clean catalog
	 * @throws Exception propagated from the aggregation analysis
	 */
	public InvCatalogImpl cleanCatalog() throws Exception {
		done = false;
		total = 0;

		List<InvDataset> threddsDatasets = sourceCatalog.getDatasets();
		for (InvDataset threddsDataset : threddsDatasets) {
			// Honor a stop request before touching (and possibly remotely opening) the next dataset.
			if ( done ) {
				break;
			}
			InvDatasetImpl invDataset = (InvDatasetImpl) threddsDataset;
			if ( invDataset.hasAccess() && hasGrid(invDataset) ) {
				total++;
				addGridDataset(invDataset);
			}
			if ( invDataset.hasNestedDatasets() ) {
				clean(invDataset);
			}
		}

		reportSummary();
		cleanCatalog.finish();
		return cleanCatalog;
	}

	/** Logs the counters and prints the comma separated summary line to standard output. */
	private void reportSummary() {
		Cleaner.info("Checked "+total+" files.  Found "+total_files+" files and "+total_aggregations+" aggregations.", 0);
		System.out.println("Summary, "+sourceCatalog.getUriString()+", files checked; files found; aggregations made; total catalogs refed, "+total+", "+total_files+", "+total_aggregations+", "+total_catalogs);
	}

	/**
	 * Adds a catalog reference for the parent of the first dataset in the group.
	 * The reference is attached to a copy of the grandparent dataset inside the
	 * clean catalog, which is created on first use, or to the catalog root when
	 * there is no grandparent.
	 *
	 * @param group datasets that share a parent; only the first element is consulted
	 * @throws UnsupportedEncodingException propagated from {@code JDOMUtils.MD5Encode}
	 */
	private void addCatalogRef(List<DatasetGridPair> group) throws UnsupportedEncodingException {
		if ( group == null || group.isEmpty() ) {
			return;
		}
		total_catalogs++;
		InvDatasetImpl group_parent = (InvDatasetImpl) group.get(0).getDataset().getParent();
		String url = group_parent.getCatalogUrl();
		// Strip the "#null" fragment that appears in the catalog URL of this dataset.
		url = url.replace("#null", "");
		InvCatalogRef ref = new InvCatalogRef(group_parent, group_parent.getFullName(), url);
		InvDatasetImpl parent = (InvDatasetImpl) group_parent.getParent();
		InvDatasetImpl parent_in_catalog = null;
		if ( parent != null ) {
			String parentId = JDOMUtils.MD5Encode(parent.getName());
			parent_in_catalog = (InvDatasetImpl) cleanCatalog.findDatasetByID(parentId);
			// Create the container only when it is NOT already present (the test used to be inverted).
			if ( parent_in_catalog == null ) {
				parent_in_catalog = new InvDatasetImpl(parent);
				// Give the copy the ID used for the lookup above.
				// NOTE(review): assumes findDatasetByID can see datasets added through addDataset - confirm.
				parent_in_catalog.setID(parentId);
				cleanCatalog.addDataset(parent_in_catalog);
			}
		}
		if ( parent_in_catalog != null ) {
			parent_in_catalog.addDataset(ref);
		} else {
			cleanCatalog.addDataset(ref);
		}
	}

	/**
	 * Copies a single gridded dataset into the clean catalog, bound to the remote
	 * OPeNDAP service. The remote service is created from this dataset's URL when
	 * it does not exist yet.
	 *
	 * @param invDataset a dataset that has OPeNDAP access
	 */
	private void addGridDataset(InvDatasetImpl invDataset) {
		total_files++;
		InvAccess access = invDataset.getAccess(ServiceType.OPENDAP);
		String url = access.getUrlPath();
		if ( remoteService == null ) {
			// The service base is whatever precedes the dataset's URL path in the full URI.
			String full_url = access.getStandardUri().toString();
			String base = full_url.substring(0, full_url.indexOf(url));
			setService(base);
		}
		InvDatasetImpl dataset = new InvDatasetImpl(invDataset);
		dataset.setServiceName(remoteService.getName());
		dataset.setUrlPath(url);
		dataset.setID(dataset.getUrlPath()+"_"+key);
		dataset.setDataType(FeatureType.GRID);
		dataset.finish();
		cleanCatalog.addDataset(dataset);
	}

	/**
	 * Creates the remote OPeNDAP service and registers it with the clean catalog.
	 *
	 * @param base the base URL of the remote server
	 */
	private void setService(String base) {
		remoteService = new InvService("remoteOPeNDAP_"+key, "OPeNDAP", base, null, null);
		cleanCatalog.addService(remoteService);
	}

	/**
	 * Adds one NcML aggregation dataset below the given parent.
	 *
	 * @param parent     the container in the clean catalog that receives the aggregation
	 * @param invDataset the source dataset the aggregation node is copied from
	 * @param agg        the members of the aggregation; the first one defines the aggregation element
	 * @param index      appended to the dataset name to tell aggregations apart
	 * @throws Exception propagated from the NcML helpers
	 */
	private void addAggregation(InvDatasetImpl parent, InvDatasetImpl invDataset, List<DatasetGridPair> agg, int index) throws Exception {
		total_aggregations++;
		InvDatasetImpl aggDatasetNode = new InvDatasetImpl(invDataset);
		aggDatasetNode.setName(aggDatasetNode.getName()+" "+index);
		String id = key+"_"+total_aggregations;
		aggDatasetNode.setUrlPath(id+"/aggregation_"+total_aggregations);
		aggDatasetNode.setID(id);
		Element ncml = NCML.getRootElement();
		NCML.addAggregationElement(ncml, agg.get(0).getGrid());
		for (DatasetGridPair member : agg) {
			GridDataset aggDataset = (GridDataset) member.getGrid();
			NCML.addDataset(ncml, aggDataset);
		}
		aggDatasetNode.setServiceName(localService.getName());
		aggDatasetNode.setNcmlElement(ncml);
		aggDatasetNode.setDataType(FeatureType.GRID);
		aggDatasetNode.finish();
		parent.addDataset(aggDatasetNode);
	}

	/**
	 * Recursively processes one container dataset.
	 * <p>
	 * Children with an access method are analysed together as possible
	 * aggregates; children without one are treated as containers and visited
	 * recursively. Nothing happens once the <code>done</code> flag is set, and
	 * datasets whose name contains one of the skip fragments are ignored.
	 *
	 * @param invDataset the container dataset to process
	 * @throws Exception propagated from the aggregation analysis
	 */
	public void clean(InvDatasetImpl invDataset) throws Exception {
		// A null or empty stop string disables the stop check.
		if ( stop_string != null && stop_string.length() > 0 && invDataset.getName().contains(stop_string) ) {
			done = true;
		}
		if ( skip != null ) {
			for (String skip_string : skip) {
				if ( invDataset.getName().contains(skip_string) ) {
					return;
				}
			}
		}
		if ( done ) {
			return;
		}

		List<InvDataset> children = invDataset.getDatasets();
		List<InvDatasetImpl> possibleAggregates = new ArrayList<InvDatasetImpl>();
		List<InvDatasetImpl> containerDatasets = new ArrayList<InvDatasetImpl>();
		for (InvDataset child : children) {
			InvDatasetImpl dataset = (InvDatasetImpl) child;
			if ( dataset.hasAccess() ) {
				possibleAggregates.add(dataset);
				total++;
			} else {
				containerDatasets.add(dataset);
			}
		}

		// Give up on huge catalogs that have produced very little so far.
		if ( total > MAX_TOTAL_FILES && (total_aggregations < MIN_AGGS || total_files < MIN_FILES || total_catalogs < MIN_CATALOGS ) ) {
			done = true;
			Cleaner.info("We've looked at over "+MAX_TOTAL_FILES+" files in this catalog and have fewer than "+MIN_AGGS+" aggregations and "+MIN_FILES+" files in the clean catalog... ", 0);
			Cleaner.info("Consider subdividing this catalog into more managable parts.", 0);
			return;
		}
		if ( total > 1 && total % 100 == 0 ) {
			Cleaner.info("Looked at "+total+" files so far.  Found "+total_files+" files and "+total_aggregations+" aggregations so far.", 1);
		}

		if ( possibleAggregates.size() > MAX_ACCESS_POINTS ) {
			Cleaner.info("Skipping "+invDataset.getName()+" because it is just too hard to contemplate working with "+possibleAggregates.size()+" data sets.", 1);
		} else if ( !possibleAggregates.isEmpty() ) {
			buildAggregates(invDataset, possibleAggregates);
		}
		// A dataset with no access children is a pure container: nothing to analyse or report.

		for (InvDatasetImpl container : containerDatasets) {
			clean(container);
		}
	}

	/**
	 * Runs the aggregate analysis for the access datasets of one container and
	 * records the outcome in the clean catalog.
	 *
	 * @param invDataset         the container the datasets belong to
	 * @param possibleAggregates the container's children that have an access method
	 * @throws Exception propagated from the aggregation analysis
	 */
	private void buildAggregates(InvDatasetImpl invDataset, List<InvDatasetImpl> possibleAggregates) throws Exception {
		Cleaner.info("AGGREGATES: Starting aggregate analysis for "+possibleAggregates.size()+" datasets from "+invDataset.getName()+".", 1);
		Aggregates aggregates = new Aggregates(possibleAggregates, aggregate);
		Cleaner.info("AGGREGATES: Finished aggregate analysis for "+invDataset.getName()+" datasets.", 1);
		Cleaner.info("AGGREGATES: Starting to build the aggregation for "+invDataset.getName()+" datasets.", 1);
		if ( remoteService == null ) {
			setService(aggregates.getBase());
		}
		if ( aggregates.needsAggregation() ) {
			InvDatasetImpl parent = new InvDatasetImpl(invDataset);
			if ( aggregate ) {
				List<List<DatasetGridPair>> aggregations = aggregates.getAggregations();
				cleanCatalog.addDataset(parent);
				for (int i = 0; i < aggregations.size(); i++) {
					addAggregation(parent, invDataset, aggregations.get(i), i);
				}
			} else {
				// Aggregation was not requested: only flag the container.
				parent.addProperty(new InvProperty("needsAggregation", "true"));
				cleanCatalog.addDataset(parent);
			}
		}
		if ( aggregates.hasIndividualDataset() ) {
			for (Iterator ndsIt = aggregates.getIndividuals().iterator(); ndsIt.hasNext();) {
				List<DatasetGridPair> group = (List<DatasetGridPair>) ndsIt.next();
				addCatalogRef(group);
			}
		}
		Cleaner.info("AGGREGATES: Finished building the aggregation for "+invDataset.getName()+" datasets.", 1);
	}

	/**
	 * Opens the dataset through its OPeNDAP access and checks whether it holds a
	 * usable grid. The opened netCDF dataset is always closed again.
	 *
	 * @param dataset the catalog dataset to probe
	 * @return true if the dataset could be opened as a grid dataset and
	 *         {@link #hasGrid(GridDataset)} accepts it; false otherwise, including
	 *         when there is no OPeNDAP access or opening fails
	 */
	private static boolean hasGrid(InvDatasetImpl dataset) {
		InvAccess access = dataset.getAccess(ServiceType.OPENDAP);
		if ( access == null ) {
			return false;
		}

		boolean has_good_grid = false;
		String accessUrl = access.getStandardUrlName();
		Cleaner.info("HASGRID: Starting grid analysis for "+accessUrl, 1);
		NetcdfDataset nc = null;
		try {
			nc = NetcdfDataset.openDataset(accessUrl);
			StringBuilder error = new StringBuilder();
			GridDataset gridDataset = (GridDataset) TypedDatasetFactory.open(FeatureType.GRID, nc, null, error);
			if ( gridDataset != null ) {
				has_good_grid = hasGrid(gridDataset);
			}
		} catch (IOException e) {
			Cleaner.error("HASGRID: Failed to open "+accessUrl+" with "+e.getLocalizedMessage(), 2);
		} finally {
			// Release the connection; one is opened per probed dataset.
			if ( nc != null ) {
				try {
					nc.close();
				} catch (IOException e) {
					Cleaner.error("HASGRID: Failed to close "+accessUrl+" with "+e.getLocalizedMessage(), 2);
				}
			}
		}
		Cleaner.info("HASGRID: Finished grid analysis for "+accessUrl, 1);
		return has_good_grid;
	}

	/**
	 * Checks whether at least one grid has one-dimensional X and Y horizontal axes
	 * that both have more than one point.
	 *
	 * @param gridDataset the grid dataset to inspect; null yields false
	 * @return true as soon as one such grid is found, false otherwise
	 */
	public static boolean hasGrid(GridDataset gridDataset) {
		if ( gridDataset == null ) {
			return false;
		}
		List<GridDatatype> grids = gridDataset.getGrids();
		if ( grids == null ) {
			return false;
		}
		for (GridDatatype grid : grids) {
			GridCoordSystem gcs = grid.getCoordinateSystem();
			CoordinateAxis xa = (CoordinateAxis) gcs.getXHorizAxis();
			CoordinateAxis ya = (CoordinateAxis) gcs.getYHorizAxis();
			if ( xa instanceof CoordinateAxis1D && ya instanceof CoordinateAxis1D ) {
				if ( xa.getSize() > 1 && ya.getSize() > 1 ) {
					return true;
				}
			}
		}
		return false;
	}
}
