/* 
 * LoadDatasets Copyright 2008, NOAA.
 * See the LICENSE.txt file in this file's directory.
 */
package gov.noaa.pfel.erddap;


import com.cohort.array.StringArray;
import com.cohort.util.Calendar2;
import com.cohort.util.File2;
import com.cohort.util.Math2;
import com.cohort.util.MustBe;
import com.cohort.util.String2;
import com.cohort.util.XML;

import gov.noaa.pfel.coastwatch.sgt.GSHHS;
import gov.noaa.pfel.coastwatch.sgt.SgtMap;
import gov.noaa.pfel.coastwatch.util.SimpleXMLReader;
import gov.noaa.pfel.coastwatch.util.SSR;

import gov.noaa.pfel.erddap.dataset.*;
import gov.noaa.pfel.erddap.util.*;
import gov.noaa.pfel.erddap.variable.EDV;
import gov.noaa.pfel.erddap.variable.EDVGridAxis;
import gov.noaa.pfel.erddap.variable.EDVTimeGridAxis;

import java.io.FileInputStream;
import java.net.HttpURLConnection;
import java.io.InputStream;
import java.net.URL;
import java.util.Arrays;
import java.util.Collections;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

/**
 * This class is run in a separate thread to load datasets for ERDDAP.
 * !!!A lot of possible thread synchronization issues in this class 
 * don't arise because of the assumption that only one 
 * LoadDatasets thread will be running at once (coordinated by RunLoadDatasets).
 *
 * <p>Features:<ul>
 * <li> Different datasets can have different reloadEveryNMinutes settings.
 * <li> This doesn't reload a dataset if it already exists and is young
 *     (younger than its reloadEveryNMinutes), unless the dataset has been flagged.
 * <li> Erddap starts up quickly (although without all the datasets).
 *     (The CWBrowsers started up slowly.)
 * <li> Datasets are made available in Erddap one-by-one as they are loaded 
 *     (not in batch mode).
 * <li> Loading datasets takes time, but is done in a separate thread
 *    so it never slows down requests for a dataset.
 * <li> Only one thread is used to load all the datasets, so loading
 *    datasets never becomes a drain of computer resources.
 * <li> The datasets.xml file is read anew each time this is run,
 *    so you can make changes to the file (e.g., add datasets or change metadata)
 *    and the results take effect without restarting Erddap.
 * </ul>
 *
 * @author Bob Simons (bob.simons@noaa.gov) 2008-02-12
 */
public class LoadDatasets extends Thread {

    /** "ERROR" is defined here (from String2.ERROR) so that it is consistent in log files. */
    public final static String ERROR = String2.ERROR;
    protected static int institutionCat;

    /**
     * Set this to true (by calling verbose=true in your program, 
     * not but changing the code here)
     * if you want lots of diagnostic messages sent to String2.log.
     */
    public static boolean verbose = false; 
    public static boolean reallyVerbose = false; 

    //*** things set by constructor 
    private Erddap erddap;
    private String datasetsRegex;
    private InputStream inputStream;
    private boolean majorLoad;

    /* This is set by run if there is an unexpected error. */   
    public String unexpectedError = ""; 

    /** 
     * This is a collection of all the exceptions from all the 
     * datasets that didn't load successfully and other warnings from LoadDatasets.
     * It will be length=0 if no warnings.
     */
    public StringBuffer warningsFromLoadDatasets = new StringBuffer();

    /**
     * This makes a LoadDatasets thread (named "LoadDatasets").
     * The constructor just stores the parameters; the datasets are loaded by run().
     * The caller may provide the datasets.xml inputStream.
     *
     * @param erddap  run() places results back in erddap as they become available
     * @param datasetsRegex  usually either EDStatic.datasetsRegex or a custom regex
     *     for flagged datasets.
     *     run() skips datasets whose datasetID doesn't match this.
     * @param inputStream with the datasets.xml information.
     *     There is no need to wrap this in a buffered InputStream -- that will be done here.
     *     If null, run() will make a copy of [EDStatic.contentDirectory]/datasets.xml
     *     and make an inputStream from the copy.
     * @param majorLoad if true, this does time-consuming garbage collection,
     *     logs memory usage information,
     *     and checks if Daily report should be sent.
     */
    public LoadDatasets(Erddap erddap, String datasetsRegex, InputStream inputStream,
            boolean majorLoad) {
        //give the thread a recognizable name
        setName("LoadDatasets");

        //store the settings for run()
        this.majorLoad     = majorLoad;
        this.inputStream   = inputStream;
        this.datasetsRegex = datasetsRegex;
        this.erddap        = erddap;
    }

    /**
     * This reads datasets[2].xml and loads all the datasets, 
     * placing results back in erddap as they become available.
     */
    public void run() {
        SimpleXMLReader xmlReader = null;
        try {
            String2.log("\n" + String2.makeString('*', 80) +  
                "\nLoadDatasets.run EDStatic.developmentMode=" + EDStatic.developmentMode + 
                " " + Calendar2.getCurrentISODateTimeStringLocal() +
                "\n  datasetsRegex=" + datasetsRegex + 
                " inputStream=" + (inputStream == null? "null" : "something") + 
                " majorLoad=" + majorLoad);
            long memoryInUse = 0;
            if (majorLoad) {
                //gc so getMemoryInUse more accurate
                //don't use Math2.sleep which catches/ignores interrupt
                System.gc();  Thread.sleep(500); 
                System.gc();  Thread.sleep(500); 
                memoryInUse = Math2.getMemoryInUse();
                String2.log(Math2.memoryString() + " " + Math2.xmxMemoryString());
            }
            long startTime = System.currentTimeMillis();
            int oldNGrid = erddap.gridDatasetHashMap.size();
            int oldNTable = erddap.tableDatasetHashMap.size();
            institutionCat = String2.indexOf(EDStatic.categoryAttributes, EDStatic.INSTITUTION);
            Map tUserHashMap = new HashMap(); //no need for synchronized, all puts are here (1 thread); future gets are thread safe
            StringBuffer datasetsThatFailedToLoadSB = new StringBuffer();

            //decision: how to process xml dataset description
            //  XPath - a standard, but sometimes slow and takes tons of memory
            //    Good: very concise code for parsing
            //    Severe problem: without making schemas, hard to catch/report mistyped tag names  
            //    Maybe I should be writing schemas...
            //  SimpleXMLReader - not a standard, but fast
            //    Good: easy to catch/report mistyped tag Names
            //    Low memory use.
            //I went with SimpleXMLReader

            if (inputStream == null) {
                //make a copy of datasets.xml so administrator can change file whenever desired
                //  and not affect simpleXMLReader
                String oldFileName = EDStatic.contentDirectory + 
                    "datasets" + (EDStatic.developmentMode? "2" : "") + ".xml";
                String newFileName = EDStatic.bigParentDirectory + "currentDatasets.xml";
                if (!File2.copy(oldFileName, newFileName)) 
                    throw new RuntimeException("Unable to copy " + oldFileName + " to " + newFileName);
                inputStream = new FileInputStream(newFileName);
            }

            //read datasets.xml
            xmlReader = new SimpleXMLReader(inputStream, "erddapDatasets");
            String startError = "datasets.xml error on line #";
            int nDatasets = 0;
            int nTry = 0;
            while (true) {
                //check for interruption 
                if (isInterrupted()) { 
                    String2.log("*** The LoadDatasets thread was interrupted at " + 
                        Calendar2.getCurrentISODateTimeStringLocal());
                    xmlReader.close();
                    return;
                }

                xmlReader.nextTag();
                String tags = xmlReader.allTags();
                if      (tags.equals("</erddapDatasets>")) {
                    break;
                } else if (tags.equals("<erddapDatasets><dataset>")) {
                    //just load minimal datasets?
                    nDatasets++;
                    String tId = xmlReader.attributeValue("datasetID"); 

                    //skip dataset based on datasetsRegex?
                    boolean skip = !tId.matches(datasetsRegex); //we're looking for reasons to skip it
                    if (skip) {
                        if (reallyVerbose) String2.log("*** skipping datasetID=" + tId + 
                            " because of datasetsRegex.");
                    } else {
                        //look for flag
                        boolean isFlagged = File2.delete(EDStatic.fullResetFlagDirectory + tId);
                        if (isFlagged) {
                            String2.log("*** reloading datasetID=" + tId + 
                                " because it was in flag directory.");
                        } else {
                            //does the dataset already exist and is young?
                            EDD oldEdd = (EDD)erddap.gridDatasetHashMap.get(tId);
                            if (oldEdd == null)
                                oldEdd = (EDD)erddap.tableDatasetHashMap.get(tId);
                            if (oldEdd != null) {
                                long minutesOld = oldEdd.creationTimeMillis() <= 0?  //see edd.setCreationTimeTo0
                                    Long.MAX_VALUE :
                                    (System.currentTimeMillis() - oldEdd.creationTimeMillis()) / 60000; 
                                if (minutesOld < oldEdd.getReloadEveryNMinutes()) {
                                    //it exists and is young
                                    String2.log("*** skipping datasetID=" + tId + 
                                        ": it already exists and minutesOld=" + minutesOld +
                                        " is less than reloadEvery=" + oldEdd.getReloadEveryNMinutes());
                                    skip = true;
                                }
                            }
                        }

                        //active="false"?  (very powerful)
                        String tActiveString = xmlReader.attributeValue("active"); 
                        boolean tActive = tActiveString != null && tActiveString.equals("false")? false : true; 
                        if (!tActive) {
                            //marked not active now; was it active?
                            if (erddap.gridDatasetHashMap.remove( tId) != null ||
                                erddap.tableDatasetHashMap.remove(tId) != null) {
                                //it was active; finish removing it
                                //do in quick succession...   (could be synchronized on erddap)
                                String2.log("*** removing datasetID=" + tId + " because active=\"false\".");
                                erddap.categoryInfo = copyCatInfoMinusId(erddap.categoryInfo, tId);
                                File2.deleteAllFiles(EDStatic.fullCacheDirectory + tId);
                            }
                            skip = true;
                        }
                    }

                    if (skip) {
                        //skip over the tags for this dataset
                        while (!tags.equals("<erddapDatasets></dataset>")) {
                            xmlReader.nextTag();
                            tags = xmlReader.allTags();
                        }
                    } else {
                        //try to load this dataset
                        nTry++;
                        String change = "";
                        EDD dataset = null, oldDataset = null;
                        try {
                            dataset = EDD.fromXml(xmlReader.attributeValue("type"), xmlReader);
                            Map newCategoryInfo = copyCatInfoMinusId(erddap.categoryInfo, tId);
                            addDatasetInfo(newCategoryInfo, dataset); //make changes to the copy

                            //check for interruption right before making changes to Erddap
                            if (isInterrupted()) { //this is a likely place to catch interruption
                                String2.log("*** The LoadDatasets thread was interrupted at " + 
                                    Calendar2.getCurrentISODateTimeStringLocal());
                                xmlReader.close();
                                return;
                            }

                            //do in quick succession...   (could be synchronized on erddap)
                            //hashMap.put atomically replaces old version with new
                            oldDataset = dataset instanceof EDDGrid?
                                (EDD)(erddap.gridDatasetHashMap.put( dataset.datasetID(), dataset)) :
                                (EDD)(erddap.tableDatasetHashMap.put(dataset.datasetID(), dataset));
                            erddap.categoryInfo = newCategoryInfo;
                            //clear the dataset's cache 
                            //since axis values may have changed and "last" may have changed
                            File2.deleteAllFiles(EDStatic.fullCacheDirectory + dataset.datasetID());                           
                       
                            change = dataset.changed(oldDataset);
                            if (change.length() == 0 && dataset instanceof EDDTable)
                                change = "The dataset was reloaded.";

                        } catch (Throwable t) {
                            dataset = null;
                            //if oldDataset exists, remove it
                            Map newCategoryInfo = copyCatInfoMinusId(erddap.categoryInfo, tId); //slow, so do first

                            //check for interruption right before making changes to Erddap
                            if (isInterrupted()) { //this is a likely place to catch interruption
                                String tError2 = "*** The LoadDatasets thread was interrupted at " + 
                                    Calendar2.getCurrentISODateTimeStringLocal();
                                String2.log(tError2);
                                warningsFromLoadDatasets.append(tError2 + "\n\n");
                                xmlReader.close();
                                return;
                            }

                            //do in quick succession...   (could be synchronized on erddap)
                            EDD tDataset = (EDD)erddap.gridDatasetHashMap.remove(tId); //always ensure it was removed
                            if (tDataset == null)
                                tDataset = (EDD)erddap.tableDatasetHashMap.remove(tId);
                            if (oldDataset == null)
                                oldDataset = tDataset;
                            erddap.categoryInfo = newCategoryInfo; 
                            String tError = startError + xmlReader.lineNumber() + "\n" + 
                                "While trying to load datasetID=" + tId + "\n" +
                                MustBe.throwableToString(t);
                            String2.log(tError);
                            warningsFromLoadDatasets.append(tError + "\n\n");
                            datasetsThatFailedToLoadSB.append(tId + ", ");

                            //stop???
                            if (!xmlReader.isOpen()) { //error was really serious
                                xmlReader.close();
                                throw new RuntimeException(startError + xmlReader.lineNumber() + 
                                    ": " + t.toString(), t);
                            }
           
                            //skip over the remaining tags for this dataset
                            try {
                                while (!xmlReader.allTags().equals("<erddapDatasets></dataset>")) 
                                    xmlReader.nextTag();
                            } catch (Throwable t2) {
                                xmlReader.close();
                                throw new RuntimeException(startError + xmlReader.lineNumber() + 
                                    ": " + t2.toString(), t2);
                            }

                            //change      (if oldDataset=null and new one failed to load, no change)
                            if (oldDataset != null)  
                                change = tError;
                        }
                        if (verbose) String2.log("change=" + change);

                        //trigger subscription and dataset.onChange actions (after new dataset is in place)
                        EDD cooDataset = dataset == null? oldDataset : dataset; //currentOrOld, may be null
                        if (change.length() > 0) {
                            try {
                                StringArray actions = null;

                                if (EDStatic.subscriptionSystemActive) { 
                                    //get subscription actions
                                    try { //beware exceptions from subscriptions
                                        actions = EDStatic.subscriptions.listActions(tId);
                                    } catch (Throwable listT) {
                                        String subject = startError + xmlReader.lineNumber() + " with Subscriptions";
                                        String content = MustBe.throwableToString(listT); 
                                        String2.log(subject + ":\n" + content);
                                        EDStatic.email(EDStatic.emailEverythingTo, subject, content);
                                        actions = new StringArray();
                                    }
                                } else actions = new StringArray();

                                //get dataset.onChange actions
                                int nSubscriptionActions = actions.size();
                                if (cooDataset != null) {
                                    if (cooDataset.onChange() != null) actions.append(cooDataset.onChange());
                                }

                                //do the actions
                                if (verbose) String2.log("nActions=" + actions.size());

                                for (int a = 0; a < actions.size(); a++) {
                                    String tAction = actions.get(a);
                                    if (reallyVerbose) String2.log("doing action=" + tAction);
                                    try {
                                        if (tAction.startsWith("http://")) {
                                            //but don't get the input stream! I don't need to, 
                                            //and it is a big security risk.
                                            SSR.touchUrl(tAction, 60000);
                                        } else if (tAction.startsWith("mailto:")) {
                                            String tEmail = tAction.substring("mailto:".length());
                                            EDStatic.email(tEmail,
                                                "datasetID=" + tId + " changed.", 
                                                "datasetID=" + tId + " changed.\n" + 
                                                change + "\n\n*****\n" +
                                                (a < nSubscriptionActions? 
                                                    EDStatic.subscriptions.messageToRequestList(tEmail) :
                                                    "This action is specified in datasets.xml.\n")); 
                                                    //It would be nice to include unsubscribe 
                                                    //info for this action, 
                                                    //but it isn't easily available.
                                        } else {
                                            throw new RuntimeException("The startsWith of action=" + 
                                                tAction + " is not allowed!");
                                        }
                                    } catch (Throwable actionT) {
                                        String2.log(startError + xmlReader.lineNumber() + "\n" + 
                                            "action=" + tAction + "\n" + 
                                            MustBe.throwableToString(actionT));
                                    }
                                }
                            } catch (Throwable subT) {
                                String subject = startError + xmlReader.lineNumber() + " with Subscriptions";
                                String content = MustBe.throwableToString(subT); 
                                String2.log(subject + ":\n" + content);
                                EDStatic.email(EDStatic.emailEverythingTo, subject, content);
                            }
                        }

                        //trigger RSS action 
                        // (after new dataset is in place and if there is either a current or older dataset)
                        if (cooDataset != null && change.length() > 0) {
                            try {
                                //generate the rss xml
                                //See general info: http://en.wikipedia.org/wiki/RSS_(file_format)
                                //  background: http://www.mnot.net/rss/tutorial/
                                //  rss 2.0 spec: http://cyber.law.harvard.edu/rss/rss.html
                                //I chose rss 2.0 for no special reason (most modern version of that fork; I like "simple").
                                //The feed programs didn't really care if just pubDate changed.
                                //  They care about item titles changing.
                                //  So this treats every change as a new item with a different title, 
                                //    replacing the previous item.
                                StringBuffer rss = new StringBuffer();
                                GregorianCalendar gc = Calendar2.newGCalendarZulu();
                                String pubDate = 
                                    "    <pubDate>" + Calendar2.formatAsRFC822GMT(gc) + "</pubDate>\n";
                                String link = 
                                    "    <link>" + EDStatic.publicErddapUrl(cooDataset.getAccessibleTo() == null) +
                                        "/" + cooDataset.dapProtocol() + "/" + tId;
                                rss.append(
                                    "<?xml version=\"1.0\"?>\n" +
                                    "<rss version=\"2.0\">\n" +
                                    "  <channel>\n" +
                                    "    <title>ERDDAP: " + XML.encodeAsXML(cooDataset.title()) + "</title>\n" +
                                    "    <description>This RSS feed changes when the dataset changes.</description>\n" +      
                                    link + ".html</link>\n" +
                                    pubDate +
                                    "    <item>\n" +
                                    "      <title>This dataset changed " + Calendar2.formatAsISODateTimeT(gc) + "Z</title>\n" +
                                    "  " + link + ".html</link>\n" +
                                    "      <description>" + XML.encodeAsXML(change) + "</description>\n" +      
                                    "    </item>\n" +
                                    "  </channel>\n" +
                                    "</rss>\n");

                                //store the xml
                                erddap.rssHashMap.put(tId, String2.getUTF8Bytes(rss.toString()));

                            } catch (Throwable rssT) {
                                String subject = startError + xmlReader.lineNumber() + " with RSS";
                                String content = MustBe.throwableToString(rssT); 
                                String2.log(subject + ":\n" + content);
                                EDStatic.email(EDStatic.emailEverythingTo, subject, content);
                            }
                        }

                    }

                } else if (tags.equals("<erddapDatasets><subscriptionEmailBlacklist>")) {
                } else if (tags.equals("<erddapDatasets></subscriptionEmailBlacklist>")) {
                    if (EDStatic.subscriptionSystemActive)
                        EDStatic.subscriptions.setEmailBlacklist(xmlReader.content());

                } else if (tags.equals("<erddapDatasets><requestBlacklist>")) {
                } else if (tags.equals("<erddapDatasets></requestBlacklist>")) {
                    EDStatic.setRequestBlacklist(xmlReader.content());

                //<user username="bsimons" password="..." roles="admin, role1" />
                //this mimics tomcat syntax
                } else if (tags.equals("<erddapDatasets><user>")) { 
                    String tUser = xmlReader.attributeValue("username");
                    String tPassword  = xmlReader.attributeValue("password");  
                    if (tPassword != null && !EDStatic.passwordEncoding.equals("plaintext")) 
                        tPassword = tPassword.toLowerCase(); //match Digest Authentication standard case
                    String ttRoles = xmlReader.attributeValue("roles");
                    String tRoles[] = ttRoles == null || ttRoles.trim().length() == 0?
                        new String[0] : String2.split(ttRoles, ','); 

                    if (tUser != null && tUser.length() > 0) {
                        if (EDStatic.authentication.equals("custom") &&   //others in future
                            (tPassword == null || tPassword.length() < 7)) {
                            warningsFromLoadDatasets.append(
                                "datasets.xml error: The password for <user> username=" + tUser + 
                                " in datasets.xml had fewer than 7 characters.\n\n");
                        } else {
                            Arrays.sort(tRoles);
                            if (reallyVerbose) String2.log("user=" + tUser + " roles=" + String2.toCSVString(tRoles));
                            Object o = tUserHashMap.put(tUser, new Object[]{tPassword, tRoles});
                            if (o != null)
                                warningsFromLoadDatasets.append(
                                    "datasets.xml error: There are two <user> tags in datasets.xml with username=" + 
                                    tUser + "\nChange one of them.\n\n");
                        }
                    } else {
                        warningsFromLoadDatasets.append(
                            "datasets.xml error: A <user> tag in datasets.xml had no username=\"\" attribute.\n\n");
                    }

                } else if (tags.equals("<erddapDatasets></user>")) { //do nothing

                } else {
                    xmlReader.unexpectedTagException();
                }
            }
            xmlReader.close();
            xmlReader = null;

            //atomic swap into place
            EDStatic.setUserHashMap(tUserHashMap); 
            //datasetsThatFailedToLoad only swapped into place if majorLoad (see below)
            String2.noLongLines(datasetsThatFailedToLoadSB, 70, "    ");
            String datasetsThatFailedToLoad =
                "Datasets That Failed To Load (in the last " + 
                (majorLoad? "major" : "minor") + " LoadDatasets)" +
                (datasetsThatFailedToLoadSB.length() == 0? ": (none)\n" :
                    "    " + datasetsThatFailedToLoadSB.toString() + "(end)\n");
            EDStatic.nGridDatasets = erddap.gridDatasetHashMap.size();
            EDStatic.nTableDatasets = erddap.tableDatasetHashMap.size();

            //*** print lots of useful information
            long loadDatasetsTime = System.currentTimeMillis() - startTime;
            String cDateTimeLocal = Calendar2.getCurrentISODateTimeStringLocal();
            String2.log("\n" + String2.makeString('*', 80) + "\n" + 
                "LoadDatasets.run finished at " + cDateTimeLocal + "  TOTAL TIME=" + loadDatasetsTime + "\n" +
                "  nGridDatasets active=" + EDStatic.nGridDatasets + 
                    " change=" + (EDStatic.nGridDatasets - oldNGrid) + "\n" +
                "  nTableDatasets active=" + EDStatic.nTableDatasets + 
                    " change=" + (EDStatic.nTableDatasets - oldNTable) + "\n" +
                "  nDatasets in datasets.xml=" + nDatasets + " (nTry=" + nTry + ")\n" +
                "  nUsers=" + tUserHashMap.size());

            //minorLoad?
            if (!majorLoad) {
                String2.distribute(loadDatasetsTime, EDStatic.minorLoadDatasetsDistribution24);
                String2.distribute(loadDatasetsTime, EDStatic.minorLoadDatasetsDistributionTotal);
                String2.log(datasetsThatFailedToLoad);
            }

            //majorLoad?
            if (majorLoad) {
                String2.distribute(loadDatasetsTime, EDStatic.majorLoadDatasetsDistribution24);
                String2.distribute(loadDatasetsTime, EDStatic.majorLoadDatasetsDistributionTotal);
                //gc so getMemoryInUse more accurate
                //don't use Math2.sleep which catches/ignores interrupt
                System.gc();  Thread.sleep(500); 
                System.gc();  Thread.sleep(500); 
                String memoryString = Math2.memoryString();
                String2.log(
                    "  " + memoryString + " " + Math2.xmxMemoryString() +
                    "\n  change for this run of major Load Datasets (MB) = " + ((Math2.getMemoryInUse() - memoryInUse) / 1000000) + "\n");

                EDStatic.datasetsThatFailedToLoad = datasetsThatFailedToLoad; //swap into place
                EDStatic.memoryUseLoadDatasetsSB.append("  " + cDateTimeLocal + "  " + memoryString + "\n");
                EDStatic.failureTimesLoadDatasetsSB.append("  " + cDateTimeLocal + "  " + 
                    String2.getBriefDistributionStatistics(EDStatic.failureTimesDistributionLoadDatasets) + "\n");
                EDStatic.responseTimesLoadDatasetsSB.append("  " + cDateTimeLocal + "  " + 
                    String2.getBriefDistributionStatistics(EDStatic.responseTimesDistributionLoadDatasets) + "\n");

                //email daily report?
                GregorianCalendar reportCalendar = Calendar2.newGCalendarLocal();
                String reportDate = Calendar2.formatAsISODate(reportCalendar);
                int hour = reportCalendar.get(Calendar2.HOUR_OF_DAY);
                //if (true) {  //uncomment to test daily report 

                if (!reportDate.equals(erddap.lastReportDate) && hour >= 7) {

                    erddap.lastReportDate = reportDate;
                    String stars = String2.makeString('*', 70);
                    String subject = "ERDDAP Daily Report " + cDateTimeLocal;
                    StringBuffer contentSB = new StringBuffer(subject + "\n\n");
                    EDStatic.addIntroStatistics(contentSB);

                    //append number of active threads
                    String traces = MustBe.allStackTraces(true, true);
                    int po = traces.indexOf('\n');
                    if (po > 0)
                        contentSB.append(traces.substring(0, po + 1));

                    contentSB.append(Math2.memoryString() + " " + Math2.xmxMemoryString() + "\n\n");
                    contentSB.append(stars + "\nTallied Usage Information\n\n");
                    contentSB.append(EDStatic.tally.toString(50));
                    EDStatic.addCommonStatistics(contentSB);

                    contentSB.append("\n" + stars + 
                        "\nWarnings from LoadDatasets\n\n");
                    contentSB.append(warningsFromLoadDatasets);

                    contentSB.append("\n" + stars + "\n");
                    contentSB.append(traces);

                    //clear all the "last 24 hours" tallies
                    EDStatic.tally.remove("Categorize Attribute (last 24 hours)");
                    EDStatic.tally.remove("Categorize Attribute / Value (last 24 hours)");
                    EDStatic.tally.remove("Categorize File Type (last 24 hours)");
                    EDStatic.tally.remove("griddap DatasetID (last 24 hours)");
                    EDStatic.tally.remove("griddap File Type (last 24 hours)");
                    EDStatic.tally.remove("Home Page (last 24 hours)");
                    EDStatic.tally.remove("Info (last 24 hours)");
                    EDStatic.tally.remove("Info File Type (last 24 hours)");
                    EDStatic.tally.remove("Log in (last 24 hours)");
                    EDStatic.tally.remove("Log out (last 24 hours)");
                    EDStatic.tally.remove("Log in Redirect (last 24 hours)");
                    EDStatic.tally.remove("Main Resources List (last 24 hours)");
                    EDStatic.tally.remove("MemoryInUse > 3/4 MaxMemory (last 24 hours)");
                    EDStatic.tally.remove("Protocol (last 24 hours)");
                    EDStatic.tally.remove("Requester Is Logged In (last 24 hours)");
                    EDStatic.tally.remove("Request refused: not enough memory currently (last 24 hours)");
                    EDStatic.tally.remove("Request refused: not enough memory ever (last 24 hours)");
                    EDStatic.tally.remove("Requester's IP Address (Allowed) (since last daily report)");
                    EDStatic.tally.remove("Requester's IP Address (Blocked) (since last daily report)");
                    EDStatic.tally.remove("RequestReloadASAP (last 24 hours)");
                    EDStatic.tally.remove("RSS (last 24 hours)");
                    EDStatic.tally.remove("Search File Type (last 24 hours)");
                    EDStatic.tally.remove("Search For (last 24 hours)");
                    EDStatic.tally.remove("SetDatasetFlag (last 24 hours)");
                    EDStatic.tally.remove("Subscriptions (last 24 hours)");
                    EDStatic.tally.remove("tabledap DatasetID (last 24 hours)");
                    EDStatic.tally.remove("tabledap File Type (last 24 hours)");
                    EDStatic.tally.remove("WMS doWmsGetMap (last 24 hours)");
                    EDStatic.tally.remove("WMS doWmsGetCapabilities (last 24 hours)");
                    EDStatic.tally.remove("WMS doWmsOpenLayers (last 24 hours)");
                    EDStatic.failureTimesDistribution24      = new int[String2.DistributionSize];
                    EDStatic.majorLoadDatasetsDistribution24 = new int[String2.DistributionSize];
                    EDStatic.minorLoadDatasetsDistribution24 = new int[String2.DistributionSize];
                    EDStatic.responseTimesDistribution24     = new int[String2.DistributionSize];

                    erddap.todaysNRequests = 0;
                    String2.log("\n" + stars);
                    String2.log(contentSB.toString());
                    String2.log("\n" +stars + 
                        "\nEnd of ERDDAP Daily Report\n");

                    //after write to log (before email), add URLs to setDatasetFlag (so only in email to admin)
                    //since log may be seen by clients if displayDiagnosticInfo is true
                    contentSB.append("\n" + stars + 
                        "\nsetDatasetFlag URLs can be used to force a dataset to be reloaded (treat as confidential information)\n\n");
                    StringArray datasetIDs = new StringArray();
                    datasetIDs.add(String2.toStringArray(erddap.gridDatasetHashMap.keySet().toArray()));
                    datasetIDs.add(String2.toStringArray(erddap.tableDatasetHashMap.keySet().toArray()));
                    datasetIDs.sort();
                    for (int ds = 0; ds < datasetIDs.size(); ds++) {
                        contentSB.append(EDD.flagUrl(datasetIDs.get(ds)));
                        contentSB.append('\n');
                    }

                    //after write to log (before email), add subscription info (so only in email to admin)
                    //since log may be seen by clients if displayDiagnosticInfo is true
                    if (EDStatic.subscriptionSystemActive) {
                        try {
                            contentSB.append("\n\n" + stars + 
                                "\nTreat Subscription Information as Confidential:\n");
                            contentSB.append(EDStatic.subscriptions.listSubscriptions());
                        } catch (Throwable lst) {
                            contentSB.append("LoadDatasets Error: " + MustBe.throwableToString(lst));
                        }
                    } else {
                        contentSB.append("\n\n" + stars + 
                            "\nThe email/URL subscription system is not active.\n");
                    }

                    //write to email
                    contentSB.append("\n" +stars + 
                        "\nEnd of ERDDAP Daily Report\n");
                    String content = contentSB.toString();
                    EDStatic.email(EDStatic.emailEverythingTo, subject, content);
                    EDStatic.email(EDStatic.emailDailyReportTo, subject, content);
                } else {
                    //major load, but not daily report
                    StringBuffer sb = new StringBuffer();
                    EDStatic.addIntroStatistics(sb);

                    //append number of active threads
                    String traces = MustBe.allStackTraces(true, true);
                    int po = traces.indexOf('\n');
                    if (po > 0)
                        sb.append(traces.substring(0, po + 1));

                    sb.append(Math2.memoryString() + " " + Math2.xmxMemoryString() + "\n\n");
                    EDStatic.addCommonStatistics(sb);
                    sb.append(EDStatic.tally.toString("Requester's IP Address (Allowed) (since last Major LoadDatasets)", 50));
                    sb.append(EDStatic.tally.toString("Requester's IP Address (Blocked) (since last Major LoadDatasets)", 50));
                    sb.append(traces);
                    String2.log(sb.toString());

                    //email if some threshold is surpassed???
                    int nFailed    = String2.getDistributionN(EDStatic.failureTimesDistributionLoadDatasets);
                    int nSucceeded = String2.getDistributionN(EDStatic.responseTimesDistributionLoadDatasets);
                    if (nFailed + nSucceeded > EDStatic.unusualActivity) //high activity level
                        EDStatic.email(EDStatic.emailEverythingTo, 
                            "Unusual Activity: lots of requests", sb.toString());
                    else if (nFailed > 10 && nFailed > nSucceeded / 4)    // >25% of requests fail
                        EDStatic.email(EDStatic.emailEverythingTo, 
                            "Unusual Activity: >25% of requests failed", sb.toString());
                }

                //after every major loadDatasets
                EDStatic.tally.remove("Requester's IP Address (Allowed) (since last Major LoadDatasets)");
                EDStatic.tally.remove("Requester's IP Address (Blocked) (since last Major LoadDatasets)");
                EDStatic.failureTimesDistributionLoadDatasets  = new int[String2.DistributionSize];
                EDStatic.responseTimesDistributionLoadDatasets = new int[String2.DistributionSize];
                removeOldLines(EDStatic.memoryUseLoadDatasetsSB,     101, 82);
                removeOldLines(EDStatic.failureTimesLoadDatasetsSB,  101, 59);
                removeOldLines(EDStatic.responseTimesLoadDatasetsSB, 101, 59);
            }

        } catch (Throwable t) {
            if (xmlReader != null) 
                xmlReader.close();
            if (!isInterrupted()) {
                String subject = "Error while processing datasets.xml at " + 
                    Calendar2.getCurrentISODateTimeStringLocal();
                String content = MustBe.throwableToString(t); 
                unexpectedError = subject + ": " + content;
                String2.log(unexpectedError);
                EDStatic.email(EDStatic.emailEverythingTo, subject, content);
            }
        }

    }

    /**
     * This trims the oldest text from the front of a newline-separated buffer
     * so that approximately the newest keepLines lines remain.
     * Nothing is removed unless sb holds more than (keepLines+1)*lineLength 
     * characters, or if no newline is found at or after the cut point.
     *
     * @param sb the newline-separated text. It is modified in place.
     * @param keepLines the approximate number of newest lines to keep
     * @param lineLength the nominal number of characters per line,
     *     used to estimate where the cut point is
     */
    static void removeOldLines(StringBuffer sb, int keepLines, int lineLength) {
        int nChars = sb.length();
        if (nChars <= (keepLines + 1) * lineLength) {
            return; //not long enough to need trimming
        }

        //find the first newline at or after the point which is 
        //keepLines nominal lines before the end
        int newlinePo = sb.indexOf("\n", nChars - keepLines * lineLength);
        if (newlinePo <= 0) {
            return; //no line break found there, so leave sb as is
        }

        //remove everything up to and including that newline
        sb.delete(0, newlinePo + 1);
    }

    /**
     * This builds a new catInfo which has everything that is in oCatInfo,
     * except for references to id.
     * (Since the changes are made to a copy, the caller can switch to the 
     * new version with one assignment, e.g., "erddap.categoryInfo = newCatInfo".)
     *
     * @param oCatInfo the existing catInfo: a map of catName to a HashMap 
     *     of catAtt to a HashSet of datasetIDs. 
     *     It is only read from, never written to.
     * @param id the datasetID which is to be left out of the copy
     * @return a new synchronized map with a HashMap for every name in 
     *     EDStatic.categoryAttributes. 
     *     A catAtt whose HashSet is empty after id is removed isn't copied.
     */
    protected static Map copyCatInfoMinusId(Map oCatInfo, String id) {
        Map copy = Collections.synchronizedMap(new HashMap());
        String catNames[] = EDStatic.categoryAttributes;
        for (int cat = 0; cat < catNames.length; cat++) {
            String catName = catNames[cat];

            //the copy always gets a (perhaps empty) HashMap for each catName (e.g., institution)
            HashMap newAtts = new HashMap();
            copy.put(catName, newAtts);

            //go through the old HashMap for this catName
            HashMap oldAtts = (HashMap)oCatInfo.get(catName);
            for (Iterator it = oldAtts.entrySet().iterator(); it.hasNext(); ) {
                Map.Entry entry = (Map.Entry)it.next();
                Object catAtt = entry.getKey(); //e.g., ndbc
                HashSet oldIds = (HashSet)entry.getValue();

                //copy the set of datasetIDs (a shallow copy is sufficient), then drop id
                HashSet newIds = (HashSet)oldIds.clone();
                newIds.remove(id);

                //only keep this catAtt if some datasetIDs remain
                if (!newIds.isEmpty())
                    newAtts.put(catAtt, newIds);
            }
        }
        return copy;
    }


    /**
     * This adds the dataset's datasetID to the proper places in catInfo:
     * for the dataset's institution (via categorizeInstitution), 
     * for each of its data variables and, if the dataset is an EDDGrid,
     * for each of its axis variables (via categorizeCatAtt).
     *
     * @param catInfo the new categoryInfo hashMap of hashMaps of hashSets
     * @param edd the dataset whose info should be added to catInfo
     */
    protected static void addDatasetInfo(Map catInfo, EDD edd) {
        //Note: no institutionCat local is computed here. A local variable in this
        //method can't be seen by categorizeInstitution or categorizeCatAtt,
        //and it wasn't used in this method.

        String id = edd.datasetID();

        //institution is a special case since it is a dataset attribute
        categorizeInstitution(catInfo, edd, id);

        //go through data variables
        int nd = edd.dataVariables().length;
        for (int dv = 0; dv < nd; dv++) 
            categorizeCatAtt(catInfo, edd.dataVariables()[dv], id);
        
        //only grid datasets have axis variables
        if (edd instanceof EDDGrid) {
            EDDGrid eddGrid = (EDDGrid)edd;
            //go through axis variables
            int na = eddGrid.axisVariables().length;
            for (int av = 0; av < na; av++) 
                categorizeCatAtt(catInfo, eddGrid.axisVariables()[av], id);
        }
    }

    /** 
     * This categorizes the institution of an EDD: 
     * it adds id to catInfo in the EDStatic.INSTITUTION category,
     * under the file-name-safe version of edd.institution().
     * Nothing is added if String2.indexOf doesn't find EDStatic.INSTITUTION
     * in EDStatic.categoryAttributes (index &lt; 0),
     * or if the file-name-safe institution is "".
     *
     * @param catInfo the new categoryInfo
     * @param edd the dataset whose institution is to be categorized
     * @param id the edd.datasetID()
     */
    protected static void categorizeInstitution(Map catInfo, EDD edd, String id) {
        //Look up the index here, so this method doesn't depend on an 
        //institutionCat variable which is declared somewhere else.
        int institutionCat = String2.indexOf(EDStatic.categoryAttributes, EDStatic.INSTITUTION);
        if (institutionCat < 0)
            return;

        String catAtt = String2.modifyToBeFileNameSafe(edd.institution());
        if (catAtt.length() > 0) 
            addIdToCatInfo(catInfo, EDStatic.INSTITUTION, catAtt, id);
    }

    /** 
     * This categorizes the categoryAttributes of an EDV:
     * for each name in EDStatic.categoryAttributes (other than the institution
     * entry), it gets that attribute from edv.combinedAttributes(),
     * makes it file-name-safe and, if the result isn't "", 
     * adds id to catInfo under that category name and value.
     *
     * @param catInfo the new categoryInfo
     * @param edv the variable whose attributes are to be categorized
     * @param id the edd.datasetID()
     */
    protected static void categorizeCatAtt(Map catInfo, EDV edv, String id) {
        //Look up the index here (once, before the loop), so this method doesn't 
        //depend on an institutionCat variable which is declared somewhere else.
        //Institution is skipped below because it is handled by categorizeInstitution.
        int institutionCat = String2.indexOf(EDStatic.categoryAttributes, EDStatic.INSTITUTION);

        int nCat = EDStatic.categoryAttributes.length;
        for (int cat = 0; cat < nCat; cat++) {
            if (cat != institutionCat) {
                String catName = EDStatic.categoryAttributes[cat]; //e.g., standard_name
                String catAtt = String2.modifyToBeFileNameSafe(  //e.g., sea_water_temperature
                    edv.combinedAttributes().getString(catName));
                if (catAtt.length() > 0) 
                    addIdToCatInfo(catInfo, catName, catAtt, id);
            }
        }
    }

    /**
     * This records a datasetID under one value of one category.
     * catInfo must already have a HashMap for catName. 
     * The HashSet for catAtt is created if it doesn't exist yet.
     * 
     * @param catInfo the new categoryInfo (a map of catName to a HashMap 
     *     of catAtt to a HashSet of datasetIDs)
     * @param catName the category name, e.g., institution
     * @param catAtt the category value, e.g., NDBC
     * @param id  the edd.datasetID(), e.g., ndbcCWind41002
     */
    protected static void addIdToCatInfo(Map catInfo, 
        String catName, String catAtt, String id) {

        HashMap idsByCatAtt = (HashMap)catInfo.get(catName);

        //reuse the set of datasetIDs for this catAtt, or start a new one
        Object existing = idsByCatAtt.get(catAtt);
        HashSet ids;
        if (existing != null) {
            ids = (HashSet)existing;
        } else {
            ids = new HashSet();
            idsByCatAtt.put(catAtt, ids);
        }
        ids.add(id);
    }


}
