import {deburr, escapeRegExp} from "lodash";
import React from "react";
import {endRegexEval, NBSP, RE_PLEX_ID, startRegexEval} from "../lib/utils";

/**
 * Regex character-class source (not a RegExp) covering the markdown control characters
 * handled by this module: [ ] ` # * _
 */
export const MARKDOWN_CHARS = "[[\\]`#*_]";
/**
 * Matches a markdown control character that is preceded by a backslash.
 * Capture group 1 holds the bare character.
 */
export const RE_MD_CHARS_ESCAPED = new RegExp(String.raw`\\(${MARKDOWN_CHARS})`, "g");

// Misc utilities with no better home ATM
// WARNING: must NOT change the length of the text
/**
 * Fold accented characters to their plain equivalents for matching purposes, while keeping
 * every character at its original offset.
 *
 * Offsets found in the normalized text are used to slice the original text (see
 * markMatchingText), so the result must have exactly the same length as the input.  A plain
 * deburr() of the whole string does not guarantee that: it expands ligatures ("Æ" becomes
 * "Ae", "ß" becomes "ss") and drops combining marks.  Each code point is therefore folded on
 * its own, and the fold is only accepted when it keeps the length; otherwise the original
 * character is kept.
 *
 * @param {string} text text to normalize
 * @returns {string} normalized text with the same length as the input.  Non-string input is
 *   handed straight to deburr(), as before.
 */
export const normalize = text => {
  const folded = deburr(text);
  if (typeof text !== "string" || folded === text) {
    // Nothing to fold (the common, plain-ASCII case), or not a string at all
    return folded;
  }
  // Array.from iterates by code point, so surrogate pairs are folded (and kept) as a unit
  return Array.from(text, ch => {
    const plain = deburr(ch);
    return plain.length === ch.length ? plain : ch;
  }).join("");
};

/**
 * Look for a <mark> descendant of the given element.
 *
 * @param {?{querySelector: Function}} element node to inspect; may be null/undefined
 * @returns {*} whatever element.querySelector('mark') returns, or undefined when no element
 *   was given
 */
export const hasMark = (element) => (
  element === null || element === undefined
    ? undefined
    : element.querySelector('mark')
);

/**
 * Wrap the parts of `withinText` that match a search filter in <mark> elements.
 *
 * The whole (normalized) filter is tried first, then the filter without its final word, then
 * every individual word longer than two characters; longer phrases are applied before shorter
 * ones and text that is already marked is never marked again.  Markdown link targets -- the
 * "(url)" or "[ref]" directly following a "[label]", and whole "[ref]: ..." definition lines --
 * are swapped for placeholder tokens while marking and restored afterwards, so they are never
 * marked.
 *
 * @param {string} withinText text to search; falsy or non-string values are returned unchanged
 * @param {string} filter search text; when falsy, withinText is returned unchanged
 * @param {{asString?: boolean, attrs?: Object}} [params]
 *   asString: produce "<mark ...>" HTML strings instead of elements;
 *   attrs: extra attributes applied to every generated mark (the generated `key` wins)
 * @returns {*} when asString is set, a single string; otherwise the only resulting piece if
 *   there is just one, or else an array mixing strings and marks
 */
export const markMatchingText = (withinText, filter, params = {}) => {
  const {asString = false, attrs = {}} = params;
  if (!filter || !withinText || typeof (withinText) !== "string") {
    return withinText;
  }
  // Check the full filter, as well as individual words within it
  // Also check word blocks that exclude the final word
  const options = [normalize(filter)];
  const words = options[0].trim().split(/\s/).filter(el => el && el.length > 2);
  if (words.length > 1) {
    options.push(words.slice(0, -1).join(" "));
  }
  options.push(...words);
  const searchPhrases = Array.from(new Set(options))
    .sort((a, b) => b.length - a.length);

  // Replace link targets with placeholder tokens.  One counter numbers both the placeholders
  // and the generated mark keys.
  const protectedLinks = new Map();
  let serial = 0;
  const sansLinks = withinText
    .replace(/(^\[[^\]]+]:.*$|((?<=\[[^\]]+])(\[[^\]]+]|\([^)]+\))))/mg, expr => {
      const key = `____${++serial}____`;
      protectedLinks.set(key, expr);
      return key;
    });

  const markPhrase = (phrase, inText, keyBase = "key") => {
    // Match any non-word characters in the input phrase, and allow for extra dash or space preceding digits (i.e.
    // allow "MEK2" to match "MEK-2" or "MEK 2")
    const phraseRe = looseMatchRegExp(phrase);
    if (phraseRe.test("")) {
      // A phrase made only of punctuation yields a pattern that matches the empty string;
      // scanning with it never advances and would mark nothing useful, so skip the phrase.
      return [inText];
    }
    const normalized = normalize(inText);
    // Spans occupied by link placeholders; a match touching one would corrupt the token
    // (e.g. the filter "1" matching inside "____1____") and lose the link.
    const shielded = Array.from(normalized.matchAll(/____\d+____/g))
      .filter(found => protectedLinks.has(found[0]))
      .map(found => [found.index, found.index + found[0].length]);
    const output = [];
    let offset = 0;
    // matchAll works on a copy of the regex and always advances, even on empty matches
    for (const match of normalized.matchAll(phraseRe)) {
      const start = match.index;
      const end = start + match[0].length;
      if (end === start || shielded.some(([from, to]) => start < to && end > from)) {
        continue;
      }
      if (start > offset) {
        output.push(inText.substring(offset, start));
      }
      output.push(mark(inText.substring(start, end),
                       {asString, attrs: {...attrs, key: `${keyBase}-${++serial}`}}));
      offset = end;
    }
    if (offset < inText.length) {
      output.push(inText.substring(offset));
    }
    return output;
  };

  let marked = [sansLinks];
  searchPhrases.forEach((phrase, phraseIndex) => {
    const keyBase = `${phrase.replace(/\W+/g,"-")}-${phraseIndex}`;
    // Build the next generation of pieces wholesale, so no stale piece can be left behind
    marked = marked.flatMap(value => {
      if (typeof (value) !== "string" || value.startsWith("<mark")) {
        // Already marked, leave it alone
        return [value];
      }
      return value ? markPhrase(phrase, value, keyBase) : [];
    });
  });
  // Tokens with no stored link (text that merely looks like a placeholder) are left as they are
  const withRestoredLinks = marked.map(el => typeof (el) === "string"
                                             ? el.replace(/____\d+____/mg, expr => protectedLinks.get(expr) ?? expr)
                                             : el);
  if (asString) {
    return withRestoredLinks.join("");
  }
  return withRestoredLinks.length === 1 ? withRestoredLinks[0] : withRestoredLinks;
};

/**
 * Wrap `text` in a <mark> element.
 *
 * @param {*} text content of the mark
 * @param {{asString?: boolean, attrs?: Object}} [options]
 *   asString: return an HTML string instead of a React element;
 *   attrs: attributes (string mode) or props (element mode) to put on the mark
 * @returns {string|Object} "<mark ...>text</mark>" when asString is set, otherwise the element
 *   created by React.createElement
 */
export const mark = (text, {asString = false, attrs = {}} = {}) => {
  if (asString) {
    // Double quotes inside a value would terminate the attribute early, so encode them.
    // The text itself is emitted as-is.
    const sattrs = Object.entries(attrs)
      .map(([key, value]) => ` ${key}="${`${value}`.replace(/"/g, "&quot;")}"`)
      .join("");
    return `<mark${sattrs}>${text}</mark>`;
  }
  // Same element as <mark {...attrs}>{text}</mark>
  return React.createElement("mark", attrs, text);
};

// Cf python highlight matching in autosuggest.py
// Ensure "ICG001" will match "ICG-001" or "ICG:001" or "ICG 001" and vice versa
/**
 * Build a forgiving regular expression for a search phrase.
 *
 * - each run of whitespace in the phrase matches one whitespace character;
 * - each run of other characters outside [A-Za-z0-9] (underscore included) matches at most one
 *   non-word character or underscore, but never a newline or "*";
 * - a digit that directly follows a letter or underscore in the resulting pattern may be
 *   preceded by one "-", ":" or whitespace character.
 *
 * @param {string} phrase text to search for
 * @param {string} [flags='giu'] flags for the resulting RegExp
 * @returns {RegExp} a new RegExp on every call
 */
export const looseMatchRegExp = (phrase, flags = 'giu') => {
  // Stand in word-character tokens for whitespace and punctuation, so that escaping leaves
  // them alone and they can be expanded into regex syntax afterwards
  const tokenized = phrase
    .replace(/\s+/g, "ssSPACEss")
    .replace(/[\W_]/gu, "__DOT__");
  let pattern = escapeRegExp(tokenized);
  // Expand the tokens
  pattern = pattern.replace(/ssSPACEss/gu, "\\s");
  pattern = pattern.replace(/__DOT__(__DOT__)*/gu, "((?![\n*])[\\W_])?");
  // Tolerate a separator between a letter and the digit that follows it
  pattern = pattern.replace(/(?<=[^\d\s\W])(\d)/gu, "[-:\\s]?$1");
  return new RegExp(pattern, flags);
};

/**
 * Shield markdown control characters by rewriting each one that is not already preceded by a
 * backslash as "---\<char>---".  restoreMarkdownChars reverses this.
 *
 * @param {string} s text to shield
 * @returns {string} the text with every unescaped markdown character wrapped
 */
const hideMarkdownChars = s => {
  const unescapedMdChar = new RegExp(`((?<!\\\\)${MARKDOWN_CHARS})`, "gs");
  return s.replace(unescapedMdChar, "---\\$1---");
};
/**
 * Undo hideMarkdownChars: turn every "---\<char>---" wrapper back into the bare markdown
 * character.
 *
 * @param {string} s text containing shielded characters
 * @returns {string} the text with all wrappers removed
 */
const restoreMarkdownChars = s => {
  const shieldedMdChar = new RegExp(`---\\\\(${MARKDOWN_CHARS})---`, "gs");
  return s.replace(shieldedMdChar, "$1");
};

// Patch up any markdown which may have inadvertent escapes or other problematic patterns
/**
 * Run a fixed sequence of regex rewrites over markdown text: convert a little HTML (<br/>,
 * <ul>/<ol>) to newlines, turn standalone parenthesised URLs into links, escape underscores
 * that are unlikely to be markdown, adjust spacing around <mark>/<span> markup, undo escapes
 * where they would break things (interpolation braces, link targets, code spans, attribute
 * values) and escape square brackets that are probably not links.
 *
 * The rewrites depend on each other's output; keep their order.
 *
 * @param {string} s markdown text
 * @returns {string} the patched text
 */
export const prepareMarkdown = s => {
  // ESCAPE_THIS {NOT_THIS}
  // [ESCAPE_THIS][NOT_THIS]
  // [ESCAPE_THIS](NOT_THIS)
  // (ESCAPE_THIS) (https://NOT_THIS)
  // FIXME is this really necessary with a proper escapeMD and usage?
  // FIXME probably doesn't need any interpolation-related stuff, since these rules are applied post-interpolation
  try {
    startRegexEval(s, "prepareMarkdown");
    let modified = s
      // Based on extant markup from interpro data
      .replace(/<br\/>/gi, "\n")
      .replace(/\s*<ul>([^\S\n]*\n|$)/gi, "\n")
      .replace(/\s*<\/ul>([^\S\n]*\n|$)/gi, "\n\n")
      // Fix up standalone URLs (w/parens)
      // The link label is the last path segment with every underscore (escaped or not)
      // turned into a space
      .replace(/(?<!])(\((https?:\/\/[^)]+?\/)([^\/)]+)\))/gi, (match, parents, url, page) => {
        return `([${page.replace(/\\?_/g, " ")}](${url}${page}))`;
      })
      // Escape underscores where we can be pretty sure they're not markdown
      .replace(/(?<=[A-Za-z0-9])(?!interpolated)_(?!inline)(?=[A-Za-z0-9])/g, "\\_")
      // Preserve space after the mark and preceding a previously-existing span
      .replace(/^(<mark.*?<\/mark>) +(?=<span[^>]*>)/gm, `$1${NBSP}`)
      // Markdown will convert "^<mark>.*</mark>(.*)" into "<span><mark>.*</mark><p>$1</p></span>", which introduces
      // a line break after the marked text.  Wrap the trailing raw text in a span to avoid the line break.
      // This is only an issue when <mark> is at the beginning of line
      // NOTE(review): "(?!=<span)" is a negative lookahead for the literal text "=<span", so it
      // does not exclude a following <span; "(?!<span)" may have been intended -- confirm.
      .replace(/^(<mark[^>]*>[^<]+<\/mark>(?!=<span))(.+)$/gm, "$1<span class='prepared'>$2</span>")
      // Preserve any space at the beginning of an inserted span directly after marked text (only if at BOL)
      // Avoid stomping on <ul>/<ol>
      // NOTE(review): the first lookahead alternative is the literal text "= " at end of line;
      // "(?! $|[*\d])" may have been intended -- confirm.
      .replace(/^(<mark.*?<\/mark><span[^>]*>)? (?!= $|[*\d])/gm, `$1${NBSP}`)
      // Preserve double-newline after </span>, but only when line starts with <mark>
      .replace(/^(<mark.*?<\/span>)( {2}|\n)\n/gm, "$1<p/>")
      // Escape underscores around marked text
      .replace(/((?<=<\/mark>)_|_(?=<mark))/g, "\\_")
      // No escape within interpolation braces
      .replace(/(?<={)([^}]+?\\_[^}]+)(?=})/gs, m => m.replace(/\\_/g, "_"))
      // No escape within hyperlink link definition
      .replace(/(?<!\\)(?<=]\()([^)]+?\\_[^)]+)(?=\))/gs, m => m.replace(/\\_/g, "_"))
      // No escape within reference link definition
      .replace(/(?<!\\)(?<=]\[)(\s*[^\]]+?\\_[^\]]+)(?=])/gs, m => m.replace(/\\_/g, "_"))
      // No escape between <code> tags (between backticks)
      .replace(/(?<=`)([^`]+?\\_[^`]+)(?=`)/gs, m => m.replace(/\\_/g, "_"))
      // No escape in attribute values (double-quoted strings)
      .replace(/(?<==")([^"]+?\\_[^"]+)(?=")/gs, m => m.replace(/\\_/g, "_"))
      // Fix up ul/ol
      .replace(/\n<[uo]l>\n/gi, "\n")
      .replace(/\n<\/[uo]l>\n/gi, "\n\n")
      // Preserve all markdown characters within angle brackets
      .replace(/(<[^>]+>)/gs, match => hideMarkdownChars(match))
      // Escape "[M](N)" patterns that are probably not links (e.g. IUPAC names)
      // e.g. dioxooctahydro-1h-pyrazino[1,2-a](6s,9as)-n-benzyl
      // NOTE(review): this pattern has no "g" flag, so only the first occurrence is examined --
      // confirm whether that is deliberate.
      .replace(/(?<=[^\\])\[[^\]]*(?<!\\)]\(([^)]+)\)/,
               (match, url) => /^(#|https?:)/.test(url) || RE_PLEX_ID.test(url)
                               ? match : match.replace(/([[\]])/g, "\\$1"));
    // Escape brackets within MD link text (e.g. IUPAC names)
    // Make sure there are actually nested brackets within an MD link label before escaping,
    // otherwise the nested bracket capture will recurse forever due to the OR match.
    // NOTE(review): the guard inspects the original text `s` while the rewrite below works on
    // `modified`; "(?!\\])" here and "(?!\\)" / "(?!<\\)" below are lookaheads where lookbehinds
    // "(?<!\\)" may have been intended -- confirm before changing.
    if (/(?<!\\)\[[^\]]*(?<!\\)\[.*?(?!\\])]\((https?|[a-z][-_0-9a-z]+):[^)\s]+\)/gm.test(s)) {
      // escape square brackets within MD links (typically IUPAC names) ==> problematic
      modified = modified.replace(/(?<!\\)\[((?:(?!\\)\[[^\[\]]+?(?<!\\)]|[^\[\]]+?)+?)(?!<\\)]\(([^)]+)\)/gm,
                                  (match, label, url) => {
                                    if (/^(#|https?:)/.test(url) || RE_PLEX_ID.test(url)) {
                                      return `[${label.replace(/([\[\]])/gm, "\\$1")}](${url})`;
                                    }
                                    return match.replace(/([\[\]])/gm, "\\$1");
                                  });
    }
    // Bring back the markdown characters that were shielded inside angle brackets
    return restoreMarkdownChars(modified);
  }
  finally {
    endRegexEval(s);
  }
};

/**
 * Backslash-escape every markdown control character that is not already escaped.  Characters
 * inside angle-bracket markup (<...>) are left untouched.
 *
 * @param {*} obj a value with a toString() method, or anything exposing map() (e.g. an array),
 *   in which case each item is escaped in turn
 * @returns {*} the escaped string, or the result of mapping escapeMD over the items
 */
export const escapeMD = (obj) => {
  if (obj.map) {
    return obj.map(item => escapeMD(item));
  }
  const text = obj.toString();
  try {
    startRegexEval(text, "escapeMD");
    // Preserve any MD chars within html markup
    const markupShielded = text.replace(/(<[^>]+>)/gs, match => hideMarkdownChars(match));
    // Escape any MD chars not already escaped
    const unescapedMdChar = new RegExp(`(?<!\\\\)(${MARKDOWN_CHARS})`, "gs");
    const escaped = markupShielded.replace(unescapedMdChar, "\\$1");
    return restoreMarkdownChars(escaped);
  }
  finally {
    endRegexEval(text);
  }
};

/**
 * Remove the backslash in front of every escaped markdown control character.
 *
 * @param {*} obj value to unescape; converted with toString() first
 * @returns {string} the text with "\<char>" replaced by "<char>"
 */
export const unescapeMD = (obj) => {
  const text = obj.toString();
  try {
    startRegexEval(text, "unescapeMD");
    const unescaped = text.replace(RE_MD_CHARS_ESCAPED, "$1");
    return unescaped;
  }
  finally {
    endRegexEval(text);
  }
};


/**
 * Pick a set of fields out of an object.
 *
 * @param {Object} [obj={}] source object
 * @param {Array<string>} [fields=[]] names of the fields to copy
 * @returns {Object} `obj` itself when no fields are requested; otherwise a new object holding
 *   each requested field (fields missing from `obj` are present with the value undefined)
 */
export const getObjectProjection = (obj = {}, fields = []) => {
  if (fields.length === 0) {
    return obj;
  }
  const projection = {};
  fields.forEach(fieldName => {
    projection[fieldName] = obj[fieldName];
  });
  return projection;
};

/**
 * Bundle a name, a template and its data into a sample record.
 *
 * @param {*} name stored as `name`
 * @param {*} template stored as `template`
 * @param {*} data stored, by reference, as `dataObject`
 * @returns {{name: *, template: *, dataObject: *}} the sample record
 */
export const createSample = (name, template, data) => {
  const sample = {name, template, dataObject: data};
  return sample;
};

