//
// For information on how to use these utilities, please see:
//
//   https://github.com/fiberplane/monofiber/blob/main/studio/docs/UNICODE.md
//

/**
 * Counts Unicode Scalar Values (non-surrogate codepoints) in a string, treating
 * every surrogate pair as a single character.
 *
 * @param string - The string to inspect.
 * @param index - Optional UTF-16 code-unit index at which to stop counting.
 *   Values past the end of the string are clamped to the string's length. When
 *   omitted, the entire string is counted.
 * @returns The number of characters found before the stopping point.
 * @throws Error if a high surrogate inside the counted range is the very last
 *   code unit of the string (an unclosed surrogate pair).
 *
 * Note that the code unit following a high surrogate is skipped without
 * checking that it really is a low surrogate.
 */
export function charCount(string: string, index?: number): number {
  const length = string.length;
  const limit = index === undefined ? length : Math.min(index, length);

  let total = 0;
  let position = 0;
  while (position < limit) {
    const startsPair = isHighSurrogate(string.charCodeAt(position));
    position += 1;
    total += 1;

    if (startsPair) {
      // The second half of the pair must exist, and is stepped over so the
      // pair as a whole contributes only one to the total:
      if (position === length) {
        throw new Error("Unclosed surrogate pair");
      }
      position += 1;
    }
  }

  return total;
}

/**
 * Returns the UTF-16 code-unit index at which the Unicode Scalar Value
 * (non-surrogate codepoint) with the given offset can be found.
 *
 * An offset that is exactly one past the last character is allowed and yields
 * the index right after that character. An offset of zero (or less) yields
 * `startIndex` unchanged.
 *
 * @param string - The string to walk through.
 * @param offset - Number of characters to advance.
 * @param startIndex - Optional code-unit index from which to start counting.
 *   When given, `offset` is relative to this index.
 * @returns The code-unit index reached after advancing `offset` characters.
 * @throws RangeError if the end of the string is reached before `offset`
 *   characters have been stepped over.
 *
 * Note that a high surrogate is always stepped over as a full pair, even when
 * it is the last code unit of the string; in that case the returned index can
 * be greater than `string.length`.
 */
export function charIndex(
  string: string,
  offset: number,
  startIndex = 0,
): number {
  let index = startIndex;

  for (let count = 0; count < offset; count++) {
    const charCode = string.charCodeAt(index);

    // `charCodeAt()` yields NaN for an index outside the string. Since we only
    // get here while there are still characters left to step over, running out
    // of string always means the requested offset does not exist:
    if (Number.isNaN(charCode)) {
      throw new RangeError("Offset out of bounds");
    }

    // Surrogate pairs occupy two code units but count as one character:
    index += isHighSurrogate(charCode) ? 2 : 1;
  }

  return index;
}

/**
 * Extracts part of a string, much like `string.slice()`, except that `start`
 * and `end` are expressed in characters (surrogate pairs count as one) rather
 * than in UTF-16 code units.
 *
 * @param string - The string to take the slice from.
 * @param start - Character offset of the first character to include.
 * @param end - Optional character offset at which the slice stops (exclusive).
 *   When omitted, the slice runs to the end of the string.
 * @returns The requested portion of the string.
 *
 * Unlike `string.slice()`, offsets are resolved through `charIndex()`, so any
 * error raised there for an offset beyond the end of the string propagates to
 * the caller instead of the offset being clamped.
 */
export function charSlice(string: string, start: number, end?: number): string {
  const from = charIndex(string, start);

  // A falsy `end` (omitted, `0` or `NaN`) is handed to `slice()` as-is rather
  // than being resolved to a code-unit index first:
  if (!end) {
    return string.slice(from, end);
  }

  // The end offset is resolved relative to the already-located start, so the
  // leading part of the string is not walked a second time:
  const to = charIndex(string, end - start, from);
  return string.slice(from, to);
}

/**
 * Returns whether a given character code is a "high surrogate" (0xD800 through
 * 0xDBFF), which indicates the start of a Unicode surrogate pair.
 *
 * The check is an explicit range comparison, so values that do not fit in 16
 * bits (such as full code points above 0xFFFF) and `NaN` are never reported as
 * high surrogates.
 *
 * @param charCode - A UTF-16 code unit, e.g. as returned by
 *   `String.prototype.charCodeAt()`.
 * @returns `true` if `charCode` lies in the high-surrogate range, `false`
 *   otherwise.
 */
export function isHighSurrogate(charCode: number): boolean {
  return charCode >= 0xd800 && charCode <= 0xdbff;
}
