/**
 * A single emoji entry as produced by the parser in this file.
 */
export type UnicodeEmoji = {
  /** Identifier derived from the row's description text (lower-cased, spaces turned into hyphens). */
  name: string;
  /** The emoji itself, as the literal character sequence captured from the row. */
  character: string;
};

/**
 * Parsed emoji, keyed first by group name and then (optionally) by subgroup
 * name.
 */
export type EmojiByGroup = Record<
  string,
  {
    /** Emoji filed directly under the group, i.e. not under any subgroup. */
    emoji: UnicodeEmoji[];
    /** Emoji of this group bucketed by subgroup name. */
    subgroups: Record<string, { emoji: UnicodeEmoji[] }>;
  }
>;

/**
 * Matches a group header line, e.g. `# group: Smileys & Emotion`.
 * The group name is exposed through the named capture `group`.
 */
const groupLineRegex = /^#\sgroup:\s(?<group>.*)$/;

/**
 * Matches a subgroup header line, e.g. `# subgroup: face-smiling`.
 * The subgroup name is exposed through the named capture `group` (the same
 * capture name the group header regex uses).
 */
const subgroupLineRegex = /^#\ssubgroup:\s(?<group>.*)$/;

/**
 * Matches an emoji data row of the form
 * `<code points> ; <status> # <emoji> E<version> <description>`.
 *
 * Named captures:
 * - `unicode`     — hex code points (may carry surrounding whitespace)
 * - `status`      — lower-case status keyword such as `fully-qualified`
 * - `emoji`       — the run of non-whitespace characters after `#`
 * - `description` — the trailing name text
 *
 * The description class lists the hyphen last so that it is an unambiguous
 * literal; writing it between `"` and `\s` only parses thanks to legacy
 * (Annex B) rules and is a SyntaxError when the `u` flag is used.
 *
 * NOTE(review): the description only admits lower-case letters, `:`, `"`,
 * `-` and whitespace, so rows whose description contains anything else
 * (upper-case letters, digits, other punctuation) do not match at all —
 * confirm this filtering is intended.
 */
const emojiRowRegex =
  /^(?<unicode>[0-9A-F\s]*)\s*;\s*(?<status>[a-z-]*)\s*#\s*(?<emoji>\S*)\s*E[0-9]*\.[0-9]*\s*(?<description>[a-z:"\s-]*)$/;

/** Group name used for emoji rows that appear before any group header. */
const defaultGroupName = "Unknown";

/**
 * Parses the text of an `emoji-test.txt`-style file into emoji bucketed by the
 * `# group:` / `# subgroup:` header lines that precede them.
 *
 * While a parse is running the instance remembers the most recently seen
 * group and subgroup; that state is reset at the start of every
 * `parseFromString` call so one instance can be reused safely.
 */
export class UnicodeEmojiParser {
  protected currentGroupName: string | null = null;
  protected currentSubgroupName: string | null = null;

  /**
   * Parses the whole text and returns the emoji it contains, grouped.
   *
   * Rows with status `unqualified` or `component` are skipped, as are rows
   * whose (trimmed) code point column is listed in `exclude`. Rows seen before
   * any group header are filed under the default group name.
   *
   * @param value   Full file contents; lines are split on `\n` and trimmed,
   *                so `\r\n` line endings are handled as well.
   * @param exclude Code point sequences to drop, written exactly as in the
   *                first column of a row (e.g. `"1F44B 1F3FB"`).
   * @returns A fresh object keyed by group name.
   * @throws Error `invalid_emoji_row` if `parseEmojiLine` is handed a line
   *         that does not match the emoji row pattern.
   */
  public parseFromString(value: string, exclude: string[] = []): EmojiByGroup {
    // Each call is an independent parse: drop whatever headers an earlier
    // call left behind, otherwise header-less rows would be filed under the
    // previous input's last group/subgroup.
    this.currentGroupName = null;
    this.currentSubgroupName = null;

    const groups: EmojiByGroup = {};
    const excluded = new Set(exclude);

    // Own-property check: `key in obj` would also find inherited members such
    // as "constructor" or "toString" and skip initialising the bucket.
    const hasOwn = (target: object, key: string): boolean =>
      Object.prototype.hasOwnProperty.call(target, key);

    for (const rawLine of value.split("\n")) {
      const line = rawLine.trim();

      if (this.isGroupLine(line)) {
        // A new group always starts without an active subgroup.
        this.currentSubgroupName = null;
        this.currentGroupName =
          line.match(groupLineRegex)?.groups?.group ?? null;
      }

      if (this.isSubgroupLine(line)) {
        this.currentSubgroupName =
          line.match(subgroupLineRegex)?.groups?.group ?? null;
      }

      if (!this.isEmojiLine(line)) continue;

      const { unicode, status, description, emoji } = this.parseEmojiLine(line);
      if (excluded.has(unicode)) continue;
      if (status === "unqualified" || status === "component") continue;

      const groupName = this.currentGroupName ?? defaultGroupName;
      let group = hasOwn(groups, groupName) ? groups[groupName] : undefined;
      if (group === undefined) {
        group = { emoji: [], subgroups: {} };
        groups[groupName] = group;
      }

      // Emoji go into the active subgroup when there is one, otherwise
      // straight into the group's own list.
      let bucket = group.emoji;
      const subgroupName = this.currentSubgroupName;
      if (subgroupName !== null) {
        let subgroup = hasOwn(group.subgroups, subgroupName)
          ? group.subgroups[subgroupName]
          : undefined;
        if (subgroup === undefined) {
          subgroup = { emoji: [] };
          group.subgroups[subgroupName] = subgroup;
        }
        bucket = subgroup.emoji;
      }

      bucket.push({
        name: this.parseDescription(description),
        character: emoji,
      });
    }

    return groups;
  }

  /**
   * Parses rows from the emoji test text file.
   * These rows are in the format:
   * <unicode> [...modifiers] ; <status> # <emoji-character> <version> <description>
   *
   * @returns The four captured fields, each trimmed.
   * @throws Error `invalid_emoji_row` when the row does not match.
   */
  protected parseEmojiLine(row: string) {
    const groups = row.match(emojiRowRegex)?.groups;
    if (!groups) throw new Error("invalid_emoji_row");

    const { unicode, status, emoji, description } = groups;

    return {
      unicode: unicode.trim(),
      emoji: emoji.trim(),
      status: status.trim(),
      description: description.trim(),
    };
  }

  /** Whether the line is a `# group: …` header. */
  protected isGroupLine(line: string): boolean {
    return groupLineRegex.test(line);
  }

  /** Whether the line is a `# subgroup: …` header. */
  protected isSubgroupLine(line: string): boolean {
    return subgroupLineRegex.test(line);
  }

  /** Whether the line is an emoji data row. */
  protected isEmojiLine(line: string): boolean {
    return emojiRowRegex.test(line);
  }

  /**
   * Turns a row description into an emoji name: spaces become hyphens, colons
   * and double quotes are removed, and the result is lower-cased.
   */
  protected parseDescription(description: string): string {
    return description
      .replace(/ /g, "-")
      .replace(/[:"]/g, "")
      .toLowerCase();
  }
}
