/**
 * Fields shared by every token: where the token sits in the source text
 * and how much of the text it covers.
 */
export interface BaseToken {
  /** Zero-based offset of the token's first character in the tokenized text. */
  pos: number
  /** Number of characters (string `length` units) the token spans. */
  length: number
}

/**
 * A token carrying the matched text itself. `tokenize` emits one of these
 * for each run of word characters (`\w+`).
 */
export interface TextToken extends BaseToken {
  /** The matched characters; `tokenize` sets `length` to `text.length`. */
  text: string
}

/**
 * A separator token that records only its position and extent, not its
 * content. `tokenize` emits one of these for each run of non-word characters
 * (`\W+`), so despite the name it also covers punctuation and any other
 * character that `\w` does not match.
 */
export interface WhitespaceToken extends BaseToken {
  /** Marker property; always `true`. Used by `isWhitespaceToken` to identify the token. */
  whitespace: true
}

/** Any token produced by `tokenize`; narrow with `isTextToken` / `isWhitespaceToken`. */
export type Token = TextToken | WhitespaceToken

/**
 * Type guard for {@link WhitespaceToken}.
 *
 * @param token - The token to inspect. A nullish value is tolerated and
 *   yields `false`.
 * @returns `true` only when the token has a positive `length` and a truthy
 *   `whitespace` marker.
 */
export function isWhitespaceToken(token: BaseToken): token is WhitespaceToken {
  // Missing tokens and zero-length tokens never qualify.
  if (!(token?.length > 0)) {
    return false
  }
  const { whitespace } = token as WhitespaceToken
  return !!whitespace
}

/**
 * Type guard for {@link TextToken}.
 *
 * @param token - The token to inspect. A nullish value is tolerated and
 *   yields `false`.
 * @returns `true` only when the token has a positive `length` and a
 *   non-empty `text` value.
 */
export function isTextToken(token: BaseToken): token is TextToken {
  // Missing tokens and zero-length tokens never qualify.
  if (!(token?.length > 0)) {
    return false
  }
  const { text } = token as TextToken
  return !!text
}

/**
 * Splits `text` into consecutive tokens, alternating between runs of word
 * characters and runs of everything else.
 *
 * - A run matching `\w+` becomes a text token carrying the matched string.
 * - A run matching `\W+` becomes a whitespace token. This covers spaces,
 *   punctuation and any other character `\w` does not match (the pattern has
 *   no Unicode flag, so `\w` is `[A-Za-z0-9_]` only).
 *
 * Since every character belongs to exactly one of the two classes, the
 * tokens are emitted in source order and cover the whole input: each token's
 * `pos` equals the previous token's `pos + length`.
 *
 * @param text - The string to split.
 * @returns The tokens in order of appearance; an empty array when `text` is
 *   empty (or otherwise falsy).
 */
export const tokenize = (text: string) => {
  const result: Token[] = []
  if (!text) {
    return result
  }

  // Group 1 captures a run of word characters; the other alternative
  // consumes a run of non-word characters without capturing it.
  const runPattern = /(\w+)|\W+/g

  let run = runPattern.exec(text)
  while (run !== null) {
    const start = run.index
    const word = run[1]
    if (word !== undefined) {
      result.push({
        text: word,
        pos: start,
        length: word.length,
      })
    } else {
      /** Non-word run: only its extent is recorded, not its content. */
      result.push({
        whitespace: true,
        length: run[0].length,
        pos: start,
      })
    }
    run = runPattern.exec(text)
  }

  return result
}
