import Tokenizr from 'tokenizr'
import { chordsRegexp } from '../chord/chord-regexp.js'
import { type Token } from './token.js'

/** Token produced by `tokenize`; the type argument lists the kinds it accepts: 'chord', 'word' and 'section'. */
export type SongToken = Token<'chord' | 'word' | 'section'>

/**
 * Split raw song text into a flat list of tokens.
 *
 * Rules are registered in priority order, because the lexer tries them in
 * the order they were added and the first one that matches wins:
 *
 * 1. `// ...` line comments are dropped.
 * 2. Anything matching `chordsRegexp` becomes a `chord` token.
 * 3. `[Name ...]` becomes a `section` token whose value is the first
 *    whitespace-delimited word inside the brackets.
 * 4. Any other run of non-whitespace characters becomes a `word` token.
 * 5. Whitespace is dropped.
 *
 * @param text - Raw song text to tokenize.
 * @returns The tokens produced by the lexer, in input order.
 */
export const tokenize = (text: string): SongToken[] => {
  const lexer = new Tokenizr()

  // Comments must be registered before the catch-all word rule; otherwise
  // the word rule consumes `//` first and this rule can never fire.
  // The line break is left to the whitespace rule, so a comment on the last
  // line (with no trailing newline) is ignored as well.
  lexer.rule(/\/\/[^\r\n]*/, ctx => {
    ctx.ignore()
  })

  lexer.rule(chordsRegexp, ctx => {
    ctx.accept('chord')
  })

  // Stop at the first closing bracket so several bracketed items on one
  // line are not merged into a single section, and so the captured name
  // can never contain a `]`.
  lexer.rule(/\[([^\s\]]+)[^\]\r\n]*\]/, (ctx, match) => {
    ctx.accept('section', match[1])
  })

  /** For now we accept everything not a space as a word */
  lexer.rule(/[^ \t\r\n]+/, ctx => {
    ctx.accept('word')
  })

  lexer.rule(/[ \t\r\n]+/, ctx => {
    ctx.ignore()
  })

  lexer.input(text)

  // lexer.debug(true)

  // NOTE(review): Tokenizr appears to append a trailing EOF token to the
  // result of `tokens()`, which this cast hides from the type system —
  // confirm whether callers rely on it before narrowing further.
  return lexer.tokens() as SongToken[]
}

// Also expose `tokenize` as the module's default export.
export default tokenize
