import * as wanakana from 'wanakana'
import * as R from 'ramda'
import * as Diff from 'diff'

// Two-element string pairs. Each entry reads like a [search, replacement]
// correction for romaji spacing/casing (e.g. 'onnichi wa' -> 'onnichiwa').
// NOTE(review): nothing in the visible part of this file reads this table —
// confirm where (and in which direction) it is applied before editing entries.
const progSpecMap = [
  ['onnichi wa', 'onnichiwa'],
  [' pp', 'pp'],
  ['Desu.', 'desu.'],
  ['iniro', 'in iro'],
  ['mennasai', 'men nasai'],
  ['anjou bi', 'anjoubi'],
  ['goーrudenwiーku', 'goーruden wiーku'],
  ['oyasuminasai', 'oyasumi nasai']
  // Disabled entry, kept for reference:
  // ['hayougozaimasu', 'hayou gozaimasu']
]

/**
 * Concatenates a per-token pronunciation string for a tokenized sentence.
 *
 * For each token the contribution is chosen as follows:
 *  - `pronunciation` when `pos_detail_2` is 接続助詞 or `pos_detail_1` is 係助詞 / 格助詞;
 *  - otherwise `reading`, provided one exists and the surface form has no
 *    full-width digit;
 *  - otherwise the surface form itself.
 * Every piece is trimmed, and any character in the range 一-龯 is stripped
 * from the joined result.
 *
 * @param {Array<object>} [breakdown] Tokens; a nullish value yields ''.
 * @returns {string} The joined pronunciation string.
 */
export const createPronunciationMap = (breakdown) => {
  const pieces = []

  const pickPiece = (token) => {
    const { pos_detail_1, pos_detail_2, pronunciation = '', reading = '', surface_form = '' } = token

    const usesPronunciation =
      pos_detail_2 === '接続助詞' || pos_detail_1 === '係助詞' || pos_detail_1 === '格助詞'
    if (usesPronunciation) return pronunciation.trim()

    const hasFullWidthDigit = /[０-９]+/.test(surface_form)
    if (reading && !hasFullWidthDigit) return reading.trim()

    return surface_form.trim()
  }

  breakdown?.forEach((token) => {
    pieces.push(pickPiece(token))
  })

  return pieces.join('').replace(/[一-龯]/g, '')
}

// Part-of-speech tag values compared against token `pos` / `pos_detail_*` fields.
// 名詞 = noun
const NOUN = '名詞'
// 副詞可能 = noun usable adverbially
const ADVERBABLE = '副詞可能'
// 助詞 = particle
// NOTE(review): the visible code compares against the literal '助詞' instead of
// this constant — confirm whether PARTICLE is used further down the file.
const PARTICLE = '助詞'

/**
 * Walks a tokenized sentence and collects character offsets at which spaces,
 * capital letters and pipes should be applied.
 *
 * Offsets are relative to a running position that advances, per token, by the
 * trimmed length of its `reading` (or of its `surface_form` when the token has
 * no usable reading). Several rules push `currentPos - 1`, so an offset of -1
 * is possible for the first token.
 * NOTE(review): the offsets presumably index into the string built by
 * `createPronunciationMap` — confirm against the caller.
 *
 * @param {Array<object>} [breakdown] Tokens with `reading`, `basic_form`,
 *   `surface_form`, `pos`, `pos_detail_1`, `pos_detail_2`, `conjugated_form`
 *   and `conjugated_type` fields. A nullish value yields three empty arrays.
 * @returns {[number[], number[], number[]]} `[spaces, capitals, pipes]`.
 */
export const generateSpaces = (breakdown) => {
  const spaces = []
  const capitals = []
  const pipes = []
  let currentPos = 0

  breakdown?.forEach((bd, idx) => {
    const { reading = '', basic_form = '', surface_form = '', pos_detail_1 = '', pos_detail_2 = '', pos = '' } = bd

    // Tokens without a reading produce no markers, but they still occupy room
    // in the text: advance past them so later offsets stay aligned. (Checked
    // before touching `reading` so a null reading does not throw.)
    if (!reading) {
      if (typeof surface_form === 'string') currentPos += surface_form.trim().length
      return
    }

    const readingLength = reading.trim().length

    // certain nouns
    if (pos === NOUN && pos_detail_1 === '一般') {
      spaces.push(currentPos + readingLength - 1)
    }

    if (pos === NOUN && basic_form?.length > 1 && basic_form?.includes('月') && pos_detail_1 !== ADVERBABLE) {
      // months
      spaces.push(currentPos - 1 + readingLength - 2)
    } else if (pos_detail_2 === '助数詞') {
      // counters
      spaces.push(currentPos + readingLength - 1)
    } else if (pos_detail_1 === ADVERBABLE && basic_form?.includes('日')) {
      // days
      spaces.push(currentPos + readingLength - 4)
    } else if (pos === NOUN && ['サ変接続', ADVERBABLE].includes(pos_detail_1)) {
      spaces.push(currentPos + readingLength - 1)
    } else if (pos_detail_1 === '代名詞' && basic_form === '何') {
      // nan before stuff
      spaces.push(currentPos + readingLength - 1)
    } else if (basic_form === '月' && idx > 0) {
      spaces.push(currentPos - 1)
    } else if (basic_form === 'する' || basic_form === 'ある') {
      // suru / aru: space on both sides
      spaces.push(currentPos - 1)
      spaces.push(currentPos + readingLength - 1)
    } else if (pos === '助詞' && pos_detail_1 !== '接続助詞') {
      // particles (except conjunctive ones): space before, and after unless じゃ
      spaces.push(currentPos - 1)
      if (surface_form !== 'じゃ') spaces.push(currentPos + readingLength - 1)
    } else if (pos === '感動詞') {
      spaces.push(currentPos - 1)
    } else if (surface_form === 'ない') {
      // janai: no space when ない follows じゃ or a te-form / garu-form stem
      if (idx > 0 && breakdown[idx - 1].surface_form !== 'じゃ' && (breakdown[idx - 1].conjugated_form !== '連用テ接続' && breakdown[idx - 1].conjugated_form !== 'ガル接続')) {
        spaces.push(currentPos - 1)
      }
    } else if (pos === '副詞' || (pos === '形容詞'
      && ((idx + 1) < breakdown.length && breakdown[idx + 1].surface_form !== 'ない')
      && ((idx + 1) < breakdown.length && breakdown[idx + 1].conjugated_type !== '特殊・タ'))) {
      // adverbs/adjectives
      if (currentPos > 0) spaces.push(currentPos - 1)
      spaces.push(currentPos + readingLength - 1)
    } else if (pos === '助動詞' && (surface_form === 'です' || basic_form === 'ござる' || basic_form === 'です')) {
      // auxiliary verbs
      spaces.push(currentPos - 1)
    } else if (pos === '記号' && pos_detail_1 === '読点') {
      // commas
      spaces.push(currentPos)
    } else if (pos_detail_1 === '接尾' && basic_form !== '日' && basic_form !== '語') {
      // suffixes such as san
      spaces.push(currentPos - 1)
      if (basic_form === '年' || basic_form === '中' || basic_form === '円') spaces.push(currentPos + 1)
    } else if (pos_detail_1 === '数' && basic_form !== '十' && !(idx > 0 && breakdown[idx - 1].basic_form === '十')) {
      // numbers
      spaces.push(currentPos - 1)
      pipes.push(currentPos - 1)
    } else if (pos_detail_1 === '数' && idx < breakdown.length - 1 && breakdown[idx + 1].basic_form === '十') {
      // nijuu
      spaces.push(currentPos - 1)
      pipes.push(currentPos - 1)
    } else if (pos_detail_1 === '非自立' && (idx > 0 && breakdown[idx - 1].basic_form !== 'て')) {
      // no particle
      spaces.push(currentPos - 1)
    }

    if (idx > 0 && pos_detail_2 === '人名' && pos_detail_1 !== '接尾') {
      // capitals mid-sentence
      capitals.push(currentPos)
      spaces.push(currentPos - 1)
    }

    // A whitespace-only reading has zero trimmed length; fall back to the surface form.
    currentPos += readingLength || surface_form.trim().length
  })

  return [spaces, capitals, pipes]
}

/**
 * Decides, for a single token of a tokenized sentence, whether a space goes
 * before it (`pre`), after it (`post`) and whether it should be capitalized.
 *
 * The rules look at the token's part-of-speech fields and, for a few cases, at
 * its immediate neighbours in `breakdown`.
 *
 * @param {Array<object>} breakdown Tokens with `reading`, `basic_form`,
 *   `surface_form`, `pos`, `pos_detail_1`, `pos_detail_2`, `conjugated_form`
 *   and `conjugated_type` fields.
 * @param {number} idx Index of the token to inspect.
 * @returns {{pre: boolean, post: boolean, capitalized: boolean}} All flags are
 *   false when the token is missing or has no reading.
 */
export const getSpaces = (breakdown, idx) => {
  const returnData = { pre: false, post: false, capitalized: false }

  // A missing breakdown or out-of-range index yields the neutral flags rather
  // than a destructuring TypeError.
  const token = breakdown?.[idx]
  if (!token) return returnData

  const { reading = '', basic_form = '', surface_form = '', pos_detail_1 = '', pos_detail_2 = '', pos = '' } = token

  if (!reading) return returnData

  // certain nouns
  if (pos === NOUN && pos_detail_1 === '一般') {
    returnData.post = true
  }

  if (pos === NOUN && basic_form?.length > 1 && basic_form?.includes('月') && pos_detail_1 !== ADVERBABLE) {
    // months
    returnData.post = true
  } else if (pos_detail_2 === '助数詞') {
    // counters
    returnData.post = true
  } else if (pos_detail_1 === ADVERBABLE && basic_form?.includes('日')) {
    // days
    returnData.post = true
  } else if (pos === NOUN && ['サ変接続', ADVERBABLE].includes(pos_detail_1)) {
    returnData.post = true
  } else if (pos_detail_1 === '代名詞' && basic_form === '何') {
    // nan before stuff
    returnData.post = true
  } else if (basic_form === '月' && idx > 0) {
    returnData.pre = true
  } else if (basic_form === 'する' || basic_form === 'ある') {
    // suru / aru: space on both sides
    returnData.pre = true
    returnData.post = true
  } else if (pos === '助詞' && pos_detail_1 !== '接続助詞') {
    // particles (except conjunctive ones): space before, and after unless じゃ
    returnData.pre = true
    if (surface_form !== 'じゃ') returnData.post = true
  } else if (pos === '感動詞') {
    returnData.pre = true
  } else if (surface_form === 'ない') {
    // janai: no space when ない follows じゃ or a te-form / garu-form stem
    if (idx > 0 && breakdown[idx - 1].surface_form !== 'じゃ' && (breakdown[idx - 1].conjugated_form !== '連用テ接続' && breakdown[idx - 1].conjugated_form !== 'ガル接続')) {
      returnData.pre = true
    }
  } else if (pos === '副詞' || (pos === '形容詞'
    && ((idx + 1) < breakdown.length && breakdown[idx + 1].surface_form !== 'ない')
    && ((idx + 1) < breakdown.length && breakdown[idx + 1].conjugated_type !== '特殊・タ'))) {
    // adverbs/adjectives
    returnData.pre = true
    returnData.post = true
  } else if (pos === '助動詞' && (surface_form === 'です' || basic_form === 'ござる' || basic_form === 'です')) {
    // auxiliary verbs
    returnData.pre = true
  } else if (pos === '記号' && pos_detail_1 === '読点') {
    // commas
    returnData.post = true
  } else if (pos_detail_1 === '接尾' && basic_form !== '日' && basic_form !== '語') {
    // suffixes such as san
    returnData.pre = true
    if (basic_form === '年' || basic_form === '中' || basic_form === '円') returnData.post = true
  } else if (pos_detail_1 === '数' && basic_form !== '十' && !(idx > 0 && breakdown[idx - 1].basic_form === '十')) {
    // numbers
    returnData.pre = true
  } else if (pos_detail_1 === '数' && idx < breakdown.length - 1 && breakdown[idx + 1].basic_form === '十') {
    // nijuu
    returnData.pre = true
  } else if (pos_detail_1 === '非自立' && (idx > 0 && breakdown[idx - 1].basic_form !== 'て')) {
    // no particle
    returnData.pre = true
  }

  if (pos_detail_2 === '人名' && pos_detail_1 !== '接尾') {
    // personal names: capitalize and separate from what precedes
    returnData.capitalized = true
    returnData.pre = true
  }

  return returnData
}

// True when `c` is kanji according to wanakana, or is the iteration mark 々.
const isKanji = (c) => {
  if (wanakana.isKanji(c)) return true
  return c === '々'
}

/**
 * Rewrites `text` character by character: a kanji that has an entry in
 * `newFuriMap` and no truthy entry in `prog_data` is replaced by its
 * `newFuriMap` value; every other character is kept as is.
 *
 * @param {string} text Text to rewrite.
 * @param {object} [newFuriMap] Replacement per kanji character.
 * @param {object} [prog_data] Kanji present here (truthy) are left untouched.
 * @returns {string} The rewritten text.
 */
export const generateKanji = (text, newFuriMap = {}, prog_data = {}) => {
  const shouldSwap = (ch) => isKanji(ch) && newFuriMap[ch] && !prog_data[ch]

  return [...text].reduce(
    (built, ch) => built + (shouldSwap(ch) ? newFuriMap[ch] : ch),
    ''
  )
}

/**
 * Renders progressive tokens into a single display string.
 *
 * Each token's `text` pieces are joined, the first character is upper-cased
 * when `capitalized` is set, and a space is added before (`pre`) and/or after
 * (`post`). Any run of consecutive spaces in the final string is collapsed to
 * a single space.
 *
 * @param {Array<{text: string[], pre?: boolean, post?: boolean, capitalized?: boolean}>} tokens
 * @returns {string} The rendered sentence.
 */
export const tokensToStr = (tokens) => {
  let finalStr = ''

  tokens.forEach((t) => {
    const text = t.text.join('')

    // Guard the empty case: ''[0] is undefined and cannot be upper-cased.
    let str = t.capitalized && text.length > 0
      ? text[0].toUpperCase() + text.slice(1)
      : text

    if (t.pre) str = ` ${str}`
    if (t.post) str += ' '

    finalStr += str
  })

  // A piece ending in a space plus a `post` and the next token's `pre` can yield
  // three or more spaces; collapse the whole run, not just pairs.
  return finalStr.replace(/ {2,}/g, ' ')
}

/**
 * Cleans up a compiled sentence string by applying a fixed, ordered series of
 * replacements to its punctuation, spacing, quoting and capitalization.
 *
 * The order of the steps matters: later replacements operate on the output of
 * earlier ones. Calls that pass a string pattern, or a regex without the `g`
 * flag, only affect the FIRST match in the string.
 * NOTE(review): several first-match-only steps (e.g. '.' -> '. ', '-' -> 'ー')
 * look like they may have been meant to be global — confirm before changing.
 *
 * @param {string} str The compiled sentence to clean up.
 * @param {boolean} [romaji] When truthy, additionally converts the first 「 / 」
 *   to double quotes and the first 、 to ',' (and removes a space before ',').
 * @returns {string} The cleaned-up string.
 */
export const handleSpecialCases = (str, romaji) => {
  let compiledString = str

  // account for japanese periods following english: a trailing 。 directly after
  // a character wanakana classifies as romaji becomes '.'
  if (compiledString.length > 1 && compiledString[compiledString.length - 1] === '。' && wanakana.isRomaji(compiledString[compiledString.length - 2])) {
    compiledString = compiledString.replace(/。$/, '.')
  }

  // romaji-only punctuation conversions (first occurrence each)
  if (romaji) compiledString = compiledString.replace(/\s*」\s*/, '" ')
  if (romaji) compiledString = compiledString.replace(/\s*「\s*/, ' "')
  if (romaji) compiledString = compiledString.replace('、', ',')
  if (romaji) compiledString = compiledString.replace(' ,', ',')
  // period spacing (first occurrence each)
  compiledString = compiledString.replace(' .', '.')
  compiledString = compiledString.replace('..', '.')
  compiledString = compiledString.replace('.', '. ')
  compiledString = compiledString.replace('  ', ' ')
  // compiledString = compiledString.replace(' _ ', ' _____ ')

  compiledString = compiledString.replace(' ss', 'ss')

  // upper-case the first lowercase letter following '.', '?' or '!'
  compiledString = compiledString.replace(/(?:[.?!])\W*[a-z]/g, (i) => i.toUpperCase())
  // drop a space sitting between two characters of the bracketed class
  // (the 　-龯 range, ASCII / full-width digits, ？ ！ and _)
  compiledString = compiledString.replace(/[　-龯0-9０-９？！_] [　-龯0-9０-９？！_]/g, (i) => i.replace(' ', ''))
  // a trailing '.' after such a character becomes 。
  compiledString = compiledString.replace(/[　-龯0-9０-９？！_]\.\s*$/g, (i) => i.replace('.', '。'))
  compiledString = compiledString.replace(' 。', '。')
  // ...and 。 directly after an ASCII letter or digit becomes '.'
  compiledString = compiledString.replace(/[a-zA-Z0-9]。/g, (i) => i.replace('。', '.'))
  // ensure a space after a '.' that sits between two letters
  compiledString = compiledString.replace(/[a-zA-Z]\.[a-zA-Z]/g, (i) => i.replace('.', '. '))
  compiledString = compiledString.replace(/Ii/g, 'ii')
  // NOTE(review): this deletes the first ' ?' entirely (space AND question mark) — confirm intent
  compiledString = compiledString.replace(' ?', '')
  compiledString = compiledString.replace('-', 'ー')

  // compiledString = compiledString.replace(/\s*ー\s*/, (i) => i.trim())

  // replace japanese commas inside english text
  compiledString = compiledString.replace(/[a-zA-Z]、/, (i) => i.replace('、', ','))
  // drop spaces between two characters in the 一-龯 range, and between two ASCII digits
  compiledString = compiledString.replace(/[一-龯] [一-龯]/g, (i) => i.replace(' ', ''))
  compiledString = compiledString.replace(/[0-9] [0-9]/g, (i) => i.replace(' ', ''))

  // corner brackets adjacent to ASCII letters/digits become double quotes
  compiledString = compiledString.replace(/「[a-zA-Z0-9]/g, (i) => i.replace('「', '"'))
  compiledString = compiledString.replace(/[a-zA-Z0-9]」/g, (i) => i.replace('」', '"'))

  // add a space after a ',' that is directly followed by a character in 　-龯
  compiledString = compiledString.replace(/,[　-龯]/g, (i) => i.replace(',', ', '))

  // (name) to moushimasu
  compiledString = compiledString.replace('( ', '(')
  compiledString = compiledString.replace(')', ') ')
  compiledString = compiledString.replace('((', '(')

  compiledString = compiledString.replace('イーメール', 'Eメール')

  compiledString = compiledString.replace(' ？', '？')

  // remove an opening parenthesis that has no closing partner
  if (compiledString.indexOf('(') !== -1 && compiledString.indexOf(')') === -1) {
    compiledString = compiledString.replace('(', '')
  }

  // upper-case the first '. x' sequence (period, space, lowercase letter)
  compiledString = compiledString.replace(/\. [a-z]/, (i) => i.toUpperCase())

  compiledString = compiledString.replace(' 、 ', ', ')
  compiledString = compiledString.replace(' !', '!')

  // every pipe becomes a space; every ' 、' loses its leading space
  compiledString = compiledString.replace(/\|/g, ' ')
  compiledString = compiledString.replace(/ 、/g, '、')

  return compiledString
}

/**
 * Retired entry point kept so existing imports keep resolving.
 * @deprecated Always returns the literal string 'DEPRECATED'.
 * @returns {string}
 */
export const progressify = () => {
  return 'DEPRECATED'
}

// True for the small kana that combine with the preceding character
// (small ya/yo/yu in both scripts, and small katakana e/o/a/i).
const isSmall = (char) => {
  switch (char) {
    case 'ゃ':
    case 'ょ':
    case 'ゅ':
    case 'ュ':
    case 'ャ':
    case 'ョ':
    case 'ェ':
    case 'ォ':
    case 'ァ':
    case 'ィ':
      return true
    default:
      return false
  }
}

// Maps a small tsu (hiragana or katakana) to its full-size counterpart;
// any other input yields undefined.
const toBig = (char) => {
  const fullSizeTsu = { 'っ': 'つ', 'ッ': 'ツ' }
  return fullSizeTsu[char]
}

/**
 * Convert a character to its progressive equivalent. This can mean either returning
 * the character as is, or converting it to romaji depending on what's been passed in
 * the progressive map. Often, the conversion is straightforward, but sometimes the
 * character will need to be altered slightly as a combination depending on whether
 * certain modifying characters (like small tsu) are used. Concretely:
 *  - a small combining kana (see `isSmall`) as `char` returns '' because it was
 *    already emitted together with the character before it;
 *  - when `nextChar` is a small combining kana, the two are converted as one compound;
 *  - when `prevChar` is a small tsu, it is folded into this character's output
 *    (kept as kana, or romanized together via wanakana).
 * NOTE(review): a small tsu passed as `char` is not special-cased in this function;
 * it falls through to the generic branches — confirm what wanakana returns for it.
 *
 * @param char          The character we want to convert.
 * @param prevChar      The character coming before it.
 * @param nextChar      The character coming after it.
 * @param progMap       The user's progressive settings; a truthy entry for a character
 *                      (or two-character compound) keeps it un-romanized.
 * @param bonusData     Overrides for forced display of kanji, kana, etc. Only
 *                      `kana_override` is read here.
 * @param force         When equal to 'kanji', the character/compound is returned as is.
 * @param isParticle    Whether the character belongs to a particle.
 * @param pronunciation The token's pronunciation; used instead of `char` for particles
 *                      when it is exactly one character long. Defaults to `char`.
 * @returns {string}    The kana to display, its romaji, or '' (see above).
 */
export const convertToProg = (
  char,
  prevChar,
  nextChar,
  progMap = {},
  bonusData = null,
  force = null,
  isParticle = false,
  pronunciation = char
) => {
  // small combining kana are consumed by the previous call (see the nextChar branch)
  if (isSmall(char)) return ''
  if (isSmall(nextChar)) {
    // create and return the compound now
    if (progMap[char + nextChar] || force === 'kanji') {
      // keep a preceding small tsu as kana too when its big form is in the map (or forced)
      if ((progMap[toBig(prevChar)] || force === 'kanji') && ['っ', 'ッ'].includes(prevChar)) return prevChar + char + nextChar
      return char + nextChar
    }

    const combo = char + nextChar
    let romajiCombo

    // uniquely truncate this combo as kata: drop the last letter of the first
    // kana's romaji and append the romaji of the small vowel
    if (['ェ', 'ォ', 'ァ', 'ィ'].includes(nextChar)) {
      if (combo === 'ウィ') romajiCombo = 'wi'
      else {
        romajiCombo = wanakana.toRomaji(combo[0]).slice(0, -1)
        romajiCombo += wanakana.toRomaji(combo[1])
      }
      return romajiCombo
    }

    // include a preceding small tsu so wanakana romanizes all three together
    if (['っ', 'ッ'].includes(prevChar)) {
      return wanakana.toRomaji(prevChar + char + nextChar)
    }

    return wanakana.toRomaji(combo)
  }

  if (['っ', 'ッ'].includes(prevChar)) {
    // double start of current character if last was small tsu
    if ((progMap[char] && progMap[toBig(prevChar)]) || force === 'kanji') {
      return prevChar + char
    }
    let combo = wanakana.toRomaji(prevChar + char)
    // replace the 'tchi' romanization with 'cchi'
    if (combo === 'tchi') combo = 'cchi'
    return combo
  }

  // leave as is: forced, present in the map, not kana, or the long-vowel mark
  if (force === 'kanji' || progMap[char] || !wanakana.isKana(char) || char === 'ー') return char

  // if this is a particle, we should check the breakdown's pronunciation
  // field and return that (rather than something hardcoded)
  if (!bonusData?.kana_override && isParticle && pronunciation?.length === 1) return wanakana.toRomaji(pronunciation)

  // exception for small vowels; check whether the big version is in the map
  if (char === 'ぁ' && progMap['あ']) return 'ぁ'
  if (char === 'ぅ' && progMap['う']) return 'ぅ'
  if (char === 'ぇ' && progMap['え']) return 'ぇ'
  if (char === 'ぉ' && progMap['お']) return 'ぉ'
  if (char === 'ぃ' && progMap['い']) return 'ぃ'

  if (char === '・') return '・'
  return wanakana.toRomaji(char)
}

/**
 * Retired entry point kept so existing imports keep resolving.
 * @deprecated Always returns the literal string 'DEPRECATED'.
 * @returns {string}
 */
export const progressifyNew = () => {
  return 'DEPRECATED'
}

/**
 * Strips parenthesized furigana from Japanese text.
 *
 * Every `( … )` group (ASCII parentheses) whose content consists solely of
 * hiragana/katakana block characters is deleted; everything else is kept.
 *
 * e.g., `食(た)べない` => `食べない`
 * @param {string} text Text that may contain furigana in parentheses.
 * @returns {string} The text without the furigana groups.
 */
export const removeFuri = (text) => text.replace(/\([\u3040-\u309F\u30A0-\u30FF]+\)/g, '')

/**
 * Applies spacing, capitalization and romaji clean-up rules to progressive tokens.
 *
 * Each input token is shallow-copied and then adjusted by an ordered list of
 * rules keyed on its breakdown (`bd`) fields. Some rules also clear the `post`
 * flag of the previously emitted token. The rules run in sequence, so a later
 * rule can override the `pre`/`post`/`capitalized` value set by an earlier one.
 * The input tokens' `text` arrays are not modified.
 *
 * @param {Array<object>} pts Tokens of the shape
 *   `{ text: string[], pre, post, capitalized, bd }`, where `bd` is the
 *   tokenizer breakdown (may be absent).
 * @param {object} [bonusData] Only `particle_override` is read: when truthy,
 *   a 'wo' piece is not rewritten to 'o'.
 * @returns {Array<object>} New token objects, one per input token.
 */
export const cleanProgressiveTokens = (pts, bonusData) => {
  const sentenceEnders = ['。', '！', '？']
  const vowels = ['a', 'i', 'u', 'e', 'o']
  const newTokens = []

  pts.forEach((pt, idx) => {
    const newToken = { ...pt }
    const bd = pt.bd
    // the already-cleaned predecessor (undefined for the first token)
    const prevToken = newTokens[idx - 1]

    if (idx === 0) newToken.pre = false

    // for the first token, check to see if any of the breakdowns are a period, exclam, or question mark
    // and set capitalized to true if so
    if (idx === 0 && pts.some((candidate) => sentenceEnders.includes(candidate.bd?.basic_form))) {
      newToken.capitalized = true
    }

    // ta and masu conjugations, remove space pre of current and post of prior
    if (['特殊・タ', '特殊・マス'].includes(bd?.conjugated_type)) {
      newToken.pre = false

      if (idx > 0) {
        newTokens[idx - 1].post = false
      }
    }

    // space after numbers
    if (bd?.pos_detail_1 === '数' && spacedNumbers.includes(bd?.basic_form)) {
      newToken.post = true
    }

    // capitalize if the prior token was a period, exclam, or question mark
    if (idx > 0 && sentenceEnders.includes(newTokens[idx - 1].bd?.basic_form)) {
      newToken.capitalized = true
    }

    // put a pre space before a counter
    if (bd?.pos_detail_1 === '接尾' && bd?.pos_detail_2 === '助数詞') {
      newToken.pre = true
    }

    // remove any pre space for 人(じん)
    if (bd?.pos_detail_1 === '接尾' && bd?.basic_form === '人') {
      newToken.pre = false
    }

    // if there's a doubling of a character, remove the space pre of the current and post of the prior
    if (idx > 0 && newToken.text[0]?.[0] === newToken.text[0]?.[1] && !vowels.includes(newToken.text[0]?.[0])) {
      newToken.pre = false
      newTokens[idx - 1].post = false
    }

    // with romaji, the text is stored as an array of romajified pieces. if a piece ends with "n"
    // (including a lone "n") and the next piece starts with "n", append "'" to it.
    // this builds a new array, so the caller's `text` array is left untouched.
    const originalText = newToken.text
    newToken.text = originalText.map((t, pieceIdx) => {
      const nextPiece = originalText[pieceIdx + 1]
      if (t[t.length - 1] === 'n' && nextPiece && nextPiece[0] === 'n') {
        return t + "'"
      }

      return t
    })

    // if any of the text tokens are "wo", change it to "o"
    // only if not overridden by the particleOverride flag
    newToken.text = newToken.text.map((t) => {
      if (t === 'wo' && !bonusData?.particle_override) return 'o'
      return t
    })

    // remove the post space of the element prior if the current pos_detail_2 is 助数詞
    if (idx > 0 && bd?.pos_detail_2 === '助数詞') {
      newTokens[idx - 1].post = false

      // additionally, if the current element has the basic_form of 時半, add a space to the end of the
      // first element in the text array
      if (bd?.basic_form === '時半') {
        newToken.text[0] = newToken.text[0] + ' '
      }
    }

    // add a post space to an element if its pos_detail_2 is 副詞可能
    if (bd?.pos_detail_2 === '副詞可能') {
      newToken.post = true
    }

    // if the current token is a punctuation, add a post space
    if (sentenceEnders.includes(bd?.basic_form)) {
      newToken.post = true
      newToken.pre = false

      // also remove post from the token preceding
      if (idx > 0) {
        newTokens[idx - 1].post = false
      }
    }

    if (bd?.pos_detail_2 === '助動詞語幹') {
      if (idx > 0 && newTokens[idx - 1].bd?.conjugated_type === '特殊・ダ') newToken.pre = true
      else if (idx > 0 && newTokens[idx - 1].bd?.pos === '名詞' && newTokens[idx - 1].bd?.pos_detail_2 !== '特殊') newToken.pre = true
      else newToken.pre = false
    }

    // if the basic form is ない after a verb, glue it to the prior token.
    // (`bd` is optional on the prior token, so chain through it safely.)
    if (bd?.basic_form === 'ない' && prevToken?.bd?.pos === '動詞') {
      newToken.pre = false
      prevToken.post = false
    }

    // remove the space in front of the で that comes after ない
    if (bd?.basic_form === 'で' && prevToken?.bd?.basic_form === 'ない') {
      newToken.pre = false
    }

    // remove the space in front of the いる that comes after で
    if (bd?.basic_form === 'いる' && prevToken?.bd?.basic_form === 'で') {
      newToken.pre = false
    }

    // remove the space before ない when it comes after a pos 形容詞
    if (bd?.basic_form === 'ない' && prevToken?.bd?.pos === '形容詞') {
      newToken.pre = false
    }

    if (bd?.basic_form === 'られる') {
      newToken.pre = false
      if (idx > 0) {
        newTokens[idx - 1].post = false
      }
    }

    // the さ in なさそう
    if (bd?.basic_form === 'さ' && bd?.pos_detail_1 === '接尾') {
      newToken.pre = false
    }

    // add a post space for 連体詞 pos
    if (bd?.pos === '連体詞') {
      newToken.post = true
    }

    if (bd?.pos === '接続詞') {
      newToken.post = true
    }

    if (bd?.basic_form === 'くださる') {
      newToken.pre = true
    }

    // もう一度 is a special case; find its basic form and add a space to the lone "u" in the text array
    if (bd?.basic_form === 'もう一度') {
      newToken.text = newToken.text.map((t) => {
        if (t === 'u') return 'u '
        return t
      })
    }

    if (bd?.basic_form === 'なさる') {
      newToken.pre = false
    }

    // surface form of でし, base form です, needs to have the "de" in text array turned into "de "
    if (bd?.basic_form === 'です' && bd?.surface_form === 'でし' && pts[idx + 1]?.bd?.surface_form === 'ましょ') {
      newToken.text = newToken.text.map((t) => {
        if (t === 'de') return 'de '
        return t
      })
    }

    // if the joined text starts with two "i"'s, set capitalized to false
    if (/^ii/.test(newToken.text.join(''))) {
      newToken.capitalized = false
    }

    // remove the space preceding さん
    if (bd?.basic_form === 'さん') {
      newToken.pre = false
    }

    // remove the space before ん in んです
    if (bd?.basic_form === 'ん' && bd?.pos_detail_1 === '非自立') {
      newToken.pre = false
    }

    // if the basic form is 引っ張る and the text joins to 'hiparu', restore the doubled 'p'
    if (bd?.basic_form === '引っ張る' && newToken.text.join('') === 'hiparu') {
      newToken.text = ['hi', 'pp', 'aru']
    }

    // the counter 日 pronounced カ attaches directly to the number before it.
    // these fields live on the breakdown (`bd`), not on the token itself.
    if (bd?.basic_form === '日' && bd?.pos_detail_2 === '助数詞' && bd?.pronunciation === 'カ') {
      newToken.pre = false
    }

    // ...and the token before such a counter loses its post space
    const upcomingBd = idx < pts.length - 1 ? pts[idx + 1].bd : undefined
    if (upcomingBd?.basic_form === '日' && upcomingBd?.pos_detail_2 === '助数詞' && upcomingBd?.pronunciation === 'カ') {
      newToken.post = false
    }

    newTokens.push(newToken)
  })

  return newTokens
}

/**
 * Final pass over progressive tokens: converts Japanese punctuation to roman
 * punctuation where the preceding text is not kana, fixes spacing around
 * specific word combinations, patches a few romaji readings (e.g. "ha" -> "wa")
 * and inserts apostrophes after a syllabic "n".
 *
 * Each token is shallow-copied. Its `text` array (and `formatted` entries) are
 * therefore shared with the input token, and the in-place writes below are
 * visible through `tokens[...]` — later iterations that inspect
 * `tokens[idx - 1].text` rely on seeing them. Do not clone `text` without
 * revisiting those checks.
 *
 * @param {Array<object>} tokens Tokens of the shape
 *   `{ text: string[], formatted?: Array<{text: string}>, pre, post, capitalized, bd }`,
 *   where `bd` is the tokenizer breakdown (may be absent).
 * @param {object} [bonusData] Reads `kana_override` (suppresses the "ha" -> "wa"
 *   fix) and `force_lowercase` (lower-cases the first token).
 * @returns {Array<object>} New token objects, one per input token.
 */
export const progressivePostProcess = (tokens, bonusData) => {
  // a text piece counts as "kana" when wanakana says so or when it is a nested array
  const isKanaOrNested = (t) => wanakana.isKana(t) || Array.isArray(t)
  const isJapaneseCommaOrNested = (t) => t === '、' || Array.isArray(t)
  const haToWa = (t) => (t === 'ha' ? 'wa' : t)
  const romanPunctuation = [['。', '.'], ['？', '?'], ['！', '!']]
  const greetingsEndingInWa = ['こんにちは', 'こんばんは']

  return tokens.map((token, idx) => {
    const newToken = { ...token }
    const prevToken = tokens[idx - 1]
    const nextToken = tokens[idx + 1]

    // if we encounter a period / question mark / exclamation mark, check to see if the previous
    // token was kana; if not, convert the japanese mark to a roman one
    romanPunctuation.forEach(([japanese, roman]) => {
      if (token.text?.[0] === japanese) {
        if (prevToken && !prevToken.text.every(isKanaOrNested)) {
          newToken.text[0] = roman
        }
      }
    })

    // only make the comma an english comma if the last character of the previous token was not kana
    if (token.text?.[0] === '、') {
      const lastTokenLastChar = prevToken?.text?.[prevToken.text.length - 1]?.slice(-1)
      if (prevToken && !wanakana.isKana(lastTokenLastChar)) {
        newToken.text[0] = ','
      }
    }

    // remove post space from a japanese comma if it's japanese and not romajified
    if (newToken.bd?.surface_form === '、' && newToken.text.every(isJapaneseCommaOrNested)) {
      newToken.post = false
    }

    // do the same on pre if the prior token's was a japanese comma
    if (idx > 0 && newToken.pre && tokens[idx - 1]?.bd?.surface_form === '、' && tokens[idx - 1].text.every(isJapaneseCommaOrNested)) {
      newToken.pre = false
    }

    // if the next token is punctuation, remove post space
    if (idx < tokens.length - 1 && tokens[idx + 1].bd?.pos === '記号') {
      newToken.post = false
    }

    if (token.bd?.pos === '記号' && ['！', '!'].includes(token.bd?.surface_form)) {
      newToken.pre = false
    }

    if (token.bd?.basic_form === 'する' && token.bd?.surface_form === 'し' && tokens[idx + 1]?.bd?.basic_form === 'ます') {
      newToken.post = false
    }

    // if the previous token was a period, capitalize this token
    if (idx > 0 && tokens[idx - 1].bd?.pos === '記号' && tokens[idx - 1].bd?.surface_form === '。') {
      newToken.capitalized = true
    }

    // convert the half-width middle dot to a space
    if (token.text?.[0] === '･') {
      newToken.text[0] = ' '
    }

    // special case romaji for こんにちは and こんばんは: the final は is read "wa"
    if (greetingsEndingInWa.includes(token.bd?.surface_form)) {
      newToken.text = newToken.text.map(haToWa)
      if (newToken.formatted) newToken.formatted = newToken.formatted.map((t) => ({ ...t, text: haToWa(t.text) }))
    }

    if (token.bd?.basic_form === 'いいえ' && token.capitalized) {
      newToken.capitalized = false
    }

    // if this is either the first token or the previous token was punctuation (other than 、)
    // and this token starts with "i", do not capitalize it
    if (newToken.text[0]?.indexOf('i') === 0 && (idx === 0 || (idx > 0 && tokens[idx - 1].bd?.pos === '記号' && tokens[idx - 1].bd?.surface_form !== '、'))) {
      newToken.capitalized = false
    }

    if (newToken.bd?.pos === '副詞' && newToken.bd?.basic_form !== 'まして') {
      newToken.post = true
    }

    // fix for 十日
    if (newToken.bd?.pos === '名詞' && newToken.bd?.basic_form === '十') {
      newToken.post = false
    }

    // fix for e.g., 初めまして
    if (nextToken?.bd?.pos === '副詞' && nextToken?.bd?.basic_form === 'まして') {
      newToken.post = false
    }

    if (newToken.bd?.pos === '副詞' && newToken.bd?.surface_form === 'まして') {
      newToken.pre = false
    }

    // remove any spaces before commas or periods
    if (newToken.bd?.pos === '記号' && ['、', '。'].includes(newToken.bd?.surface_form)) {
      newToken.pre = false
    }

    // same, but consider it from the preceding token for its post
    if (idx < tokens.length - 1 && tokens[idx + 1].bd?.pos === '記号' && ['、', '。'].includes(tokens[idx + 1].bd?.surface_form)) {
      newToken.post = false
    }

    if (newToken.bd?.basic_form === '・' && newToken.text[0] === '/') {
      newToken.text[0] = ' '
    }

    // eliminate the space between か and な or か and なぁ (they're two separate tokens)
    if (newToken.bd?.basic_form === 'か' && tokens[idx + 1]?.bd?.basic_form === 'な') {
      newToken.post = false
    }
    // account for pre space in かな
    if (newToken.bd?.basic_form === 'な' && tokens[idx - 1]?.bd?.basic_form === 'か') {
      newToken.pre = false
    }

    if (newToken.bd?.basic_form === 'か' && ['なあ', 'なぁ'].includes(tokens[idx + 1]?.bd?.basic_form)) {
      newToken.post = false
    }
    // account for pre space in かなぁ
    if (['なあ', 'なぁ'].includes(newToken.bd?.basic_form) && tokens[idx - 1]?.bd?.basic_form === 'か') {
      newToken.pre = false
    }

    // remove space after で and before 下さい if they are tokens in that order
    if (newToken.bd?.basic_form === 'で' && tokens[idx + 1]?.bd?.surface_form === '下さい') {
      newToken.post = false
    }
    if (newToken.bd?.surface_form === '下さい' && tokens[idx - 1]?.bd?.basic_form === 'で') {
      newToken.pre = false
    }

    // remove the space before 時半 if it's a number before
    if (newToken.bd?.basic_form === '時半') {
      if (tokens[idx - 1]?.bd?.pos_detail_1 === '数') {
        newToken.pre = false
      }
    }

    if (newToken.bd?.pos_detail_1 === '数' && tokens[idx + 1]?.bd?.basic_form === '時半') {
      newToken.post = false
    }

    // the counter 日 pronounced カ attaches directly to the number before it
    if (newToken.bd?.basic_form === '日' && newToken.bd?.pos_detail_2 === '助数詞' && newToken.bd?.pronunciation === 'カ') {
      newToken.pre = false
    }

    // remove post for the above as well's prior token
    if (idx < tokens.length - 1 && tokens[idx + 1].bd?.basic_form === '日' && tokens[idx + 1].bd?.pos_detail_2 === '助数詞' && tokens[idx + 1].bd?.pronunciation === 'カ') {
      newToken.post = false
    }

    // remove space between そう and です
    if (newToken.bd?.basic_form === 'そう' && tokens[idx + 1]?.bd?.basic_form === 'です') {
      newToken.post = false
    }
    if (newToken.bd?.basic_form === 'です' && tokens[idx - 1]?.bd?.basic_form === 'そう') {
      newToken.pre = false
    }

    // sometimes, we can get a token like "は" written as "ha" instead of "wa", either by itself
    // or when in something like では. in this case, we need to
    // 1) confirm the surface form has no kanji
    // 2) confirm pronunciation (split per character) and text have the same length
    // 3) if there's a "ha" anywhere in the text, and its matching index in pronunciation is "ワ",
    //    replace it with "wa"
    const allKana = newToken.bd?.surface_form?.split('').every((c) => wanakana.isKana(c))
    const pronSplit = newToken.bd?.pronunciation?.split('') || []
    const textSplit = newToken.text || []

    if (allKana && pronSplit.length === textSplit.length) {
      const haIndex = textSplit.findIndex((t) => t === 'ha')
      if (haIndex !== -1 && pronSplit[haIndex] === 'ワ' && !bonusData?.kana_override) {
        newToken.text[haIndex] = 'wa'
      }
    }

    // if the last character in our text is an "n" and the next token's first piece is exactly
    // "i" or "n", we want to add an apostrophe to the end
    if (newToken?.text?.length && nextToken?.text?.length) {
      const lastTextToken = newToken.text?.[newToken.text.length - 1]
      const lastTextChar = lastTextToken[lastTextToken.length - 1]
      const firstNextTextToken = nextToken?.text?.[0]

      if (lastTextChar === 'n' && (firstNextTextToken === 'i' || firstNextTextToken === 'n')) {
        newToken.text[newToken.text.length - 1] = `${lastTextToken}'`
      }

      // do the equivalent if there's a `formatted` object
      if (newToken?.formatted?.length && nextToken?.formatted?.length) {
        const lastFormattedToken = newToken.formatted[newToken.formatted.length - 1]
        const lastFormattedChar = lastFormattedToken.text[lastFormattedToken.text.length - 1]
        const firstNextFormattedToken = nextToken.formatted[0]

        if (lastFormattedChar === 'n' && (firstNextFormattedToken.text === 'i' || firstNextFormattedToken.text === 'n')) {
          newToken.formatted[newToken.formatted.length - 1].text = `${lastFormattedToken.text}'`
        }
      }
    }

    // do the same for the text pieces just within this individual token: a piece whose first
    // "n" is its last character, followed by a piece starting with "i" or "n"
    if (newToken?.text?.length) {
      newToken.text = newToken.text.map((t, idy) => {
        if (newToken.text[idy + 1] && t.indexOf('n') === t.length - 1 && (newToken.text[idy + 1].indexOf('i') === 0 || newToken.text[idy + 1].indexOf('n') === 0)) {
          return `${t}'`
        }
        return t
      })
    }

    // do the same for formatted within this individual token
    if (newToken?.formatted?.length) {
      newToken.formatted = newToken.formatted.map((t, idy) => {
        if (newToken.formatted[idy + 1] && t.text.indexOf('n') === t.text.length - 1 && (newToken.formatted[idy + 1].text.indexOf('i') === 0 || newToken.formatted[idy + 1].text.indexOf('n') === 0)) {
          return { ...t, text: `${t.text}'` }
        }
        return t
      })
    }

    // force the first token's text and/or formatted text to be lowercase if
    // bonus data contains force_lowercase
    if (idx === 0 && bonusData?.force_lowercase) {
      newToken.capitalized = false
      newToken.force_lower = true
      newToken.text = newToken.text.map((t) => {
        if (Array.isArray(t)) return t.map((t2) => t2.toLowerCase())
        return (t?.text || t)?.toLowerCase()
      })
      if (newToken.formatted) newToken.formatted = newToken.formatted.map((t) => ({ ...t, text: t.text?.toLowerCase() }))
    }

    return newToken
  })
}

// numerals for which removeSpaces always forces `pre = false` / `post = true`
const spacedNumbers = [
  '十', '百', '千', '万', '億', '兆', '京', '垓', '𥝱', '穣',
  '溝', '澗', '正', '載', '極', '恒河沙', '阿僧祇', '那由他', '不可思議', '無量大数'
]

/**
 * Re-evaluates the `pre` / `post` spacing flags of every token. Spaces are dropped
 * wherever the surrounding text is kana (or kanji), and re-added after particles.
 * Each token is shallow-copied; the input array and its tokens are not modified.
 *
 * @param {Array} tokens tokens carrying `bd`, `text`, `pre` and `post`
 * @returns {Array} new array of adjusted token copies
 */
export const removeSpaces = (tokens) => {
  const particlePos = '助詞'
  const finalIdx = tokens.length - 1

  // every text part is either a kana string or a nested array
  const kanaOnly = (parts) => parts.every((part) => wanakana.isKana(part) || Array.isArray(part))
  const kanaOrKanji = (part) => wanakana.isKana(part) || wanakana.isKanji(part)

  return tokens.map((token, idx) => {
    const result = { ...token }
    const { bd } = result
    const before = tokens[idx - 1]
    const after = tokens[idx + 1]

    // spaced numerals are settled immediately and skip every other rule
    if (bd.pos_detail_1 === '数' && spacedNumbers.includes(bd.basic_form)) {
      result.pre = false
      result.post = true
      return result
    }

    // kana directly after kana needs no leading space
    if (result.pre && idx > 0) {
      const selfKana = kanaOnly(result.text)
      const beforeKana = kanaOnly(before.text)
      if (selfKana && beforeKana) result.pre = false
    }

    // a kana particle never gets a leading space
    if (bd.pos === particlePos && kanaOnly(result.text)) result.pre = false

    // no trailing space when a kana particle follows
    if (result.post && idx < finalIdx) {
      if (after.bd.pos === particlePos && kanaOnly(after.text)) result.post = false
    }

    // kana directly before kana needs no trailing space (particles excepted)
    if (result.post && idx < finalIdx && bd.pos !== particlePos) {
      const selfKana = kanaOnly(result.text)
      const afterKana = kanaOnly(after.text)
      if (selfKana && afterKana) result.post = false
    }

    // a token whose last text part is kana or kanji loses its trailing space
    if (result.post && result.text[result.text.length - 1]) {
      if (kanaOrKanji(result.text[result.text.length - 1])) result.post = false
    }

    // a token whose first text part is kana or kanji loses its leading space
    if (result.pre && result.text[0]) {
      if (kanaOrKanji(result.text[0])) result.pre = false
    }

    // particles (other than て) always get their trailing space back
    if (bd.pos === particlePos && bd.basic_form !== 'て') result.post = true

    // nothing follows the final token
    if (idx === finalIdx) result.post = false

    // じゃ + ない are written together
    if (bd.surface_form === 'じゃ' && after?.bd.surface_form === 'ない') result.post = false
    if (bd.surface_form === 'ない' && before?.bd.surface_form === 'じゃ') result.pre = false

    // two consecutive kana particles are written together
    if (bd.pos === particlePos && after?.bd.pos === particlePos && kanaOnly(result.text) && kanaOnly(after.text)) {
      result.post = false
    }

    // no leading space after a token that rendered no text
    if (result.pre && idx > 0 && before.text.length === 0) result.pre = false

    return result
  })
}

/**
 * Given a formatted string that contains formatting tokens of the form `|:` and `:|`, where
 * the opening token also carries a single character naming the type of formatting (e.g., `|:b`),
 * this function splits the string on those tokens and returns an array of objects describing
 * each run of text. It can take an optional starting format, which is useful when a string is
 * split into multiple parts and the formatting should carry over from one part to the next.
 *
 * Each opening tag consumes exactly one character as its format type. If that character is
 * immediately followed by `<`, everything up to the next `>` is consumed as a color code; it is
 * stored as `color` for type `c` and as `highlightColor` for type `h`. A `<` with no closing `>`
 * is left in place as ordinary text. Closing a tag removes the last format character; the stored
 * colors are not cleared by a closing tag.
 *
 * Every returned entry has all four keys (`type`, `color`, `highlightColor`, `text`); the
 * examples below omit the empty ones for brevity.
 *
 * e.g., `食べ|:iない:|` => [{type: '', text: '食べ'}, { type: 'i', text: 'ない' }]
 * e.g., `食べ|:c<red>ない:|` => [{type: '', text: '食べ'}, { type: 'c', color: 'red', text: 'ない' }]
 * e.g., `食べ|:c<#ccceee>ない:|` => [{type: '', text: '食べ'}, { type: 'c', color: '#ccceee', text: 'ない' }]
 * e.g., `食べ|:bな|:uい:|:|` => [{type: '', text: '食べ'}, { type: 'b', text: 'な' }, { type: 'bu', text: 'い' }]
 *
 * @param {string} formatText
 * @param {string} startingFormat
 * @returns {Array<{type: string, color: string, highlightColor: string, text: string}>}
 */
const buildFormatIndex = (formatText, startingFormat = '') => {
  const formatIndex = []
  let currentFormat = startingFormat
  let currentText = ''
  let currentColor = ''
  let currentHighlightColor = ''

  // emit the text gathered so far (if any) under the current formatting, then start a new run
  const flush = () => {
    if (currentText) {
      formatIndex.push({
        type: currentFormat,
        color: currentColor,
        highlightColor: currentHighlightColor,
        text: currentText
      })
    }
    currentText = ''
  }

  for (let i = 0; i < formatText.length; i++) {
    const char = formatText[i]
    const nextChar = formatText[i + 1]

    if (char === '|' && nextChar === ':') {
      flush()

      // step onto the ':' of the opening tag
      i++

      // the character right after the tag is the format type
      const formatType = formatText[i + 1]

      // an angle bracket directly after the type introduces a color code
      if (formatText[i + 2] === '<') {
        const colorEnd = formatText.indexOf('>', i + 3)

        // only consume the color when it is terminated; with no '>' the old code set
        // `i` to a negative value and re-scanned the string forever
        if (colorEnd !== -1) {
          if (formatType === 'c') currentColor = formatText.substring(i + 3, colorEnd)
          else if (formatType === 'h') currentHighlightColor = formatText.substring(i + 3, colorEnd)

          // leave `i` one short of '>' so the type-skip below lands exactly on it
          i = colorEnd - 1
        }
      }

      // record the format type and skip past it
      if (formatType) {
        currentFormat += formatType
        i++
      }
    } else if (char === ':' && nextChar === '|') {
      flush()

      // step onto the '|' of the closing tag
      i++

      // closing a tag drops the most recently opened format type
      if (currentFormat.length > 0) currentFormat = currentFormat.slice(0, -1)
    } else {
      currentText += char
    }
  }

  // trailing text after the last tag
  flush()

  return formatIndex
}

/**
 * Walks the lexical breakdown and the furigana chunks in lock-step and produces one token per
 * breakdown entry. Each token is `{ ...getSpaces(breakdown, idx), text, bd, formatted }` where
 * `text` is an array of strings and/or `[kanji, reading]` pairs, and `formatted` (only when a
 * format string is supplied) is an array of `{ text, format, color, highlightColor }` runs.
 *
 * After the walk, several clean-up passes remove duplicated small っ/ッ, convert `ー` after
 * romaji to an en dash, apply `force_capitalize` / `force_lowercase`, drop lone circled
 * numbers in `formatted`, and westernise `、` / `。` that follow non-Japanese text.
 *
 * An empty or nullish `breakdown` yields an empty array.
 *
 * @param {Array} breakdown entries with at least `surface_form`, `pos` and `pronunciation`
 * @param {Array} newFuriData chunks of the form `[kana]` or `[kanji, reading]`
 * @param {Object} progMap lookup of the kanji/kana considered known (truthy value per key)
 * @param {Object} bonusData optional flags: `particle_override`, `force_capitalize`, `force_lowercase`
 * @param {string|null} force `'romaji'`, `'kanji'` or null
 * @param {Object|null} record currently unused; kept so existing call sites stay valid
 * @param {string|null} formatText optional format string understood by `buildFormatIndex`
 * @returns {Array} the progressive tokens
 */
export const generateTokens = (breakdown, newFuriData, progMap, bonusData, force = null, record = null, formatText = null) => {
  const SMALL_TSU = ['っ', 'ッ']

  // circled numbers used by kanji memory; the #s are removed on the backend for the record
  // but are still present within the format text
  const CIRCLED_NUMBER = /[❶-❾]/

  // compile a format index if we have a format string
  const formatIndex = formatText ? buildFormatIndex(removeFuri(formatText)) : null

  // position within the format index array, and the character within that entry
  let currentFormatIndex = 0
  let currentFormatSubIndex = 0

  const progressiveTokens = []

  // which newFuri chunk we're in, and which string index within that chunk
  let newFuriIndex = 0
  let newFuriSubIndex = 0

  // this happens when e.g., a double kanji is split into two breakdown chunks
  let bdKanjiMismatch = false

  // for cases like 一緒に, where the kana has a small tsu and small ょ separated
  let romajiSmallTsuCombo = false

  // circled number waiting to be attached to the next formatted run
  let preNum = ''

  // last character of the last element of the chunk preceding the current one
  const lastCharOfPreviousChunk = () => {
    const prevLength = newFuriData[newFuriIndex - 1]?.length
    const prevLengthLength = newFuriData[newFuriIndex - 1]?.[prevLength - 1]?.length
    return newFuriData[newFuriIndex - 1]?.[prevLength - 1]?.[prevLengthLength - 1]
  }

  // lookahead into the chunk following the current one
  // NOTE(review): this indexes the chunk with the *string length* of its last element
  // (`nextLengthLength - 1`) rather than with `nextLength - 1`, so it does not mirror
  // lastCharOfPreviousChunk and is often undefined. Preserved as-is because the effect on
  // convertToProg cannot be judged from here — confirm the intent before changing it.
  const firstCharOfNextChunk = () => {
    const nextLength = newFuriData[newFuriIndex + 1]?.length
    const nextLengthLength = newFuriData[newFuriIndex + 1]?.[nextLength - 1]?.length
    return newFuriData[newFuriIndex + 1]?.[nextLengthLength - 1]?.[0]
  }

  // a situation in case we do have a number starting in the format text
  if (formatIndex && CIRCLED_NUMBER.test(formatIndex[0]?.text?.[0])) {
    preNum = formatIndex[0].text[0]
    currentFormatSubIndex++
  }

  breakdown?.forEach((bd, bdIdx) => {
    // init with the pre and post spaces and capitalized
    const token = {
      ...getSpaces(breakdown, bdIdx),
      text: preNum && bdIdx === 0 ? [preNum] : [],
      bd,
      formatted: formatIndex ? [{ text: bdIdx === 0 ? preNum : '', format: '', color: '', highlightColor: '' }] : null
    }
    if (bdIdx === 0) preNum = ''

    // whether this token is a particle (used to take reading for romaji)
    const isParticle = !bonusData?.particle_override && bd.pos === PARTICLE

    // used to skip the remaining characters of a multi-kanji compound that was already
    // consumed from the furi data in one go
    let skipNext = false
    let skipCounter = -1

    // append to the most recent formatted run (no-op without a format string)
    const appendToFormatted = (text) => {
      if (formatIndex) token.formatted[token.formatted.length - 1].text += text
    }

    // one by one, iterate over the bd's chars and add the corresponding from
    // the newFuriData, converting to kata or romaji as necessary per prog
    // settings. if we have a double pair in the current furi chunk, it should
    // be added as a pair to the text
    bd.surface_form?.trim().split('').forEach((char) => {
      // do format text processing
      if (formatIndex && currentFormatIndex < formatIndex.length) {
        // if we're at the end of the current format entry, move to the next one
        if (currentFormatSubIndex >= formatIndex[currentFormatIndex].text.length) {
          currentFormatIndex++
          currentFormatSubIndex = 0

          // the increment above may step past the last entry, so guard before reading it
          const upcoming = formatIndex[currentFormatIndex]
          if (upcoming && CIRCLED_NUMBER.test(upcoming.text[0])) {
            preNum = upcoming.text[0]
            currentFormatSubIndex++

            token.formatted.push({
              text: preNum,
              format: upcoming.type,
              color: upcoming.color,
              highlightColor: upcoming.highlightColor
            })
            token.text.push(preNum)
          }
        }

        // if the breakdown character matches the format text character, open a new
        // formatted run carrying that entry's formatting and advance within the entry
        if (currentFormatIndex < formatIndex.length) {
          const active = formatIndex[currentFormatIndex]
          if (char === active.text[currentFormatSubIndex]) {
            token.formatted.push({
              text: preNum + '',
              format: active.type,
              color: active.color,
              highlightColor: active.highlightColor
            })
            preNum = ''
            currentFormatSubIndex++
          }
        }
      }

      if (skipCounter > -1) skipCounter--
      if (skipCounter >= 0) return

      // if the furi chunk is size 2, it's a kanji with furi and should be added
      // as a pair, but only if that kanji in index 0 exists within the user's
      // prog map. if it doesn't, see if the individual kana within it do and add
      // them instead; if they don't, add it as romaji
      if (newFuriData[newFuriIndex]?.length === 2) {
        const [kanji, kana] = newFuriData[newFuriIndex]

        // the entire compound will be added this iteration, so the following
        // characters of the compound can be skipped
        if (kanji.length > 1 && !skipNext) {
          skipCounter = kanji.length - 1
          skipNext = true
        }

        // if there are multiple kanji in the newFuriData but our breakdown chunk only
        // has one kanji, then it's been split in the lexical phase despite being a compound
        // and thus needs to be added/spaced on the next run; by returning here, we just
        // let the normal logic for adding to the progressive tokens take place on the next
        // cycle, where it will just encounter the second part of the kanji compound
        if (!bdKanjiMismatch && kanji.length > 1 && bd.surface_form?.length === 1) {
          bdKanjiMismatch = true
          return
        }

        if (bdKanjiMismatch) bdKanjiMismatch = false

        if (force === 'romaji' || force === 'kanji' || progMap[kanji] || kanji?.split('')?.every((t) => progMap[t])) {
          if (force === 'romaji') {
            let romajiSource
            if (kana[kana.length - 1] === 'っ') {
              // hold the trailing small tsu back; it is prepended to the next reading
              romajiSmallTsuCombo = true
              romajiSource = kana.slice(0, kana.length - 1)
            } else if (romajiSmallTsuCombo) {
              romajiSmallTsuCombo = false
              romajiSource = 'っ' + kana
            } else {
              romajiSource = kana
            }
            const romaji = wanakana.toRomaji(romajiSource)
            token.text.push(romaji)
            appendToFormatted(romaji)
          } else {
            // add the pair as-is to the token text, to be later rendered as a kanji/furi pair
            token.text.push(newFuriData[newFuriIndex])
            if (formatIndex) token.formatted[token.formatted.length - 1].text = newFuriData[newFuriIndex]
          }
          newFuriIndex++
          newFuriSubIndex = 0
        } else {
          kana?.split('')?.forEach((t, kIdx) => {
            // previous kana can either be within this same chunk or at the end of the last
            const prev = kIdx === 0 ? lastCharOfPreviousChunk() : kana[kIdx - 1]

            // next kana can either be within this same chunk or at the beginning of the next
            const next = kIdx === kana.length - 1 ? firstCharOfNextChunk() : kana[kIdx + 1]

            let progText = convertToProg(t, prev, next, progMap, bonusData, force)
            const nfdm1 = newFuriData[newFuriIndex - 1]

            // drop a leading small tsu that the preceding known kanji's reading already ends with
            if (progText?.length > 1 && SMALL_TSU.includes(progText[0])
              && newFuriIndex > 0
              && nfdm1?.length === 2 && SMALL_TSU.includes(nfdm1[1][nfdm1[1].length - 1])
              && progMap[nfdm1?.[0]]) {
              progText = progText.slice(1)
            }

            if (progText) {
              token.text.push(progText)
              appendToFormatted(progText)
            }

            // if the current char is small tsu and the next token has a kanji that's in the prog map,
            // add the small tsu to token.text
            if (t === 'っ' && newFuriData[newFuriIndex + 1]?.[0]?.[0] && progMap[newFuriData[newFuriIndex + 1][0][0]]) {
              token.text.push(t)
              appendToFormatted(t)
            }
          })

          newFuriIndex++
          newFuriSubIndex = 0
        }
      } else {
        const currentChunkText = newFuriData[newFuriIndex]?.[0]
        const currentChar = currentChunkText?.[newFuriSubIndex]

        // previous kana can either be within this same chunk or at the end of the last
        const prev = newFuriSubIndex === 0
          ? lastCharOfPreviousChunk()
          : currentChunkText?.[newFuriSubIndex - 1]

        // next kana can either be within this same chunk or at the beginning of the next
        const next = newFuriSubIndex === currentChunkText?.length - 1
          ? firstCharOfNextChunk()
          : currentChunkText?.[newFuriSubIndex + 1]

        let progText = convertToProg(currentChar, prev, next, progMap, bonusData, force, isParticle, bd.pronunciation)

        // in rare circumstances where the previous char was small tsu, romajiSmallTsu doesn't get reset
        if (force === 'romaji' && progText && romajiSmallTsuCombo) {
          romajiSmallTsuCombo = false
        }

        // a special case needs to exist for something like 引っ張る, where the small tsu is actually
        // not a part of any furi and should therefore be treated like normal kana and added
        // to the token text
        if (progText === 'っ' && force === 'kanji' && newFuriData[newFuriIndex - 1]?.length === 2) {
          token.text.push(progText)
          appendToFormatted(progText)
        }

        // kind of the same as the above, but for cases like 真っ白 where it's baked into the grammar data
        if (!progText && currentChar === 'っ' && newFuriData[newFuriIndex + 1]?.length === 2) {
          romajiSmallTsuCombo = true
          if (force !== 'kanji' && force !== 'romaji') {
            token.text.push(currentChar)
            appendToFormatted(currentChar)
          }
        }

        if (progText && !SMALL_TSU.includes(currentChar)) {
          // patch for kanji and kana duplicating small tsu when the small tsu is in a furi pair
          if (progText?.length > 1 && force === 'kanji' && SMALL_TSU.includes(progText[0]) && newFuriIndex > 0 && newFuriData[newFuriIndex - 1]?.length === 2 && SMALL_TSU.includes(newFuriData[newFuriIndex - 1][1][newFuriData[newFuriIndex - 1][1].length - 1])) {
            progText = progText.slice(1)
          }
          token.text.push(progText)
          appendToFormatted(progText)
        }

        // advance within the current furi chunk, rolling over to the next chunk at its end
        newFuriSubIndex++
        if (newFuriSubIndex === newFuriData[newFuriIndex]?.[0]?.length) {
          newFuriIndex++
          newFuriSubIndex = 0
        }
      }
    })

    progressiveTokens.push(token)
  })

  // if we have a format index, drop every formatted run whose text is the empty string
  if (formatIndex) {
    progressiveTokens.forEach((t) => {
      t.formatted = t.formatted.filter((f) => f.text !== '')
    })
  }

  // perform a cleanup of the progressive tokens such that if there is a token whose text field is of size >= 2,
  // and the last element is a string ending with っ, and the following token's text field is of size 1 and
  // contains a string starting with っ, then we remove the っ from the second token's text string
  // this is to prevent the duplication of small tsu when the small tsu is in a furi pair
  for (let i = 0; i < progressiveTokens.length - 1; i++) {
    const currentToken = progressiveTokens[i]
    const nextToken = progressiveTokens[i + 1]

    if (currentToken.text?.length >= 2
      && typeof currentToken.text[currentToken.text.length - 1] === 'string'
      && nextToken.text?.length === 1
      && SMALL_TSU.includes(currentToken.text[currentToken.text.length - 1][currentToken.text[currentToken.text.length - 1].length - 1])
      && SMALL_TSU.includes(nextToken.text[0][0])) {
      nextToken.text[0] = nextToken.text[0].slice(1)
    }

    // if the current token's last text element is a string ending with っ and the next token's text field's
    // first element is a string starting with a double consonant, then we remove the っ from the first token
    if (currentToken.text?.length >= 2
      && typeof currentToken.text[currentToken.text.length - 1] === 'string'
      && nextToken.text?.length === 1
      && nextToken.text[0]?.length > 1
      && nextToken.text[0][0] === nextToken.text[0][1] && !wanakana.isKana(nextToken.text[0][0])
      && SMALL_TSU.includes(currentToken.text[currentToken.text.length - 1][currentToken.text[currentToken.text.length - 1].length - 1])) {
      currentToken.text[currentToken.text.length - 1] = currentToken.text[currentToken.text.length - 1].slice(0, -1)
    }

    // in the event we have something like this:
    // [ [ '一', 'いっ' ] ],
    // [ 'っか', [ '月', 'げつ' ] ],
    // we want to remove the っ from the second token
    if (currentToken.text?.length === 1
      && nextToken.text?.length === 2
      && Array.isArray(currentToken.text[0])
      && currentToken.text[0].length === 2
      && currentToken.text[0][1][currentToken.text[0][1].length - 1] === 'っ'
      && typeof nextToken.text[0] === 'string'
      && SMALL_TSU.includes(nextToken.text[0][0])
      && nextToken.text[1]?.length === 2) {
      nextToken.text[0] = nextToken.text[0].slice(1)
    }
  }

  for (let i = 0; i < progressiveTokens.length; i++) {
    const currentToken = progressiveTokens[i]

    currentToken.text = currentToken.text.map((t, idx) => {
      // within one token: a string starting with っ right after an element ending with っ
      // loses its leading っ
      if (typeof t === 'string' && idx > 0 && SMALL_TSU.includes(t[0]) && SMALL_TSU.includes(currentToken.text[idx - 1][currentToken.text[idx - 1].length - 1])) {
        return t.slice(1)
      }

      // slice the first character of the first string if it's a small tsu and the last element in
      // the text array of the previous token is also a small tsu, or ends with a small tsu
      if (typeof t === 'string' && idx === 0 && SMALL_TSU.includes(t[0])) {
        const prevTexts = progressiveTokens[i - 1]?.text
        const prevLast = prevTexts?.[prevTexts.length - 1]
        if (SMALL_TSU.includes(prevLast?.[prevLast.length - 1])) return t.slice(1)
      }

      // if we encounter a long dash (ー) and the text token to its left is all romaji, then we turn it into a small dash
      if (t === 'ー' && wanakana.isRomaji(currentToken.text[idx - 1])) {
        return '–'
      }

      return t
    })

    // it's possible that the current token has a kanji we know with a small っ as part of its reading,
    // therefore making its text both contain the kanji as an array with a small っ in the reading,
    // as well as including the small っ in the second element of the text, which will be of type string
    // so we want to remove the small っ from the second element
    if (currentToken.text?.length === 2
      && Array.isArray(currentToken.text[0])
      && typeof currentToken.text[1] === 'string'
      && currentToken.text[0].length === 2
      && currentToken.text[0][1][currentToken.text[0][1].length - 1] === 'っ'
      && SMALL_TSU.includes(currentToken.text[1][0])) {
      currentToken.text[1] = currentToken.text[1].slice(1)
    }

    // it's possible that a token has a small っ string element followed by a double consonant string element
    // in this case, we need to remove the small っ string
    if (currentToken.text?.length > 1) {
      const smallTsu = currentToken.text.findIndex((t) => typeof t === 'string' && SMALL_TSU.includes(t))
      if (smallTsu > -1 && smallTsu < currentToken.text.length - 1
        && currentToken.text[smallTsu + 1][0] === currentToken.text[smallTsu + 1][1]
        && !wanakana.isKana(currentToken.text[smallTsu + 1][0])) {
        currentToken.text.splice(smallTsu, 1)
      }
    }
  }

  // everything below that touches the first token is skipped when no tokens were produced
  const firstToken = progressiveTokens[0]

  // whether the first token's first text element starts with something that is neither kana nor kanji
  const firstTextIsNotKanaOrKanji = () => firstToken?.text[0]?.[0]
    && !wanakana.isKana(firstToken.text[0][0])
    && !wanakana.isKanji(firstToken.text[0][0])

  // if bonusData.force_capitalize, then we capitalize the first character of the first token
  // but only if it is not kana or kanji
  if (bonusData?.force_capitalize && firstTextIsNotKanaOrKanji()) {
    firstToken.text[0] = firstToken.text[0][0].toUpperCase() + firstToken.text[0].slice(1)
  }

  // do the converse if bonusData.force_lowercase
  if (bonusData?.force_lowercase && firstTextIsNotKanaOrKanji()) {
    firstToken.text[0] = firstToken.text[0][0].toLowerCase() + firstToken.text[0].slice(1)
  }

  // for every progressive token, within `formatted`, if there is a circle number by itself and the next
  // entry's text also starts with the same circle number, blank the lone circle number
  progressiveTokens.forEach((t) => {
    if (t.formatted?.length > 1) {
      for (let i = 0; i < t.formatted.length - 1; i++) {
        const current = t.formatted[i]
        const next = t.formatted[i + 1]

        if (current.text?.length === 1 && next.text?.length > 1 && current.text[0] === next.text[0] && CIRCLED_NUMBER.test(current.text[0])) {
          current.text = ''
        }
      }
    }
  })

  // fix 、 comma when it's the only text element in a progressive token and the last element of the previous
  // token was not japanese text
  progressiveTokens.forEach((t, idx) => {
    if (t.text?.length === 1 && t.text[0] === '、' && idx > 0 && !wanakana.isJapanese(progressiveTokens[idx - 1].text[progressiveTokens[idx - 1].text.length - 1])) {
      t.text[0] = ','
      if (t.formatted?.[0]?.text === '、') t.formatted[0].text = ','
    }

    // do similarly for japanese periods
    if (t.text?.length === 1 && t.text[0] === '。' && idx > 0 && !wanakana.isJapanese(progressiveTokens[idx - 1].text[progressiveTokens[idx - 1].text.length - 1])) {
      t.text[0] = '.'
      if (t.formatted?.[0]?.text === '。') t.formatted[0].text = '.'
    }
  })

  // make sure the first token never has pre: true
  if (firstToken?.pre) firstToken.pre = false

  return progressiveTokens
}

/**
 * Turn the furigana-free text segments of a record into a list of
 * `[text]` / `[text, furi]` entries.
 *
 * Each segment is cut once, right before its first kanji, so that leading
 * non-kanji text and the kanji-led remainder become separate entries. The
 * furigana are then attached, in order, to the entries whose first character
 * is a kanji (or one of 々 / ヵ / ヶ).
 *
 * @param {string[]} [splitRecord] - text segments with the furigana groups removed
 * @param {string[]} [furis] - furigana strings, in document order
 * @returns {Array<string[]>} entries of the form `[text]` or `[text, furi]`
 */
export const createFuriData = (splitRecord, furis) => {
  const pieces = []

  if (splitRecord) {
    splitRecord.forEach((segment) => {
      const size = segment.length
      // empty segments contribute nothing
      if (!(size > 0)) return

      // walk to the first kanji; cut === size means the segment has none
      let cut = 0
      while (cut < size && !wanakana.isKanji(segment[cut])) cut += 1

      if (cut > 0 && cut < size) {
        // leading non-kanji text followed by a kanji-led remainder
        pieces.push([segment.slice(0, cut)], [segment.slice(cut)])
      } else {
        // starts with a kanji, or contains no kanji at all: keep it whole
        pieces.push([segment])
      }
    })
  }

  if (furis) {
    const takesFuri = (ch) => wanakana.isKanji(ch) || ch === '々' || ch === 'ヵ' || ch === 'ヶ'

    // the k-th furi belongs to the k-th entry that can carry one; extras on either side are ignored
    const annotatable = pieces.filter(([base]) => takesFuri(base[0]))
    const pairCount = Math.min(furis.length, annotatable.length)
    for (let k = 0; k < pairCount; k++) {
      annotatable[k].push(furis[k])
    }
  }

  return pieces
}

/**
 * Separate a record's `base_text` into its parenthesised furigana groups and
 * the text segments between them.
 *
 * @param {{ base_text?: string }} [record] - record whose `base_text` is parsed
 * @returns {{ furis: (string[]|undefined), splitRecord: (string[]|undefined) }}
 *   `furis` holds the group contents without parentheses (undefined when no
 *   group matched); `splitRecord` holds the non-empty segments between groups.
 *   Both are undefined when `base_text` is missing.
 */
export const recordSplit = (record) => {
  const furiRegex = /\([０-９？　-龯_]+\)/g
  const baseText = record?.base_text

  if (baseText === undefined || baseText === null) {
    return { furis: undefined, splitRecord: undefined }
  }

  const stripParens = (group) => group.replace(/[()]/g, '')

  const groups = baseText.match(furiRegex)
  const furis = groups ? groups.map(stripParens) : undefined

  // splitting on the groups can leave empty strings at the edges; drop them
  const splitRecord = baseText.split(furiRegex).filter((chunk) => chunk !== '')

  return { furis, splitRecord }
}

/**
 * @deprecated Stub: ignores its arguments and always returns the string 'DEPRECATED'.
 * @returns {string}
 */
export const pairFuriToKanjiProg = () => {
  return 'DEPRECATED'
}

/**
 * @deprecated Stub: ignores its arguments and always returns the string 'DEPRECATED'.
 * @returns {string}
 */
export const pairFuriToKanjiProgNew = () => {
  return 'DEPRECATED'
}

/**
 * @deprecated Stub: ignores its arguments and always returns the string 'DEPRECATED'.
 * @returns {string}
 */
export const patchReading = () => {
  return 'DEPRECATED'
}

/**
 * Check whether `furi` can be an on'yomi reading of `kanji`.
 *
 * A match is accepted when, after converting the on readings to hiragana:
 *  - `furi` equals a reading, or equals one once a trailing '-' is removed from `furi`;
 *  - `furi` contains a small っ and starts with the same kana as a reading (がっこう-style);
 *  - a reading is a suffix reading (leading '-') and `furi` equals it without the '-',
 *    or equals its dakuten / handakuten variant.
 *
 * @param {string} kanji - character to look up (compared against each record's `literal`)
 * @param {string} furi - reading to test; non-string values never match
 * @param {Array} [m] - lookup entries; the first element of each entry is read for
 *   `literal` and `on_yomi` (array of readings)
 * @returns {boolean}
 */
export const isOnYomi = (kanji, furi, m) => {
  // a non-string reading can never match; without this guard the prefix check
  // below threw a TypeError for undefined/null furi
  if (typeof furi !== 'string') return false

  // locate the record for this kanji; when a literal repeats, the last entry wins
  let kanjiInfo
  m?.forEach((entry) => {
    const nested = entry?.[0]
    if (nested?.literal === kanji) kanjiInfo = nested
  })

  // explicit arrow so map's index/array arguments are not forwarded to toHiragana as options
  const onReadings = kanjiInfo?.on_yomi?.map((reading) => wanakana.toHiragana(reading))
  if (!onReadings) return false

  if (onReadings.includes(furi)) return true

  // check if prefix furi
  if (furi.endsWith('-') && onReadings.includes(furi.slice(0, -1))) return true

  // Only these kana have a voiced form, and it sits at code point +1; the は row
  // additionally has a semi-voiced form at +2. Shifting any other kana yields an
  // unrelated character (な+1 = に), which previously produced false matches.
  const DAKUTEN_KANA = 'かきくけこさしすせそたちつてとはひふへほカキクケコサシスセソタチツテトハヒフヘホ'
  const HANDAKUTEN_KANA = 'はひふへほハヒフヘホ'
  const voicedForms = (reading) => {
    const first = reading[0]
    if (!first) return []
    const rest = reading.substring(1)
    const forms = []
    if (DAKUTEN_KANA.includes(first)) forms.push(String.fromCharCode(first.charCodeAt(0) + 1) + rest)
    if (HANDAKUTEN_KANA.includes(first)) forms.push(String.fromCharCode(first.charCodeAt(0) + 2) + rest)
    return forms
  }

  const hasSmallTsu = furi.includes('っ')

  for (const on of onReadings) {
    // check if creating a がっこう type deal with small tsu
    if (hasSmallTsu && furi[0] === on[0]) return true

    // only suffix readings (leading '-') are compared in their bare / voiced forms
    if (on[0] !== '-') continue

    const post = on.substring(1)
    if (furi === post) return true
    if (voicedForms(post).includes(furi)) return true
  }

  return false
}

/**
 * Check whether `furi` can be a kun'yomi reading of `kanji`.
 *
 * A match is accepted when `furi` equals:
 *  - a reading exactly;
 *  - a prefix reading (trailing '-') without its '-';
 *  - the part of a reading before its first '.';
 *  - a reading with any leading '-' removed, or the dakuten / handakuten
 *    variant of that form.
 *
 * @param {string} kanji - character to look up (compared against each record's `literal`)
 * @param {string} furi - reading to test
 * @param {Array} [m] - lookup entries; the first element of each entry is read for
 *   `literal` and `kun_yomi` (array of readings)
 * @returns {boolean}
 */
export const isKunYomi = (kanji, furi, m) => {
  // locate the record for this kanji; when a literal repeats, the last entry wins
  let kanjiInfo
  m?.forEach((entry) => {
    const nested = entry?.[0]
    if (nested?.literal === kanji) kanjiInfo = nested
  })

  const kunReadings = kanjiInfo?.kun_yomi
  if (!kunReadings) return false
  if (kunReadings.includes(furi)) return true

  // Only these kana have a voiced form, and it sits at code point +1; the は row
  // additionally has a semi-voiced form at +2. Shifting any other kana yields an
  // unrelated character (な+1 = に), which previously produced false matches.
  const DAKUTEN_KANA = 'かきくけこさしすせそたちつてとはひふへほカキクケコサシスセソタチツテトハヒフヘホ'
  const HANDAKUTEN_KANA = 'はひふへほハヒフヘホ'
  const voicedForms = (reading) => {
    const first = reading[0]
    if (!first) return []
    const rest = reading.substring(1)
    const forms = []
    if (DAKUTEN_KANA.includes(first)) forms.push(String.fromCharCode(first.charCodeAt(0) + 1) + rest)
    if (HANDAKUTEN_KANA.includes(first)) forms.push(String.fromCharCode(first.charCodeAt(0) + 2) + rest)
    return forms
  }

  for (const kun of kunReadings) {
    // check if prefix furi
    if (kun.endsWith('-') && kun.slice(0, -1) === furi) return true

    // check if pre-dot
    if (kun.includes('.') && furi === kun.split('.')[0]) return true

    // suffix readings carry a leading '-' that is not part of the furi
    const post = kun[0] === '-' ? kun.substring(1) : kun
    if (furi === post) return true

    // check dakuten/handakuten
    if (voicedForms(post).includes(furi)) return true
  }

  return false
}

export default false
