import { getDocument, TextContentItem } from 'pdfjs-dist/webpack'
import { uniq } from 'lodash'
import type { ModelValue as Area } from '~/components/inputs/InputPdfArea.vue'
import { PDFDocumentProxy } from 'pdfjs-dist/types/display/api'

/**
 * Checks whether or not the runtime supports the HTML5 File API pieces used here
 * (`ArrayBuffer`, `File` and `FileReader` on `window`).
 *
 * @returns `true` only when all three globals exist on `window`; `false` otherwise,
 * including when there is no `window` at all
 */
const canReadFileContents = (): boolean => {
	// Without this guard, touching `window` outside a browser throws a ReferenceError
	// instead of answering the question with "no".
	if (typeof window === 'undefined') {
		return false
	}
	return 'ArrayBuffer' in window && 'File' in window && 'FileReader' in window
}

/**
 * Offers an async api to read a file buffer.
 *
 * @param file the blob to read
 * @returns a promise that resolves with `reader.result` once the reader fires `load`
 * and rejects with the reader's `error` event if reading fails
 */
function readFileAsync(file: Blob): Promise<ArrayBuffer | null> {
	return new Promise<ArrayBuffer | null>((resolve, reject) => {
		const reader = new FileReader()
		reader.addEventListener('load', () => {
			// `result` is declared as string | ArrayBuffer | null, but the read is started with
			// readAsArrayBuffer below, so the string variant is not expected here.
			resolve(reader.result as ArrayBuffer | null)
		})
		reader.addEventListener('error', reject)
		reader.readAsArrayBuffer(file)
	})
}

/**
 * Reads the given blob and hands its content to PDF.js `getDocument`.
 *
 * @param file the blob to open
 * @returns the loaded document, or `null` when reading the blob produced no content
 */
export const readPdf = async (file: Blob): Promise<PDFDocumentProxy | null> => {
	const buffer = await readFileAsync(file)
	return buffer ? await getDocument(buffer).promise : null
}

// Internal: Returns true when the provided string contains at least one non-whitespace character.
// Empty and whitespace-only strings both yield false, so this can be used to filter out blank entries.
function isNotWhiteSpace(text: string): boolean {
	return /\S/.test(text)
}

// Internal: Collapses every run of successive whitespace characters into a single blank
// and strips leading/trailing whitespace.
function collapseWhiteSpaces(text: string): string {
	// The `g` flag is required: without it only the first whitespace run would be replaced.
	return text.replace(/\s+/g, ' ').trim()
}

/**
 * Internal: Brings a string into the form used for marker comparisons in this file:
 * upper-cased, with surrounding whitespace removed.
 *
 * @param string the text to normalize
 * @returns the upper-cased, trimmed text
 */
function normalizeString(string: string): string {
	return string.toUpperCase().trim()
}

/**
 * Internal: Tells whether the position stored in a transform matrix lies inside the given area.
 *
 * The position is read from indices 4 (x) and 5 (y) of `transform`. Horizontally the area spans
 * from `area.x` to `area.x + area.width`; vertically it spans from `area.y - area.height` up to
 * `area.y`. All bounds are inclusive.
 */
function isInArea(transform: number[], area: Area): boolean {
	const x = transform[4]
	const y = transform[5]

	const withinHorizontalBounds = x >= area.x && x <= area.x + area.width
	const withinVerticalBounds = y <= area.y && y >= area.y - area.height

	return withinHorizontalBounds && withinVerticalBounds
}
/**
 * Internal: Finds cohesive strings within the provided list of glyph items.
 *
 * Items are visited in the given order and each one is compared with the previously accepted item:
 * - close enough on both axes: the item is appended to the string currently being built
 *   (with a separating blank when the two items are not directly next to each other),
 * - otherwise: the string built so far is finished and a new one starts with this item.
 *
 * @param items text items, each providing `str`, `width` and a `transform` matrix
 * @returns the merged strings after whitespace clean-up; blank results are dropped
 */
export function getMergedTextsFromItems(items: TextContentItem[]): string[] {
	// Constants that should have been provided by PDF.js, but aren't.
	// TRM = TransformMatrix
	const TRM_SX = 0
	const TRM_X = 4
	const TRM_Y = 5

	// Internal: Returns a boolean indicating whether or not value is within the desired deviation around compare (bounds exclusive)
	const near = (value: number, compare: number, deviation: number): boolean =>
		value > compare - deviation && value < compare + deviation

	if (!items || !items.length) return []

	const strings: string[] = []
	let string = items[0].str || ''
	let last = items[0]

	// Iterate over each item (= glyph)
	for (let i = 1; i < items.length; i++) {
		const item = items[i]

		// Skip items that don't have a string representation
		if (!item.str) continue

		// Compare the visual coordinates of the last and current item:
		// horizontally not super far away AND vertically within one TRM_SX of each other (= same line).
		const belongsToCurrentString =
			near(item.transform[TRM_X], last.transform[TRM_X], (item.width + last.width) * 10) &&
			near(item.transform[TRM_Y], last.transform[TRM_Y], item.transform[TRM_SX])

		if (belongsToCurrentString) {
			// Near each other but not directly touching: insert a whitespace character in between,
			// unless one of the two already is whitespace.
			const directlyAdjacent = near(item.transform[TRM_X], last.transform[TRM_X], item.width + last.width)
			if (isNotWhiteSpace(item.str) && isNotWhiteSpace(last.str) && !directlyAdjacent) {
				string += ' '
			}
		} else {
			// Far away: the two glyphs are assumed unrelated, so the previous string is complete.
			strings.push(string)
			string = ''
		}

		// Append glyph string and remember the current item for next iteration
		string += item.str
		last = item
	}

	// Push whatever we have left
	if (string) strings.push(string)

	// Clean up whitespace first, then drop everything that ended up empty. Checking the length
	// after the clean-up also removes '' entries, which can be pushed above when the first item
	// carries no text.
	return strings.map(collapseWhiteSpaces).filter((text) => text.length > 0)
}

/**
 * Internal: Decides whether two text items are considered part of the same line.
 *
 * Both conditions must hold (bounds exclusive):
 * - the x positions differ by less than ten times the sum of both widths,
 * - the y positions differ by less than the TRM_SX component of `item1`'s transform.
 *
 * Note that the vertical tolerance is taken from `item1` only, so swapping the arguments
 * can change the result.
 */
function areItemsInSameLine(item1: TextContentItem, item2: TextContentItem): boolean {
	// Indices into the transform matrix (TRM = TransformMatrix)
	const TRM_SX = 0
	const TRM_X = 4
	const TRM_Y = 5

	const near = (value: number, compare: number, deviation: number): boolean =>
		value > compare - deviation && value < compare + deviation

	// they are horizontally not super far away
	const horizontallyClose = near(item1.transform[TRM_X], item2.transform[TRM_X], (item1.width + item2.width) * 10)
	// they are in the same line
	const verticallyAligned = near(item1.transform[TRM_Y], item2.transform[TRM_Y], item1.transform[TRM_SX])

	return horizontallyClose && verticallyAligned
}

/**
 * Internal: Loads the text items found on a specific page of the provided PDF document.
 *
 * @param pdf the opened PDF document
 * @param pageNumber the page to load; passed unchanged to `pdf.getPage`
 * @param area optional; when given, only items whose transform passes `isInArea` are returned
 * @returns the page's text items, unfiltered when no area is given
 */
export const getPageItems = async (
	pdf: PDFDocumentProxy,
	pageNumber: number,
	area?: {
		x: number
		y: number
		width: number
		height: number
	},
): Promise<TextContentItem[]> => {
	const { items } = await (await pdf.getPage(pageNumber)).getTextContent()
	if (!area) {
		return items
	}
	return items.filter(({ transform }) => isInArea(transform, area))
}

/**
 * Collects the text of every non-blank item of the given PDF document into one flat list,
 * walking the pages in order.
 *
 * @param file the opened PDF document
 * @param area optional area; when set, only items inside it are considered
 * @returns the item strings in page order; an empty list when the document is missing or has no pages
 * @throws Error when `canReadFileContents()` reports that the runtime cannot read file contents
 */
export const getStrings = async (file: PDFDocumentProxy, area: Area | undefined): Promise<string[]> => {
	if (!canReadFileContents()) throw new Error('Browser does not support reading file contents!')

	if (!file || !file.numPages) {
		// abort if we couldn't read a reasonable pdf
		return []
	}
	const pageCount = file.numPages

	// Append into a single array; re-spreading the accumulated list for every page
	// would copy all previously collected strings again and again.
	const strings: string[] = []
	for (let pageNumber = 1; pageNumber <= pageCount; pageNumber++) {
		const items = await getPageItems(file, pageNumber, area)
		for (const item of items) {
			if (item.str && isNotWhiteSpace(item.str)) {
				strings.push(item.str)
			}
		}
	}

	// cleanup pdf worker to free allocated memory
	// pdf.cleanup()

	return strings
}

/**
 * Returns the text values of all pdf nodes that contain the given marker.
 * Marker and text are compared in their normalized form (see `normalizeString`).
 */
export const getStringsWithMarker = async (
	file: PDFDocumentProxy,
	marker: string,
	area: Area | undefined,
): Promise<string[]> => {
	const allStrings = await getStrings(file, area)
	const needle = normalizeString(marker)

	return allStrings.filter((text) => normalizeString(text).includes(needle))
}

/**
 * Looks up the first pdf node whose normalized text contains the normalized marker and
 * returns the text of the node that follows it.
 *
 * @returns the following string, or `null` when the marker is not found, is the last entry,
 * or the following entry is empty
 */
export const getFirstStringAfterFirstMarker = async (
	file: PDFDocumentProxy,
	marker: string,
	area: Area | undefined,
) => {
	const allStrings = await getStrings(file, area)
	const needle = normalizeString(marker)

	const markerIndex = allStrings.findIndex((text) => normalizeString(text).includes(needle))
	if (markerIndex === -1) {
		return null
	}

	return allStrings[markerIndex + 1] || null
}

/**
 * Returns the best guess for a value that was inside a pdf node's text value together with the given marker.
 *
 * Candidates are the trimmed, de-duplicated strings containing the marker that are longer than the
 * marker itself. They are tried in this order:
 * 1. a candidate starting with the marker: the text after the marker,
 * 2. a candidate ending with the marker: the text before the marker,
 * 3. otherwise the first candidate: the text before and after the marker, joined by a single blank.
 *
 * @returns the trimmed guess, or `null` when there is no usable candidate
 */
export const getBestGuessedTextValueNextToMarker = async (
	file: PDFDocumentProxy,
	marker: string,
	area: Area | undefined,
): Promise<string | null> => {
	const normalizedMarker = normalizeString(marker)
	const strings = await getStringsWithMarker(file, marker, area)

	const candidates = uniq(
		strings
			.map((s) => s.trim()) // trim all strings
			.filter((s) => normalizeString(s).length > normalizedMarker.length), // it's not helpful if the text contains only the marker
	)

	// we need at least one candidate
	if (candidates.length === 0) {
		return null
	}

	// NOTE(review): the offsets below are computed on the normalized text but applied to the original
	// candidate; this assumes upper-casing does not change the string length — confirm for the expected inputs.

	// our preferred candidate starts with the marker so we can return the value after it
	const leading = candidates.find((c) => normalizeString(c).startsWith(normalizedMarker))
	if (leading !== undefined) {
		return leading.substring(normalizedMarker.length).trim()
	}

	// our second preferred candidate ends with the marker so we can return the value before it
	const trailing = candidates.find((c) => normalizeString(c).endsWith(normalizedMarker))
	if (trailing !== undefined) {
		return trailing.substring(0, trailing.length - normalizedMarker.length).trim()
	}

	// there was no preferred candidate, so we use the first one and return both parts around the marker space-separated.
	// Both parts are trimmed (like the results above) so the blanks that surrounded the marker don't pile up in the middle.
	const candidate = candidates[0]
	const markerStart = normalizeString(candidate).indexOf(normalizedMarker)
	const before = candidate.substring(0, markerStart).trim()
	const after = candidate.substring(markerStart + normalizedMarker.length).trim()
	return before + ' ' + after
}

/**
 * Guesses a value by looking for text that stands to the right of the marker on the same line.
 *
 * Pages are searched in order. On each page the first item containing the marker (normalized
 * comparison) is located; the items on the same line with a larger x position are sorted
 * left-to-right and merged. The first merged string of the first page yielding such items is returned.
 *
 * @returns the guessed text, or `null` when the document is missing/empty or nothing was found
 */
export const getGuessFromLineWithTextMarker = async (
	file: PDFDocumentProxy,
	marker: string,
	area: Area | undefined,
): Promise<string | null> => {
	if (!file || !file.numPages) {
		return null
	}

	// the marker does not change while searching, so normalize it once
	const normalizedMarker = normalizeString(marker)

	// iterate pages; the upper bound is inclusive so the last page (and a single-page document) is searched too
	for (let pageNumber = 1; pageNumber <= file.numPages; pageNumber++) {
		const items = await getPageItems(file, pageNumber, area)
		const itemWithMarker = items.find((item) => item.str && normalizeString(item.str).includes(normalizedMarker))
		if (itemWithMarker) {
			const candidates = items
				.filter((item) => areItemsInSameLine(itemWithMarker, item)) // in same line
				.filter((item) => item.transform[4 /* x-coordinate */] > itemWithMarker.transform[4]) // is horizontally more right than the marker
				.sort((a, b) => a.transform[4] - b.transform[4]) // sort by horizontal position ltr

			if (candidates.length) {
				// merging may yield nothing (e.g. only blank items); report that as null rather than undefined
				return getMergedTextsFromItems(candidates)[0] || null
			}
		}
	}

	return null
}

/**
 * Builds, for every page of the given PDF document, the list of merged text strings found on that page.
 *
 * @param file the opened PDF document
 * @param area optional area; when set, only items inside it are considered
 * @returns one string array per page in page order, or `null` when the document is missing or has no pages
 * @throws Error when `canReadFileContents()` reports that the runtime cannot read file contents
 */
export const getComposedStringsOfPdfFile = async (file: PDFDocumentProxy, area: Area | undefined) => {
	if (!canReadFileContents()) throw new Error('Browser does not support reading file contents!')

	if (!file || !file.numPages) {
		// abort if we couldn't read a reasonable pdf
		return null
	}

	const textsPerPage: string[][] = []
	let pageNumber = 1
	while (pageNumber <= file.numPages) {
		const pageItems = await getPageItems(file, pageNumber, area)
		textsPerPage.push(getMergedTextsFromItems(pageItems))
		pageNumber += 1
	}

	return textsPerPage
}
