
import React, { Component } from 'react';
import Tesseract from 'tesseract.js';
import * as pdfjsLib from 'pdfjs-dist/legacy/build/pdf';
import { GlobalWorkerOptions } from 'pdfjs-dist/legacy/build/pdf';

// Point PDF.js at a CDN-hosted worker script whose version matches the
// installed pdfjs-dist library. An explicit https scheme is used because a
// protocol-relative URL ("//host/...") resolves against the page's own scheme
// and therefore breaks when the app is loaded from file:// or a custom scheme.
GlobalWorkerOptions.workerSrc = `https://cdnjs.cloudflare.com/ajax/libs/pdf.js/${pdfjsLib.version}/pdf.worker.js`;

/** Props accepted by the ImageOcr component. */
interface ImageOcrProps {
  /** File to run OCR on; processing is triggered when this prop changes to a new non-null file. */
  file: File | null;
  /**
   * Callback invoked with the fields parsed out of the OCR text.
   * Fields that could not be located in the text are passed as empty strings.
   */
  updateOcrData: (firstName: string, middleName: string, surname: string, passportNo: string, nationality: string, dob: string) => void;
}

/**
 * Fields extracted from the OCR text of a passport scan.
 * Every field is the trimmed raw text found on the line after its label;
 * an empty string means the label was not found.
 */
interface PassportData {
  /** Text on the line following the first line that contains "passport". */
  passportNumber: string;
  /** Text on the line following the first line that contains "surname". */
  surname: string;
  /** First word of the line following the "given name" label. */
  firstName: string;
  /** Remaining words of the line following the "given name" label. */
  middleName: string;
  /** Text on the line following the first line that contains "nationality". */
  nationality: string;
  /** Text on the line following the "date of birth" label, kept as printed (not normalised). */
  dateOfBirth: string;
}

/** Internal state of the ImageOcr component. */
interface ImageOcrState {
  /** Raw OCR text most recently stored by the component. */
  ocrText: string;
  /** Set to true when OCR/PDF processing starts and reset to false when it finishes. */
  loading: boolean;
  /** Fields parsed from the most recently processed OCR text. */
  passportData: PassportData;
}

/**
 * Headless component that runs OCR over an uploaded passport scan (image or
 * PDF) and reports the parsed fields through `props.updateOcrData`.
 *
 * Processing starts from `componentDidUpdate` whenever `props.file` changes to
 * a new non-null file. The component itself renders only an empty `<div>`;
 * `loading`, `ocrText` and `passportData` are kept in state but not displayed.
 */
class ImageOcr extends Component<ImageOcrProps, ImageOcrState> {
  /** Scale factor used when rasterising PDF pages before OCR. */
  private static readonly PDF_RENDER_SCALE = 1.5;

  constructor(props: ImageOcrProps) {
    super(props);

    this.state = {
      ocrText: '',
      loading: false,
      passportData: ImageOcr.createEmptyPassportData(),
    };
  }

  /** Returns a fresh PassportData object with every field set to an empty string. */
  private static createEmptyPassportData(): PassportData {
    return {
      passportNumber: '',
      surname: '',
      firstName: '',
      middleName: '',
      nationality: '',
      dateOfBirth: '',
    };
  }

  /**
   * Runs Tesseract (English model) on one image and resolves with the raw text.
   * Does not touch component state, so callers decide how `loading` and
   * `ocrText` are managed. Rejects if recognition fails.
   */
  private recognizeText = async (imageUrl: string): Promise<string> => {
    const { data: { text } } = await Tesseract.recognize(imageUrl, 'eng', {
      logger: (m) => console.log(m),
    });
    return text;
  };

  /**
   * Runs OCR on a single image, stores the text in state and parses it.
   * OCR failures are logged and swallowed; `loading` is always reset.
   *
   * @param imageUrl URL (typically a data URL) of the image to recognise.
   */
  extractTextFromImage = async (imageUrl: string): Promise<void> => {
    this.setState({ loading: true });

    try {
      const text = await this.recognizeText(imageUrl);

      this.setState({ ocrText: text });
      // NOTE(review): the recognised text contains personal data; consider
      // dropping this log outside of development builds.
      console.log('Extracted Text:', text);
      this.parsePassportData(text);
    } catch (err) {
      console.error('OCR Error: ', err);
    } finally {
      this.setState({ loading: false });
    }
  };

  /**
   * Extracts passport fields from OCR text, publishes them through
   * `props.updateOcrData` and stores them in state.
   *
   * Each field is looked up by a label keyword (case-insensitive); the value is
   * the trimmed text of the line that follows the first line containing that
   * keyword. A line is matched against at most one label, checked in the order
   * listed below, and each label is consumed by its first match.
   *
   * @param text Raw OCR text, one printed line per "\n"-separated line.
   */
  parsePassportData = (text: string) => {
    const lines = text.split('\n');
    const parsed = ImageOcr.createEmptyPassportData();

    // Ordered label table: earlier entries win when a line contains several labels.
    const rules: Array<{ label: string; assign: (value: string) => void }> = [
      { label: 'passport', assign: (value) => { parsed.passportNumber = value; } },
      { label: 'surname', assign: (value) => { parsed.surname = value; } },
      {
        label: 'given name',
        assign: (value) => {
          // Split on runs of whitespace so repeated spaces emitted by OCR do
          // not leak into the middle name.
          const [first = '', ...rest] = value.split(/\s+/);
          parsed.firstName = first;
          parsed.middleName = rest.join(' ');
        },
      },
      { label: 'nationality', assign: (value) => { parsed.nationality = value; } },
      { label: 'date of birth', assign: (value) => { parsed.dateOfBirth = value; } },
    ];
    const matchedLabels = new Set<string>();

    // Stop scanning as soon as every label has been matched once.
    for (let i = 0; i < lines.length && matchedLabels.size < rules.length; i++) {
      const lowered = (lines[i] ?? '').toLowerCase();
      const rule = rules.find(
        (candidate) => !matchedLabels.has(candidate.label) && lowered.includes(candidate.label),
      );
      if (!rule) {
        continue;
      }
      matchedLabels.add(rule.label);
      // The value is expected on the line directly below its label.
      rule.assign(lines[i + 1]?.trim() ?? '');
    }

    this.props.updateOcrData(
      parsed.firstName,
      parsed.middleName,
      parsed.surname,
      parsed.passportNumber,
      parsed.nationality,
      parsed.dateOfBirth,
    );

    this.setState({ passportData: parsed });
  };

  /**
   * Rasterises every page of a PDF, runs OCR on each page and parses the
   * combined text once, so a later page without passport labels cannot
   * overwrite fields found on an earlier page.
   *
   * Pages whose canvas context cannot be created or whose OCR fails are logged
   * and skipped. `loading` stays true for the whole document and the temporary
   * object URL is always revoked.
   *
   * @param file PDF file to process.
   */
  extractTextFromPdf = async (file: File): Promise<void> => {
    this.setState({ loading: true });
    let objectUrl: string | undefined;

    try {
      objectUrl = URL.createObjectURL(file);
      const pdf = await pdfjsLib.getDocument(objectUrl).promise;
      const pageTexts: string[] = [];

      for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
        const page = await pdf.getPage(pageNumber);
        const viewport = page.getViewport({ scale: ImageOcr.PDF_RENDER_SCALE });
        const canvas = document.createElement('canvas');
        const context = canvas.getContext('2d');
        if (!context) {
          console.error('Canvas context could not be initialized');
          continue;
        }

        canvas.width = viewport.width;
        canvas.height = viewport.height;
        await page.render({ canvasContext: context, viewport }).promise;

        try {
          pageTexts.push(await this.recognizeText(canvas.toDataURL()));
        } catch (err) {
          // A failed page must not abort the remaining pages.
          console.error('OCR Error: ', err);
        }
      }

      // Only publish results when at least one page was actually recognised.
      if (pageTexts.length > 0) {
        const text = pageTexts.join('\n');
        this.setState({ ocrText: text });
        // NOTE(review): the recognised text contains personal data; consider
        // dropping this log outside of development builds.
        console.log('Extracted Text:', text);
        this.parsePassportData(text);
      }
    } catch (error) {
      console.error('Error processing PDF:', error);
    } finally {
      if (objectUrl !== undefined) {
        // Release the blob reference held by the object URL.
        URL.revokeObjectURL(objectUrl);
      }
      this.setState({ loading: false });
    }
  };

  /**
   * Reads an image file as a data URL and hands it to `extractTextFromImage`.
   * The returned promise resolves once the read has been started, not when OCR
   * completes. Read failures are logged.
   *
   * @param file Image file to process.
   */
  handleFileChange = async (file: File): Promise<void> => {
    const reader = new FileReader();

    reader.onload = () => {
      const { result } = reader;
      if (typeof result === 'string' && result) {
        // extractTextFromImage handles its own errors, so fire-and-forget is safe.
        void this.extractTextFromImage(result);
      }
    };
    reader.onerror = () => {
      console.error('File read error: ', reader.error);
    };

    reader.readAsDataURL(file);
  };

  /** Starts processing whenever `props.file` changes to a new non-null file. */
  componentDidUpdate(prevProps: ImageOcrProps) {
    const { file } = this.props;
    if (!file || file === prevProps.file) {
      return;
    }

    // Treat the file as a PDF if either the MIME type or the extension says so.
    const fileExtension = file.name.split('.').pop()?.toLowerCase();
    const isPdf = file.type === 'application/pdf' || fileExtension === 'pdf';

    if (isPdf) {
      void this.extractTextFromPdf(file);
    } else {
      void this.handleFileChange(file);
    }
  }

  /** Renders an empty placeholder element; results are delivered via `updateOcrData`. */
  render() {
    return (
      <div>
      </div>
    );
  }
}

export default ImageOcr;


