import tabula
import os

def _tabula_pdf_error_types():
    """Return the PDF-specific exception types the installed tabula exposes.

    The lookup is done defensively with ``getattr`` because
    ``tabula.io._pdf.PyPDFError`` is not guaranteed to exist; naming a missing
    attribute directly in an ``except`` clause raises ``AttributeError`` while
    another exception is being handled and hides the real failure.

    Returns:
        A one-element tuple holding the exception class when it exists,
        otherwise an empty tuple (an ``except ()`` clause matches nothing).
    """
    pdf_module = getattr(getattr(tabula, "io", None), "_pdf", None)
    candidate = getattr(pdf_module, "PyPDFError", None)
    if isinstance(candidate, type) and issubclass(candidate, BaseException):
        return (candidate,)
    return ()


def extract_pdf_to_csv(pdf_path, csv_path):
    """Extract the tables of a PDF with tabula and write them out as CSV.

    When ``tabula.read_pdf`` returns a list, each table is written to
    ``f'{csv_path}_part_{i}.csv'``; when it returns a single DataFrame-like
    object, that object is written to ``csv_path`` itself. Errors are reported
    on stdout rather than raised, and "Process completed." is always printed
    last.

    Args:
        pdf_path: Path of the PDF file to read.
        csv_path: Output path (or output path prefix when several tables are
            written). A bare filename writes into the current directory.

    Returns:
        None. Results are reported through printed messages only.
    """
    try:
        # Check if the PDF file exists
        if not os.path.exists(pdf_path):
            raise FileNotFoundError(f"The file at {pdf_path} does not exist.")

        # Extract data from the PDF
        tables = tabula.read_pdf(pdf_path, pages='all')

        # Ensure the output directory exists. dirname() is '' for a bare
        # filename and os.makedirs('') raises FileNotFoundError, so only
        # create a directory when the path actually names one.
        output_dir = os.path.dirname(csv_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        if isinstance(tables, list) and not tables:
            # Nothing was extracted, so do not claim that data was saved.
            print(f'No tables found in {pdf_path}; nothing was written.')
        else:
            # Save the extracted data to a CSV file
            if isinstance(tables, list):
                for i, table in enumerate(tables):
                    table.to_csv(f'{csv_path}_part_{i}.csv', index=False)
            else:
                tables.to_csv(csv_path, index=False)

            print(f'Data successfully extracted and saved to {csv_path}')

    except FileNotFoundError as fnf_error:
        print(fnf_error)
    except _tabula_pdf_error_types() as pdf_error:
        print(f"PDF error: {pdf_error}")
    except Exception as e:
        # Top-level boundary of this script: report instead of crashing.
        print(f"An unexpected error occurred: {e}")
    finally:
        print("Process completed.")

# Input PDF and output CSV path handed to extract_pdf_to_csv below.
pdf_path, csv_path = 'dummy.pdf', 'file.csv'

# Run the extraction with the paths configured above.
extract_pdf_to_csv(pdf_path=pdf_path, csv_path=csv_path)
