import logging
import tabula

# Configure the root logger so the INFO-level progress messages below are emitted.
logging.basicConfig(level=logging.INFO)
logging.info("Starting PDF extraction...")

# Path to the PDF file
pdf_path = "dummy.pdf"
# Destination of the optional CSV export of the extracted tables
output_csv_path = "output.csv"

try:
    # Extract tables from the PDF into a list of DataFrames
    tables = tabula.read_pdf(pdf_path, pages="all", multiple_tables=True)

    if tables:
        logging.info("Tables extracted successfully.")
        # Only the first extracted table is echoed to stdout as a preview.
        print("Extracted Table:")
        print(tables[0])

        # Export only when something was extracted; otherwise the
        # "saved" message below would contradict "No tables found".
        tabula.convert_into(
            pdf_path, output_csv_path, output_format="csv", pages="all"
        )
        logging.info("Tables have been saved to %s.", output_csv_path)
    else:
        logging.info("No tables found in the PDF.")
except Exception as e:
    # Top-level boundary of the script: report the failure instead of
    # crashing, but keep the traceback so the root cause can be diagnosed.
    logging.exception("An error occurred: %s", e)
