parse_expert_pdf.py 730 B

123456789101112131415161718192021222324252627282930
  1. from parse_expert_pdf_utils import (
  2. case_number_extraction,
  3. expert_name_extraction,
  4. )
  5. import tika
  6. from tika import parser
  7. import re
  8. import warnings
  9. warnings.filterwarnings('ignore')
  10. def main():
  11. tika.initVM()
  12. path = "/home/omkardesai/Code/pdf_parser/pdfs/expert_parse_pdf.pdf"
  13. parsed_pdf = parser.from_file(path)
  14. print(parsed_pdf.keys())
  15. # for mykeys, myvalues in parsed_pdf['metadata'].items():
  16. # print(f"{mykeys}")
  17. # print(f"{myvalues}")
  18. content = parsed_pdf['content']
  19. # print(content)
  20. case_number = case_number_extraction(content)
  21. print(case_number)
  22. expert_name = expert_name_extraction(content)
  23. print(expert_name)
  24. if __name__ == "__main__":
  25. main()