123456789101112131415161718192021222324252627282930 |
- from parse_expert_pdf_utils import (
- case_number_extraction,
- expert_name_extraction,
- )
- import tika
- from tika import parser
- import re
- import warnings
- warnings.filterwarnings('ignore')
def main(path: str = "/home/omkardesai/Code/pdf_parser/pdfs/expert_parse_pdf.pdf") -> None:
    """Parse a PDF with Tika and print the case number and expert name found in it.

    The function prints, in order: the keys of the parsed result, the value
    returned by ``case_number_extraction`` and the value returned by
    ``expert_name_extraction``. Both helpers receive the parsed result's
    ``'content'`` entry unchanged.

    Args:
        path: Location of the PDF to parse. Defaults to the path that was
            previously hard-coded here, so calling ``main()`` with no
            arguments behaves exactly as before.
    """
    # Presumably starts the JVM that backs Tika; must run before parsing.
    tika.initVM()

    parsed_pdf = parser.from_file(path)
    print(parsed_pdf.keys())

    # NOTE(review): 'content' may be None when Tika extracts no text from the
    # document -- confirm that the extraction helpers tolerate that.
    content = parsed_pdf['content']

    case_number = case_number_extraction(content)
    print(case_number)

    expert_name = expert_name_extraction(content)
    print(expert_name)
# Run the extraction only when this file is executed as a script, so that
# importing the module does not trigger PDF parsing.
if __name__ == "__main__":
    main()
|