"""Run the expert-report PDF extractors against a single PDF file."""

import re
import sys
import warnings

import tika
from tika import parser

from parse_expert_pdf_utils import (
    case_number_extraction,
    expert_name_extraction,
    plaintiff_extraction,
    defendent_extraction,
    patent_extraction,
    on_behalf_of_extraction,
    acronym_extraction,
)

# Silence all warnings for the whole process (kept from the original script).
warnings.filterwarnings('ignore')

# Path used when no other path is supplied; kept so that calling ``main()``
# with no arguments behaves exactly as before.
DEFAULT_PDF_PATH = "/home/omkardesai/Code/pdf_parser/pdfs/expert_report1.pdf"


def main(path: str = DEFAULT_PDF_PATH) -> None:
    """Parse the PDF at *path* with Tika and print the extracted acronyms.

    The keys of Tika's parse result are printed first, followed by whatever
    ``acronym_extraction`` returns for the document text.

    Args:
        path: Location of the PDF to parse. Defaults to ``DEFAULT_PDF_PATH``.

    Raises:
        ValueError: If Tika returns no text content for the file.
    """
    tika.initVM()

    parsed_pdf = parser.from_file(path)
    print(parsed_pdf.keys())

    # Tika reports ``content`` as None when it cannot extract any text;
    # fail with a clear message instead of an AttributeError on ``.strip()``.
    raw_content = parsed_pdf.get('content')
    if raw_content is None:
        raise ValueError(f"No text content could be extracted from {path!r}")

    # NOTE(review): newlines are removed without inserting a separator, so the
    # last word of one line is joined to the first word of the next. Confirm
    # that the extractors expect the text in this form.
    content = raw_content.strip().replace('\n', '')

    # Only acronym extraction is currently enabled. The other imported
    # extractors (case number, expert name, plaintiff, defendent, patent,
    # on-behalf-of) are called the same way, with ``content`` as the argument.
    acronym = acronym_extraction(content)
    print(acronym)


if __name__ == "__main__":
    # An optional first command-line argument overrides the default PDF path.
    main(*sys.argv[1:2])