"""Run the expert-report extractors over every PDF in a directory."""

import os
import sys

import tika
from tika import parser

from parse_expert_pdf_utils import (
    case_number_extraction,
    expert_name_extraction,
    plaintiff_extraction,
    defendent_extraction,
    patent_extraction,
    on_behalf_of_extraction,
    acronym_extraction,
)

# Start the Tika JVM once at import time, as the original script did.
tika.initVM()


def main(PATH: str) -> None:
    """Print the acronym extraction result for each PDF found in ``PATH``.

    Args:
        PATH: Directory that is scanned (non-recursively) for ``.pdf`` files.

    Files for which Tika returns no text content are skipped with a warning
    on stderr instead of aborting the whole run.
    """
    # Match on the file extension only (case-insensitively); a substring
    # search would also pick up names such as "report.pdf.bak".
    # Sorting makes the processing order reproducible across runs.
    pdf_names = sorted(
        name for name in os.listdir(PATH) if name.lower().endswith(".pdf")
    )

    for pdf_name in pdf_names:
        # os.listdir yields bare names, so resolve them against PATH;
        # otherwise parsing only works when the CWD happens to be PATH.
        pdf_path = os.path.join(PATH, pdf_name)

        raw_content = parser.from_file(pdf_path).get("content")
        if raw_content is None:
            # Tika reports no content when it cannot extract any text
            # (e.g. image-only PDFs); there is nothing to analyse.
            print(f"Skipping {pdf_path}: no extractable text", file=sys.stderr)
            continue

        content = raw_content.strip().replace("\n", "")

        # The remaining extractors are currently disabled; re-enable as needed.
        # case_number = case_number_extraction(content)
        # print(case_number)
        # expert_name = expert_name_extraction(content)
        # print(expert_name)
        # plaintiff = plaintiff_extraction(content)
        # print(plaintiff)
        # defendent = defendent_extraction(content)
        # print(defendent)
        # patent = patent_extraction(content)
        # print(patent)
        # on_behalf_of = on_behalf_of_extraction(content)
        # print(on_behalf_of)
        acronym = acronym_extraction(content)
        print(acronym)


if __name__ == "__main__":
    HOME_DIR = os.path.expanduser("~")
    BASE_DIR = "Code/pdf_parser/expert_report"
    PATH = os.path.join(HOME_DIR, BASE_DIR)
    main(PATH)