parse_expert_pdf_utils.py 881 B

12345678910111213141516171819202122232425262728
  1. from collections import defaultdict
  2. import re
  3. from distutils.filelist import findall
  4. def case_number_extraction(content):
  5. dict_case_numbers = defaultdict(int)
  6. case_number_info = re.findall("Case\sNo\.\s(\d\:\d{2}\-\w{2}\-\d{5}\-\w{3})", content)
  7. case_number = ""
  8. for element in case_number_info:
  9. dict_case_numbers[element] += 1
  10. for mykey, value in dict_case_numbers.items():
  11. case_number = mykey
  12. return case_number
  13. def expert_name_extraction(content):
  14. regex = r"\bEXPERT\sREPORT\sOF\s(.+?)," # \bEXPERT\sREPORT\sOF\s(.+?),
  15. results = []
  16. expert = re.compile(regex, re.IGNORECASE)
  17. for current in expert.finditer(content):
  18. results.append(current.group().replace(",", ""))
  19. return list(set(results))
  20. # return print(expert_names)
  21. def plaintiff_extraction(content):
  22. plaintiff_info = re.findall("", content)