"""Helpers that pull structured fields out of the text of a court filing."""

from collections import Counter, defaultdict
import re

try:
    # Kept for backward compatibility with any code that still expects this
    # name; distutils was removed from the standard library in Python 3.12,
    # so the import must not be allowed to break the whole module.
    from distutils.filelist import findall
except ImportError:
    findall = None

# Pattern text is unchanged from the original; it is now a raw string so the
# regex escapes are not interpreted (and warned about) as string escapes.
_CASE_NUMBER_RE = re.compile(
    r"Case\sNo\.\s(\d\:\d{2}\-\w{2}\-\d{5}\-\w{3})"
)

# Captures everything between "EXPERT REPORT OF " and the next comma.
_EXPERT_REPORT_RE = re.compile(r"\bEXPERT\sREPORT\sOF\s(.+?),", re.IGNORECASE)


def case_number_extraction(content: str) -> str:
    """Return the case number that occurs most often in *content*.

    A case number is the text captured after ``Case No. `` in the form
    ``D:DD-WW-DDDDD-WWW`` (``D`` = digit, ``W`` = word character).

    Args:
        content: Text to search.

    Returns:
        The most frequent case number; when several are equally frequent,
        the one that appears first in *content*. An empty string when no
        case number is found.
    """
    occurrences = Counter(_CASE_NUMBER_RE.findall(content))
    if not occurrences:
        return ""
    # most_common() orders equal counts by first appearance, which keeps the
    # result deterministic.
    return occurrences.most_common(1)[0][0]


def expert_name_extraction(content: str) -> list:
    """Return the distinct expert names announced in *content*.

    A name is whatever follows ``EXPERT REPORT OF `` (matched
    case-insensitively) up to, but not including, the next comma on the
    same line.

    Args:
        content: Text to search.

    Returns:
        A list of unique names in order of first appearance; empty when
        nothing matches.
    """
    names = (match.group(1).strip() for match in _EXPERT_REPORT_RE.finditer(content))
    # dict.fromkeys de-duplicates while preserving first-seen order, unlike
    # set(), whose iteration order varies between runs.
    return list(dict.fromkeys(names))


def plaintiff_extraction(content):
    # NOTE(review): this looks like an unfinished stub -- the pattern is empty
    # and, as far as is visible here, the result is never used or returned.
    # TODO: supply the real plaintiff pattern and return the matches.
    plaintiff_info = re.findall("", content)