"""Regex-based helpers that pull individual fields out of resume text."""
import re
from typing import List

# Value returned by the single-value extractors when nothing is found.
# NOTE: this is the literal string "None", not the ``None`` object; it is
# kept as-is so existing callers that compare against "None" keep working.
_NOT_FOUND = "None"

# Local part may contain dots, underscores, etc.; the domain needs at least
# one dot-separated label after the first one. No whitespace can be matched.
_EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9-]+(?:\.[A-Za-z0-9-]+)+")
# Two capital letters, one whitespace character, then 5 or 6 digits.
_ZIPCODE_RE = re.compile(r"[A-Z]{2}\s\d{5,6}")
# 3 digits (optionally parenthesised), optional "-" and/or whitespace,
# 3 digits, a mandatory "-", then 4 digits.
_PHONE_RE = re.compile(r"(\(?\d{3}\)?\-?\s?\d{3}\-\d{4})")
_CASE_NUMBER_RE = re.compile(r"\d{2}\-\w+\-\d+\-\w+\-?\w+")
# A run of word characters (digits or a spelled-out word) before "years".
_YEARS_RE = re.compile(r"(\d+|\w+)\s?years")
# Comma-grouped number such as "7,123,456", with an optional trailing comma.
_PATENT_RE = re.compile(r"\d{1,3}\,\d{1,3}\,\d{3}\,?")
# First run of word characters through the end of its line, newline included.
_NAME_RE = re.compile(r"\w+.*\n")


def _first_match(pattern, content) -> str:
    """Return the first match of ``pattern`` in ``content``, or "None".

    Non-string input (for example ``None`` or ``bytes``) yields "None"
    instead of raising, which is what the extractors have always done.
    """
    if not isinstance(content, str):
        return _NOT_FOUND
    match = pattern.search(content)
    return match.group() if match else _NOT_FOUND


def _unique_in_order(values) -> List[str]:
    """Drop duplicates from ``values`` while keeping first-seen order."""
    return list(dict.fromkeys(values))


def extract_email(content: str) -> str:
    """Return the first email address found in ``content``.

    Returns the string "None" when no address is found.
    """
    return _first_match(_EMAIL_RE, content)


def extract_zipcode(content: str) -> str:
    """Return the first zipcode found in ``content``.

    The returned text includes the two capital letters that precede the
    digits (e.g. "CA 95112"). Returns the string "None" when nothing matches.
    """
    return _first_match(_ZIPCODE_RE, content)


def extract_phone(content: str) -> str:
    """Return the first phone number found in ``content``.

    Accepts forms such as "(408) 555-1234" and "408-555-1234".
    Returns the string "None" when nothing matches.
    """
    return _first_match(_PHONE_RE, content)


def extract_case_numbers(content: str) -> List[str]:
    """Return every distinct case number found in ``content``.

    Matches have the shape "12-cv-01234-ABC". Duplicates are removed and the
    remaining values are listed in order of first appearance. Returns an
    empty list when nothing matches.
    """
    # The pattern cannot match a comma, so no clean-up of the match is needed.
    return _unique_in_order(
        match.group() for match in _CASE_NUMBER_RE.finditer(content)
    )


def extract_litigation_experience(content: str) -> str:
    """Return the first "<number or word> years" phrase in ``content``.

    Returns the string "None" when nothing matches.
    """
    return _first_match(_YEARS_RE, content)


def extract_patents_issued(content: str) -> List[str]:
    """Return every distinct patent number found in ``content``.

    Numbers are recognised in comma-grouped form ("7,123,456") and returned
    with the commas removed ("7123456"). Duplicates are removed and the
    remaining values are listed in order of first appearance. Returns an
    empty list when nothing matches.
    """
    return _unique_in_order(
        match.group().replace(",", "") for match in _PATENT_RE.finditer(content)
    )


def extract_name(content: str) -> str:
    """Return the first line of ``content`` that contains a word character.

    The result starts at that line's first word character and includes the
    line's trailing newline. Text with no newline after the first word
    yields the string "None", as does text with no match at all.
    """
    return _first_match(_NAME_RE, content)