"""Regex-based helpers that pull individual fields out of resume text."""

import re

# Sentinel returned by the single-value extractors when nothing is found.
# It is the *string* "None" (not the ``None`` object); kept for backward
# compatibility with existing callers.
_NOT_FOUND = "None"

# Patterns are compiled once at import time instead of on every call.
# The e-mail pattern accepts mixed case, dots/plus/hyphen/underscore in the
# local part, sub-domains and TLDs longer than three letters.
_EMAIL_RE = re.compile(
    r"[A-Za-z0-9][A-Za-z0-9._%+-]*@(?:[A-Za-z0-9-]+\.)+[A-Za-z]{2,}"
)
_ZIPCODE_RE = re.compile(r"(\w{2}\s\d{5})")
_PHONE_RE = re.compile(r"(\(?\d{3}\)?\-?\s?\d{3}\-\d{4})")
_CASE_NUMBER_RE = re.compile(r"\d{2}\-\w+\-\d+\-\w+\-?\w+", re.IGNORECASE)
_EXPERIENCE_RE = re.compile(r"(\d+|\w+)\s?years")
_PATENT_RE = re.compile(r"\d{1,3}\,\d{1,3}\,\d{3}\,?", re.IGNORECASE)
_NAME_RE = re.compile(r"(\w+\s\w+.*?)Resume")


def _first_match(pattern, content, group=0):
    """Return *group* of the first match of *pattern* in *content*.

    Returns the ``"None"`` sentinel when *content* is not a string or when
    the pattern does not match.
    """
    if not isinstance(content, str):
        return _NOT_FOUND
    match = pattern.search(content)
    if match is None:
        return _NOT_FOUND
    return match.group(group)


def _unique_matches(pattern, content, strip_commas=False):
    """Return all distinct matches of *pattern*, in first-seen order.

    Returns an empty list when *content* is not a string. When
    *strip_commas* is true, commas are removed from each match before
    de-duplication.
    """
    if not isinstance(content, str):
        return []
    found = (match.group() for match in pattern.finditer(content))
    if strip_commas:
        found = (text.replace(",", "") for text in found)
    # dict.fromkeys de-duplicates while keeping insertion order, so the
    # result is deterministic (unlike list(set(...))).
    return list(dict.fromkeys(found))


def extract_email(content: str) -> str:
    """Extract the first e-mail address found in the resume text.

    Returns the string ``"None"`` when no address is found.
    """
    return _first_match(_EMAIL_RE, content)


def extract_zipcode(content: str) -> str:
    """Extract the first "<2 word chars> <5 digits>" span, e.g. ``"CA 94105"``.

    Returns the string ``"None"`` when no such span is found.
    """
    return _first_match(_ZIPCODE_RE, content)


def extract_phone(content: str) -> str:
    """Extract the first phone number, e.g. ``"(415) 555-1234"``.

    Returns the string ``"None"`` when no phone number is found.
    """
    return _first_match(_PHONE_RE, content)


def extract_case_numbers(content: str) -> list:
    """Extract all distinct case numbers, e.g. ``"12-cv-1234-ABC"``.

    Results are returned in order of first appearance; the list is empty
    when nothing matches.
    """
    return _unique_matches(_CASE_NUMBER_RE, content)


def extract_litigation_experience(content: str) -> str:
    """Extract the first "<word or number> years" phrase, e.g. ``"15 years"``.

    Returns the string ``"None"`` when no such phrase is found.
    """
    return _first_match(_EXPERIENCE_RE, content)


def extract_patents_issued(content: str) -> list:
    """Extract all distinct comma-grouped patent numbers, commas removed.

    For example ``"7,123,456"`` is returned as ``"7123456"``. Results are
    in order of first appearance; the list is empty when nothing matches.
    """
    return _unique_matches(_PATENT_RE, content, strip_commas=True)


def extract_name(content: str) -> str:
    """Extract the text preceding the word "Resume" as the expert's name.

    The text must start with at least two whitespace-separated words.
    Surrounding whitespace is stripped. Returns the string ``"None"`` when
    the pattern does not match.
    """
    name = _first_match(_NAME_RE, content, group=1)
    # The lazy capture includes the whitespace right before "Resume".
    return name.strip()