"""Regex-based helpers that pull individual fields out of resume text."""
import re

# Sentinel returned by the single-value extractors when nothing matches.
# It is the *string* "None" (not the None object); kept for compatibility.
_NOT_FOUND = "None"

# Patterns are compiled once at import time instead of on every call.
_EMAIL_RE = re.compile(r"[a-z0-9]+@+[a-z].*?\.\w+\.?\w+")
_ZIPCODE_RE = re.compile(r"[A-Z]{2}\s\d{5,6}")
_PHONE_RE = re.compile(r"(\(?\d{3}\)?\-?\s?\d{3}\-\d{4})")
_CASE_NUMBER_RE = re.compile(r"\d{2}\-\w+\-\d+\-\w+\-?\w+", re.IGNORECASE)
_LITIGATION_RE = re.compile(r"(\d+|\w+)\s?years")
_PATENT_RE = re.compile(r"\d{1,3}\,\d{1,3}\,\d{3}\,?", re.IGNORECASE)
_NAME_RE = re.compile(r"\w+.*\n")


def _first_match(pattern, content):
    """Return the text of the first match of *pattern* in *content*.

    Returns the string "None" when there is no match, or when *content*
    is not something the pattern can search (e.g. the ``None`` object).
    """
    try:
        match = pattern.search(content)
    except TypeError:
        # Non-string input: report "not found" rather than crash, but do
        # not swallow unrelated exceptions the way a bare ``except`` would.
        return _NOT_FOUND
    return match.group() if match else _NOT_FOUND


def _unique_in_order(values):
    """Return *values* as a list with duplicates dropped, first-seen order kept."""
    seen = set()
    unique = []
    for value in values:
        if value not in seen:
            seen.add(value)
            unique.append(value)
    return unique


def extract_email(content: str) -> str:
    """
    Extracts email id of the expert

    Returns the first match in ``content``, or the string "None" if there
    is none. The part before "@" is matched as lowercase letters and
    digits only.
    """
    return _first_match(_EMAIL_RE, content)


def extract_zipcode(content: str) -> str:
    """
    Extracts zipcode from the resume

    Matches two uppercase letters, one whitespace character and five or
    six digits (e.g. "TX 78701"); the two letters are part of the returned
    text. Returns the string "None" if nothing matches.
    """
    return _first_match(_ZIPCODE_RE, content)


def extract_phone(content: str) -> str:
    """
    Extracts phone number of the expert.

    Matches three digits (optionally parenthesised), an optional hyphen,
    an optional whitespace character, three digits, a hyphen and four
    digits. Returns the string "None" if nothing matches.
    """
    return _first_match(_PHONE_RE, content)


def extract_case_numbers(content: str) -> list:
    """
    Extracts all the case numbers associated with resume

    Returns the distinct matches as a list, in order of first appearance.
    The list is empty when nothing matches.
    """
    # The pattern cannot match a comma, so no comma stripping is needed.
    return _unique_in_order(
        match.group() for match in _CASE_NUMBER_RE.finditer(content)
    )


def extract_litigation_experience(content: str) -> str:
    """
    Extracts the litigation experience of the expert

    Returns the first "<digits or word> years" phrase found in ``content``
    (e.g. "15 years"), or the string "None" if there is none.
    """
    return _first_match(_LITIGATION_RE, content)


def extract_patents_issued(content: str) -> list:
    """
    Returns the patents issued by the expert

    Finds comma-grouped numbers such as "7,123,456" and returns them with
    the commas removed, as a list of distinct strings in order of first
    appearance. The list is empty when nothing matches.
    """
    return _unique_in_order(
        match.group().replace(",", "") for match in _PATENT_RE.finditer(content)
    )


def extract_name(content: str) -> str:
    """
    Returns the name of the expert

    The result is the text from the first word character to the end of
    that line, *including* the trailing newline. Returns the string "None"
    if no such newline-terminated text exists.
    """
    return _first_match(_NAME_RE, content)