1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980 |
- import re
def extract_email(content):
    """
    Extract the first email address found in ``content``.

    The local part may contain letters, digits and ``. _ % + -``; the domain
    may consist of several dot-separated labels followed by an alphabetic
    top-level domain of two or more letters. Matching is case-insensitive.

    Returns the matched address, or the string "None" (not the ``None``
    object) when ``content`` is not a string or holds no address.
    """
    if not isinstance(content, str):
        # Keep the best-effort contract: bad input yields the "None"
        # sentinel instead of an exception.
        return "None"
    # The previous pattern only accepted [a-z0-9] before the "@", a single
    # domain label and a 2-3 letter TLD, so "john.doe@mail.example.info"
    # was silently truncated to a wrong address.
    pattern = r"[a-z0-9][a-z0-9._%+-]*@(?:[a-z0-9-]+\.)+[a-z]{2,}"
    match = re.search(pattern, content, re.IGNORECASE)
    return match.group() if match else "None"
def extract_zipcode(content):
    """
    Extract the first "<two word characters> <five digits>" occurrence.

    The returned text includes the two-character prefix as well as the
    five digits (for example "CA 94105"), because the capture group spans
    the whole pattern.

    Returns the matched text, or the string "None" (not the ``None``
    object) when ``content`` is not a string or nothing matches.
    """
    if not isinstance(content, str):
        # Keep the best-effort contract: bad input yields the "None"
        # sentinel instead of an exception.
        return "None"
    pattern = r"(\w{2}\s\d{5})"
    match = re.search(pattern, content)
    if match is None:
        return "None"
    return match.group(1)
def extract_phone(content):
    """
    Extract the first phone number found in ``content``.

    Recognised shape: three digits (optionally wrapped in parentheses),
    an optional hyphen, an optional whitespace character, three digits,
    a mandatory hyphen and four digits -- e.g. "(415) 555-1234",
    "415-555-1234" or "415 555-1234".

    Returns the matched text, or the string "None" (not the ``None``
    object) when ``content`` is not a string or nothing matches.
    """
    if not isinstance(content, str):
        # Keep the best-effort contract: bad input yields the "None"
        # sentinel instead of an exception.
        return "None"
    pattern = r"(\(?\d{3}\)?\-?\s?\d{3}\-\d{4})"
    match = re.search(pattern, content)
    return match.group() if match else "None"
def extract_case_numbers(content):
    """
    Extract every distinct case number found in ``content``.

    A case number is matched as: two digits, "-", word characters, "-",
    digits, "-", then at least two word characters that may contain one
    further hyphen (e.g. "12-cv-345-ABC" or "12-cv-345-ABC-DEF").

    Returns a list of unique matches in order of first appearance; the
    list is empty when nothing matches. A non-string ``content`` raises
    ``TypeError`` from the ``re`` module.
    """
    # The pattern cannot match a comma and has no cased literals, so no
    # post-processing of the matches or IGNORECASE flag is needed.
    case_number = re.compile(r"\d{2}\-\w+\-\d+\-\w+\-?\w+")
    matches = (found.group() for found in case_number.finditer(content))
    # dict.fromkeys de-duplicates while keeping first-seen order, which
    # makes the result deterministic (list(set(...)) was not).
    return list(dict.fromkeys(matches))
def extract_litigation_experience(content):
    """
    Extract the first "<number or word> years" phrase from ``content``.

    The match is a run of digits or word characters, an optional single
    whitespace character and the literal "years" (case-sensitive), for
    example "15 years" or "twenty years". The whole phrase is returned,
    not just the quantity.

    Returns the matched text, or the string "None" (not the ``None``
    object) when ``content`` is not a string or nothing matches.
    """
    if not isinstance(content, str):
        # Keep the best-effort contract: bad input yields the "None"
        # sentinel instead of an exception.
        return "None"
    pattern = r"(\d+|\w+)\s?years"
    match = re.search(pattern, content)
    if match is None:
        return "None"
    return match.group()
def extract_patents_issued(content):
    """
    Extract every distinct comma-grouped patent number from ``content``.

    A number is matched as 1-3 digits, ",", 1-3 digits, ",", three digits
    and an optional trailing comma (e.g. "7,123,456"). Commas are removed
    from each match, so "7,123,456" is reported as "7123456".

    Returns a list of unique numbers (as strings) in order of first
    appearance; the list is empty when nothing matches. A non-string
    ``content`` raises ``TypeError`` from the ``re`` module.
    """
    # Digits and commas have no case, so no IGNORECASE flag is needed.
    patent = re.compile(r"\d{1,3}\,\d{1,3}\,\d{3}\,?")
    numbers = (
        found.group().replace(",", "") for found in patent.finditer(content)
    )
    # dict.fromkeys de-duplicates while keeping first-seen order, which
    # makes the result deterministic (list(set(...)) was not).
    return list(dict.fromkeys(numbers))
def extract_name(content):
    """
    Extract the expert's name as it appears next to a "Resume" marker.

    Two layouts are recognised:

    * "RESUME" (or a non-space character followed by "Resume"), one
      whitespace character, then three words where the second may end
      with a period -- e.g. "RESUME John Q. Public".
    * a hyphen, a whitespace character, then any text on the same line up
      to the next "Resume" -- e.g. "- Jane Doe Resume".

    Returns the entire matched text, marker included, or the string
    "None" (not the ``None`` object) when ``content`` is not a string or
    nothing matches.
    """
    # NOTE(review): the full match is returned rather than the captured
    # name group; callers presumably strip the "Resume" marker -- confirm
    # before changing.
    if not isinstance(content, str):
        # Keep the best-effort contract: bad input yields the "None"
        # sentinel instead of an exception.
        return "None"
    pattern = r"(RESUME|\SResume)\s(\w+\s\w+\.?\s\w+)|\-(\s.*?)Resume"
    match = re.search(pattern, content)
    return match.group() if match else "None"
|