|
@@ -6,8 +6,8 @@ def extract_email(content):
|
|
Extracts email id of the expert
|
|
Extracts email id of the expert
|
|
"""
|
|
"""
|
|
try:
|
|
try:
|
|
- pattern = r"[a-z0-9]+@[a-z]+\.[a-z]{2,3}"
|
|
|
|
- return re.search(pattern, content).group()[0]
|
|
|
|
|
|
+ pattern = r"([a-z0-9]+@[a-z]+\.[a-z]{2,3})"
|
|
|
|
+ return re.search(pattern, content).groups()[0]
|
|
except:
|
|
except:
|
|
return "None"
|
|
return "None"
|
|
|
|
|
|
@@ -17,8 +17,8 @@ def extract_zipcode(content):
|
|
Extracts zipcode from the resume
|
|
Extracts zipcode from the resume
|
|
"""
|
|
"""
|
|
try:
|
|
try:
|
|
- pattern = r"\w{2}\s\d{5}"
|
|
|
|
- return re.search(pattern, content).group()[0]
|
|
|
|
|
|
+ pattern = r"(\w{2}\s\d{5})"
|
|
|
|
+ return re.search(pattern, content).groups()[0]
|
|
except:
|
|
except:
|
|
return "None"
|
|
return "None"
|
|
|
|
|
|
@@ -28,8 +28,8 @@ def extract_phone(content):
|
|
Extracts phone number of the expert.
|
|
Extracts phone number of the expert.
|
|
"""
|
|
"""
|
|
try:
|
|
try:
|
|
- pattern = r"\(?\d{3}\)?\-\d{3}\-\d{4}"
|
|
|
|
- return re.search(pattern, content).group()[0]
|
|
|
|
|
|
+ pattern = r"(\(?\d{3}\)?\-?\s?\d{3}\-\d{4})"
|
|
|
|
+ return re.search(pattern, content).group()
|
|
except:
|
|
except:
|
|
return "None"
|
|
return "None"
|
|
|
|
|
|
@@ -39,7 +39,30 @@ def extract_case_numbers(content):
|
|
Extracts all the case numbers associated with resume
|
|
Extracts all the case numbers associated with resume
|
|
"""
|
|
"""
|
|
results = []
|
|
results = []
|
|
- case_numbers = re.compile(r"\d\:\d{2}\-\w+\-\d+\-\w+\-?\w+", re.IGNORECASE)
|
|
|
|
|
|
+ case_numbers = re.compile(r"\d{2}\-\w+\-\d+\-\w+\-?\w+", re.IGNORECASE)
|
|
for current in case_numbers.finditer(content):
|
|
for current in case_numbers.finditer(content):
|
|
results.append(current.group().replace(",", ""))
|
|
results.append(current.group().replace(",", ""))
|
|
return list(set(results))
|
|
return list(set(results))
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def extract_litigation_experience(content):
    """
    Extracts the litigation experience of the expert

    Searches ``content`` for the first occurrence of a number or word
    followed (optionally after one whitespace character) by ``years``,
    e.g. ``"10 years"`` or ``"twenty years"``.

    Returns the matched text, or the string ``"None"`` when nothing
    matches or ``content`` is not text that ``re.search`` can scan.
    """
    pattern = r"(\d+|\w+)\s?years"
    try:
        match = re.search(pattern, content)
    except TypeError:
        # Non-string input (e.g. None) keeps the best-effort "None" result
        # instead of raising; everything else is allowed to propagate.
        return "None"
    if match is None:
        return "None"
    return match.group()
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def extract_patents_issued(content):
    """
    Returns the patents issued by the expert

    Scans ``content`` for comma-grouped numbers such as ``7,123,456`` and
    returns each distinct number once, with the commas removed, in the
    order it first appears in ``content``.

    NOTE(review): the pattern matches any number grouped this way, not only
    patent numbers -- confirm against the expected resume format.
    """
    # A trailing comma (list separator) is consumed by the pattern and then
    # stripped together with the grouping commas below.
    patent = re.compile(r"\d{1,3}\,\d{1,3}\,\d{3}\,?")
    seen = set()
    results = []
    for current in patent.finditer(content):
        number = current.group().replace(",", "")
        # De-duplicate while keeping first-seen order so the result is
        # deterministic across runs (set iteration order is not).
        if number not in seen:
            seen.add(number)
            results.append(number)
    return results
|