|
@@ -0,0 +1,45 @@
|
|
|
+import re
|
|
|
+
|
|
|
+
|
|
|
def extract_email(content):
    """
    Extract the first email address found in ``content``.

    The pattern accepts a lowercase alphanumeric local part, a lowercase
    alphabetic domain and a 2-3 letter lowercase suffix; addresses with
    uppercase letters, dots or dashes are not matched in full.

    Args:
        content: Text to search (e.g. the body of a resume).

    Returns:
        The complete matched address, or the literal string ``"None"``
        (not the ``None`` object) when nothing matches or ``content`` is
        not a string.
    """
    pattern = r"[a-z0-9]+@[a-z]+\.[a-z]{2,3}"
    try:
        match = re.search(pattern, content)
    except TypeError:
        # Non-string input (e.g. None): keep the best-effort sentinel.
        return "None"
    # group() is the whole match; indexing it with [0] would yield only
    # the first character of the address.
    return match.group() if match else "None"
|
|
|
+
|
|
|
+
|
|
|
def extract_zipcode(content):
    """
    Extract the first zipcode-like token from ``content``.

    The pattern looks for two word characters, one whitespace character
    and five digits (e.g. ``"CA 95112"``), and the whole matched text,
    including the two-character prefix, is returned.

    Args:
        content: Text to search (e.g. the body of a resume).

    Returns:
        The complete matched text, or the literal string ``"None"``
        (not the ``None`` object) when nothing matches or ``content`` is
        not a string.
    """
    pattern = r"\w{2}\s\d{5}"
    try:
        match = re.search(pattern, content)
    except TypeError:
        # Non-string input (e.g. None): keep the best-effort sentinel.
        return "None"
    # group() is the whole match; indexing it with [0] would yield only
    # its first character.
    return match.group() if match else "None"
|
|
|
+
|
|
|
+
|
|
|
def extract_phone(content):
    """
    Extract the first phone number found in ``content``.

    Recognised shapes are ``ddd-ddd-dddd`` and ``(ddd)-ddd-dddd``; the
    parentheses around the first group are optional, the dashes are not.

    Args:
        content: Text to search (e.g. the body of a resume).

    Returns:
        The complete matched number, or the literal string ``"None"``
        (not the ``None`` object) when nothing matches or ``content`` is
        not a string.
    """
    pattern = r"\(?\d{3}\)?\-\d{3}\-\d{4}"
    try:
        match = re.search(pattern, content)
    except TypeError:
        # Non-string input (e.g. None): keep the best-effort sentinel.
        return "None"
    # group() is the whole match; indexing it with [0] would yield only
    # the first character of the number.
    return match.group() if match else "None"
|
|
|
+
|
|
|
+
|
|
|
def extract_case_numbers(content):
    """
    Extract every distinct case number found in ``content``.

    A case number is matched as ``d:dd-<word>-<digits>-<word>`` with an
    optional further ``-<word>`` part, e.g. ``1:19-cv-01234-ABC``.

    Args:
        content: Text to search (e.g. the body of a resume).

    Returns:
        A list of unique case numbers in order of first appearance;
        an empty list when there are none.
    """
    case_numbers = re.compile(r"\d\:\d{2}\-\w+\-\d+\-\w+\-?\w+", re.IGNORECASE)
    matches = (found.group() for found in case_numbers.finditer(content))
    # dict.fromkeys removes duplicates while keeping first-seen order, so
    # the result is stable across runs (list(set(...)) order varies with
    # string hash randomization). The pattern cannot match a comma, so
    # no comma stripping is needed.
    return list(dict.fromkeys(matches))
|