Prechádzať zdrojové kódy

added parser elements for extraction of names from expert resume

Harsh Parikh pred 2 rokmi
rodič
commit
1de14ff7d7

+ 2 - 0
expert_resume/parse_expert_resume.py

@@ -14,6 +14,7 @@ from parse_resume_utils import (
     extract_case_numbers,
     extract_litigation_experience,
     extract_patents_issued,
+    extract_name,
 )
 
 
@@ -26,6 +27,7 @@ def main(PATH):
     for idx, file in enumerate(required_files):
         print(idx, file)
         parse_content = parser.from_file(file)["content"].strip().replace("\n", "")
+        data_dict["name"].append(extract_name(parse_content))
         data_dict["email"].append(extract_email(parse_content))
         data_dict["phone"].append(extract_phone(parse_content))
         data_dict["zipcode"].append(extract_zipcode(parse_content))

+ 11 - 0
expert_resume/parse_resume_utils.py

@@ -66,3 +66,14 @@ def extract_patents_issued(content):
     for current in patent.finditer(content):
         results.append(current.group().replace(",", ""))
     return list(set(results))
+
+
def extract_name(content):
    """
    Returns the name of the expert.

    The name is located with the regular expression
    ``(\\w+\\s\\w+.*?)Resume``: the leftmost span that begins with two words
    separated by a single whitespace character and extends, as briefly as
    possible, up to a literal (case-sensitive) ``Resume``. The captured
    text is returned unmodified, so any whitespace sitting directly in
    front of ``Resume`` is kept.

    Args:
        content: Text to search. Anything that is not a ``str`` is treated
            as "no name found" rather than raising.

    Returns:
        The captured text, or the string ``"None"`` (not the ``None``
        object) when no name can be extracted.
    """
    pattern = r"(\w+\s\w+.*?)Resume"

    # Best-effort contract: input that cannot be searched with a str
    # pattern yields the sentinel instead of an exception.
    if not isinstance(content, str):
        return "None"

    match = re.search(pattern, content)
    # Check the "no match" case explicitly instead of relying on a bare
    # except, which would also swallow KeyboardInterrupt/SystemExit.
    if match is None:
        return "None"
    return match.group(1)