# parse_resume_utils.py
# Regex helpers for extracting fields (email, zipcode, phone, case numbers,
# litigation experience, patents, name) from resume text.

import re


  2. def extract_email(content: str) -> str:
  3. """
  4. Extracts email id of the expert
  5. """
  6. pattern = r"[a-z0-9]+@+[a-z].*?\.\w+\.?\w+"
  7. try:
  8. return re.search(pattern, content).group()
  9. except:
  10. return "None"
  11. def extract_zipcode(content: str) -> str:
  12. """
  13. Extracts zipcode from the resume
  14. """
  15. pattern = r"[A-Z]{2}\s\d{5,6}"
  16. try:
  17. return re.search(pattern, content).group()
  18. except:
  19. return "None"
  20. def extract_phone(content: str) -> str:
  21. """
  22. Extracts phone number of the expert.
  23. """
  24. pattern = r"(\(?\d{3}\)?\-?\s?\d{3}\-\d{4})"
  25. try:
  26. return re.search(pattern, content).group()
  27. except:
  28. return "None"
  29. def extract_case_numbers(content: str) -> str:
  30. """
  31. Extracts all the case numbers associated with resume
  32. """
  33. results = []
  34. case_numbers = re.compile(r"\d{2}\-\w+\-\d+\-\w+\-?\w+", re.IGNORECASE)
  35. for current in case_numbers.finditer(content):
  36. results.append(current.group().replace(",", ""))
  37. return list(set(results))
  38. def extract_litigation_experience(content: str) -> str:
  39. """
  40. Extracts the litigation experience of the expert
  41. """
  42. pattern = r"(\d+|\w+)\s?years"
  43. try:
  44. return re.search(pattern, content).group()
  45. except:
  46. return "None"
  47. def extract_patents_issued(content: str) -> str:
  48. """
  49. Returns the patents issued by the expert
  50. """
  51. regex = r"\d{1,3}\,\d{1,3}\,\d{3}\,?"
  52. results = []
  53. patent = re.compile(regex, re.IGNORECASE)
  54. for current in patent.finditer(content):
  55. results.append(current.group().replace(",", ""))
  56. return list(set(results))
  57. def extract_name(content: str) -> str:
  58. """
  59. Returns the name of the expert
  60. """
  61. # pattern = r"(RESUME|\SResume)\s(\w+\s\w+\.?\s\w+)|\-(\s.*?)Resume" Old pattern
  62. pattern = r"\w+.*\n"
  63. try:
  64. return re.search(pattern, content).group()
  65. except:
  66. return "None"