Bladeren bron

added new parsers

Harsh Parikh 2 jaren geleden
bovenliggende
commit
0f6ba65fb1
1 gewijzigde bestanden met toevoegingen van 30 en 0 verwijderingen
  1. 30 0
      server_documents/preliminary_response/utils.py

+ 30 - 0
server_documents/preliminary_response/utils.py

@@ -1,5 +1,6 @@
 import re
 
+
 def email_extraction(content: str) -> str:
     """
     Extracts emails from a document.
@@ -11,6 +12,7 @@ def email_extraction(content: str) -> str:
         result.append(email.group())
     return result
 
+
 def telephone_number_extraction(content: str) -> str:
     """
     Extracts telephone number[s?] from a document
@@ -22,6 +24,7 @@ def telephone_number_extraction(content: str) -> str:
         result.append(number.group())
     return result
 
+
 def address_extraction(content: str) -> str:
     """
     extracts address from the documents.
@@ -32,6 +35,7 @@ def address_extraction(content: str) -> str:
     except:
         return "None"
 
+
 def case_number_extraction(content: str) -> str:
     """
     Extracts the case number from the documents.
@@ -43,6 +47,7 @@ def case_number_extraction(content: str) -> str:
         results.add(current.groups()[0])
     return list(results)
 
+
 def plaintiff_extraction(content: str) -> str:
     """
     Extracts the plaintiff from the document
@@ -53,6 +58,7 @@ def plaintiff_extraction(content: str) -> str:
     except:
         return "None"
 
+
 def defendent_extraction(content: str) -> str:
     """
     Extracts the defendant from the document
@@ -63,6 +69,7 @@ def defendent_extraction(content: str) -> str:
     except:
         return "None"
 
+
 def patent_extraction(content: str) -> str:
     """
     Extracts patent numbers from the document
@@ -74,6 +81,7 @@ def patent_extraction(content: str) -> str:
         result.add(current.group().replace(",", ""))
     return list(result)
 
+
 def acronym_extraction(content: str) -> str:
     regex = r"\(\“([A-Z]{3,4})\”\)"
     results = set()
@@ -81,3 +89,25 @@ def acronym_extraction(content: str) -> str:
     for current in acronym.finditer(content):
         results.add(current.group(1))
     return list(results)
+
+
def extract_filing_date(content: str) -> str:
    """
    Extracts filing date of the document.

    Looks for the first "Date:" or "Dated:" label followed by a single
    whitespace character and a date written as "<word> <day>, <year>",
    e.g. "Dated: January 15, 2020".

    Args:
        content: Text of the document to search.

    Returns:
        The matched date text (e.g. "January 15, 2020"), or the literal
        string "None" when no date is found or ``content`` is not a string.
    """
    # The day may be one or two digits; a single ``\d`` would reject every
    # date from the 10th of the month onwards.
    regex = r"Dated?\:\s(\w+\s\d{1,2}\,\s\d{4})"
    try:
        match = re.search(regex, content)
    except TypeError:
        # Non-string input (e.g. None): keep the best-effort contract of
        # returning the "None" sentinel instead of raising.
        return "None"
    if match is None:
        return "None"
    return match.group(1)
+
+
def extract_attorney(content: str) -> str:
    """
    Extracts the name of the attorney

    Finds the first "Date:" or "Dated:" label and returns the text enclosed
    by the next pair of forward slashes on that same line, e.g.
    "Dated: January 5, 2020 /John Doe/" yields "John Doe".
    NOTE(review): presumably this targets a "/Name/" style signature next
    to the date line -- confirm against the documents being parsed.

    Args:
        content: Text of the document to search.

    Returns:
        The text between the slashes, or the literal string "None" when no
        such pattern is found or ``content`` is not a string.
    """
    # ``.`` does not match newlines here, so the slashes must be on the
    # same line as the "Date:"/"Dated:" label.
    regex = r"Dated?\:.*?\/(.*?)\/"
    try:
        match = re.search(regex, content)
    except TypeError:
        # Non-string input (e.g. None): keep the best-effort contract of
        # returning the "None" sentinel instead of raising.
        return "None"
    if match is None:
        return "None"
    return match.group(1)