|
@@ -1,5 +1,6 @@
|
|
import re
|
|
import re
|
|
|
|
|
|
|
|
+
|
|
def email_extraction(content: str) -> str:
|
|
def email_extraction(content: str) -> str:
|
|
"""
|
|
"""
|
|
Extracts emails from a document.
|
|
Extracts emails from a document.
|
|
@@ -11,6 +12,7 @@ def email_extraction(content: str) -> str:
|
|
result.append(email.group())
|
|
result.append(email.group())
|
|
return result
|
|
return result
|
|
|
|
|
|
|
|
+
|
|
def telephone_number_extraction(content: str) -> str:
|
|
def telephone_number_extraction(content: str) -> str:
|
|
"""
|
|
"""
|
|
Extracts telephone number[s?] from a document
|
|
Extracts telephone number[s?] from a document
|
|
@@ -22,6 +24,7 @@ def telephone_number_extraction(content: str) -> str:
|
|
result.append(number.group())
|
|
result.append(number.group())
|
|
return result
|
|
return result
|
|
|
|
|
|
|
|
+
|
|
def address_extraction(content: str) -> str:
|
|
def address_extraction(content: str) -> str:
|
|
"""
|
|
"""
|
|
extracts address from the documents.
|
|
extracts address from the documents.
|
|
@@ -32,6 +35,7 @@ def address_extraction(content: str) -> str:
|
|
except:
|
|
except:
|
|
return "None"
|
|
return "None"
|
|
|
|
|
|
|
|
+
|
|
def case_number_extraction(content: str) -> str:
|
|
def case_number_extraction(content: str) -> str:
|
|
"""
|
|
"""
|
|
Extracts the case number from the documents.
|
|
Extracts the case number from the documents.
|
|
@@ -43,6 +47,7 @@ def case_number_extraction(content: str) -> str:
|
|
results.add(current.groups()[0])
|
|
results.add(current.groups()[0])
|
|
return list(results)
|
|
return list(results)
|
|
|
|
|
|
|
|
+
|
|
def plaintiff_extraction(content: str) -> str:
|
|
def plaintiff_extraction(content: str) -> str:
|
|
"""
|
|
"""
|
|
Extracts the plaintiff from the document
|
|
Extracts the plaintiff from the document
|
|
@@ -53,6 +58,7 @@ def plaintiff_extraction(content: str) -> str:
|
|
except:
|
|
except:
|
|
return "None"
|
|
return "None"
|
|
|
|
|
|
|
|
+
|
|
def defendent_extraction(content: str) -> str:
|
|
def defendent_extraction(content: str) -> str:
|
|
"""
|
|
"""
|
|
Extracts the defendant from the document
|
|
Extracts the defendant from the document
|
|
@@ -63,6 +69,7 @@ def defendent_extraction(content: str) -> str:
|
|
except:
|
|
except:
|
|
return "None"
|
|
return "None"
|
|
|
|
|
|
|
|
+
|
|
def patent_extraction(content: str) -> str:
|
|
def patent_extraction(content: str) -> str:
|
|
"""
|
|
"""
|
|
Extracts patent numbers from the document
|
|
Extracts patent numbers from the document
|
|
@@ -74,6 +81,7 @@ def patent_extraction(content: str) -> str:
|
|
result.add(current.group().replace(",", ""))
|
|
result.add(current.group().replace(",", ""))
|
|
return list(result)
|
|
return list(result)
|
|
|
|
|
|
|
|
+
|
|
def acronym_extraction(content: str) -> str:
|
|
def acronym_extraction(content: str) -> str:
|
|
regex = r"\(\“([A-Z]{3,4})\”\)"
|
|
regex = r"\(\“([A-Z]{3,4})\”\)"
|
|
results = set()
|
|
results = set()
|
|
@@ -81,3 +89,25 @@ def acronym_extraction(content: str) -> str:
|
|
for current in acronym.finditer(content):
|
|
for current in acronym.finditer(content):
|
|
results.add(current.group(1))
|
|
results.add(current.group(1))
|
|
return list(results)
|
|
return list(results)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def extract_filing_date(content: str) -> str:
    """
    Extracts filing date of the document.

    Looks for the first "Date:" or "Dated:" label followed by a single
    whitespace character and a date written as "<word> <day>, <year>",
    for example "Dated: January 15, 2020".

    Args:
        content: Text of the document to search.

    Returns:
        The matched date text (e.g. "January 15, 2020"), or the string
        "None" when no such date is found or ``content`` is not text.
    """
    # The day is one OR two digits; a single ``\d`` would reject every
    # date from the 10th of the month onwards.
    regex = r"Dated?\:\s(\w+\s\d{1,2}\,\s\d{4})"
    try:
        match = re.search(regex, content)
    except TypeError:
        # Non-string input (e.g. None): keep the best-effort contract of
        # returning the "None" sentinel instead of raising.
        return "None"
    if match is None:
        return "None"
    return match.group(1)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def extract_attorney(content: str) -> str:
    """
    Extracts the name of the attorney

    Finds the first "Date:" or "Dated:" label and returns the text enclosed
    by the first pair of forward slashes that follows it on the same line,
    for example "Dated: May 3, 2020 /Jane Doe/" yields "Jane Doe".

    Args:
        content: Text of the document to search.

    Returns:
        The text between the two slashes, or the string "None" when the
        pattern is not found or ``content`` is not text.
    """
    # ``.`` does not match a newline here, so the label and both slashes
    # must sit on one line.
    # NOTE(review): for a signature written as "/s/ Jane Doe" this captures
    # just "s" - confirm the signature format the documents actually use.
    regex = r"Dated?\:.*?\/(.*?)\/"
    try:
        match = re.search(regex, content)
    except TypeError:
        # Non-string input (e.g. None): keep the best-effort contract of
        # returning the "None" sentinel instead of raising.
        return "None"
    if match is None:
        return "None"
    return match.group(1)
|