|
@@ -1,7 +1,7 @@
|
|
import re
|
|
import re
|
|
|
|
|
|
|
|
|
|
-def date_extraction(content):
|
|
|
|
|
|
+def date_extraction(content: str) -> str:
|
|
"""
|
|
"""
|
|
extracts filing date from the documents.
|
|
extracts filing date from the documents.
|
|
"""
|
|
"""
|
|
@@ -12,7 +12,18 @@ def date_extraction(content):
|
|
return "None"
|
|
return "None"
|
|
|
|
|
|
|
|
|
|
-def address_extraction(content):
|
|
|
|
|
|
def extract_email(content: str) -> str:
    """
    Extracts the first email address found in the document text.

    Args:
        content: Raw text of the document.

    Returns:
        The first email address found, or the string "None" (not the
        ``None`` object) when ``content`` is not a string or contains no
        address.
    """
    # Non-string input used to be absorbed by a bare ``except``; keep the
    # same "None" result but make the condition explicit.
    if not isinstance(content, str):
        return "None"

    # Local part: letters, digits and the common punctuation ._%+-
    # Domain: one or more dot-separated labels after the first label, so a
    # sentence-ending period is never swallowed and the match cannot run
    # across whitespace to an unrelated dot later in the text.
    pattern = r"[A-Za-z0-9._%+-]+@[A-Za-z0-9-]+(?:\.[A-Za-z0-9-]+)+"
    match = re.search(pattern, content)
    if match is None:
        return "None"
    return match.group()
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+def address_extraction(content: str) -> str:
|
|
"""
|
|
"""
|
|
extracts address from the documents.
|
|
extracts address from the documents.
|
|
"""
|
|
"""
|
|
@@ -23,7 +34,7 @@ def address_extraction(content):
|
|
return "None"
|
|
return "None"
|
|
|
|
|
|
|
|
|
|
-def refer_exteraction(content):
|
|
|
|
|
|
+def refer_exteraction(content: str) -> str:
|
|
"""
|
|
"""
|
|
extract referals from the documents.
|
|
extract referals from the documents.
|
|
"""
|
|
"""
|
|
@@ -37,7 +48,7 @@ def refer_exteraction(content):
|
|
return "None"
|
|
return "None"
|
|
|
|
|
|
|
|
|
|
-def case_number_extraction(content):
|
|
|
|
|
|
+def case_number_extraction(content: str) -> str:
|
|
"""
|
|
"""
|
|
Extracts the case number from the documents.
|
|
Extracts the case number from the documents.
|
|
"""
|
|
"""
|
|
@@ -49,7 +60,7 @@ def case_number_extraction(content):
|
|
return list(results)
|
|
return list(results)
|
|
|
|
|
|
|
|
|
|
-def expert_name_extraction(content):
|
|
|
|
|
|
+def expert_name_extraction(content: str) -> str:
|
|
"""
|
|
"""
|
|
Extracts the name of the expert from the document.
|
|
Extracts the name of the expert from the document.
|
|
"""
|
|
"""
|
|
@@ -60,7 +71,7 @@ def expert_name_extraction(content):
|
|
return "None"
|
|
return "None"
|
|
|
|
|
|
|
|
|
|
-def plaintiff_extraction(content):
|
|
|
|
|
|
+def plaintiff_extraction(content: str) -> str:
|
|
"""
|
|
"""
|
|
Extracts the plaintiff from the document
|
|
Extracts the plaintiff from the document
|
|
"""
|
|
"""
|
|
@@ -71,7 +82,7 @@ def plaintiff_extraction(content):
|
|
return "None"
|
|
return "None"
|
|
|
|
|
|
|
|
|
|
-def defendent_extraction(content):
|
|
|
|
|
|
+def defendent_extraction(content: str) -> str:
|
|
"""
|
|
"""
|
|
Extracts the defendant from the document
|
|
Extracts the defendant from the document
|
|
"""
|
|
"""
|
|
@@ -82,7 +93,7 @@ def defendent_extraction(content):
|
|
return "None"
|
|
return "None"
|
|
|
|
|
|
|
|
|
|
-def patent_extraction(content):
|
|
|
|
|
|
+def patent_extraction(content: str) -> str:
|
|
"""
|
|
"""
|
|
Extracts patent numbers from the document
|
|
Extracts patent numbers from the document
|
|
"""
|
|
"""
|
|
@@ -94,7 +105,7 @@ def patent_extraction(content):
|
|
return list(result)
|
|
return list(result)
|
|
|
|
|
|
|
|
|
|
-def law_firm_extraction(content):
|
|
|
|
|
|
+def law_firm_extraction(content: str) -> str:
|
|
regex = r""
|
|
regex = r""
|
|
results = []
|
|
results = []
|
|
firm = re.compile(regex, re.IGNORECASE)
|
|
firm = re.compile(regex, re.IGNORECASE)
|
|
@@ -103,13 +114,15 @@ def law_firm_extraction(content):
|
|
return results
|
|
return results
|
|
|
|
|
|
|
|
|
|
-def on_behalf_of_extraction(content):
|
|
|
|
|
|
def on_behalf_of_extraction(content: str) -> str:
    """
    Extracts the text between "on behalf of" and the next "Case"/"case".

    Args:
        content: Raw text of the document.

    Returns:
        The captured text with surrounding whitespace stripped, or the
        string "None" when ``content`` is not a string or the phrase is
        not found.
    """
    # Raw string so ``\s`` reaches the regex engine unchanged instead of
    # being treated as an (invalid) Python string escape.
    regex = r"on\sbehalf\sof(.*?)[Cc]ase"
    try:
        match = re.search(regex, content)
    except TypeError:
        # content was not a str (e.g. None).
        return "None"
    if match is None:
        return "None"
    # Return the captured text, not the Match object, to honour ``-> str``.
    return match.group(1).strip()
|
|
|
|
|
|
|
|
|
|
-def hourly_compensation(content):
|
|
|
|
|
|
+def hourly_compensation(content: str) -> str:
|
|
"""
|
|
"""
|
|
Returns the hourly compensation of the expert.
|
|
Returns the hourly compensation of the expert.
|
|
"""
|
|
"""
|
|
@@ -120,15 +133,15 @@ def hourly_compensation(content):
|
|
return "None"
|
|
return "None"
|
|
|
|
|
|
|
|
|
|
-def ref_patents(content):
|
|
|
|
|
|
def ref_patents(content: str) -> None:
    """
    Unimplemented stub; performs no extraction.

    NOTE(review): judging by the name this is presumably meant to extract
    referenced patents from the document - confirm before implementing.

    Args:
        content: Raw text of the document (currently unused).

    Returns:
        Always ``None``.
    """
    # TODO: implement; until then the function deliberately returns nothing.
    return None
|
|
|
|
|
|
|
|
|
|
-def acronym_extraction(content):
|
|
|
|
|
|
def acronym_extraction(content: str) -> list:
    """
    Extracts the distinct parenthesised upper-case acronyms from the document.

    Args:
        content: Raw text of the document.

    Returns:
        A list of unique matches, each including its surrounding
        parentheses (e.g. "(USPTO)"), in order of first appearance.
        Empty when nothing matches.

    Raises:
        TypeError: If ``content`` is not a string (raised by ``re.findall``).
    """
    regex = r"\([A-Z]+\)"
    matches = re.findall(regex, content)
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # result is deterministic (a plain set gives arbitrary ordering).
    return list(dict.fromkeys(matches))
|