|
@@ -35,17 +35,6 @@ def extract_hourly_compensation(content):
|
|
return list(set(results))
|
|
return list(set(results))
|
|
|
|
|
|
|
|
|
|
-def extract_expert_name(content):
|
|
|
|
- """
|
|
|
|
- Returns the name of the expert
|
|
|
|
- """
|
|
|
|
- results = []
|
|
|
|
- exp_name = re.compile(r"\b(REPORT OF ).*\S[.]")
|
|
|
|
- for current in exp_name.finditer(content):
|
|
|
|
- results.append(current.group().replace(",", ""))
|
|
|
|
- return list(set(results))
|
|
|
|
-
|
|
|
|
-
|
|
|
|
def extract_plaintiff(content):
|
|
def extract_plaintiff(content):
|
|
"""
|
|
"""
|
|
Returns the name of the plaintiff
|
|
Returns the name of the plaintiff
|
|
@@ -61,8 +50,9 @@ def extract_plaintiff(content):
|
|
def extract_defendent(content):
|
|
def extract_defendent(content):
|
|
"""
|
|
"""
|
|
Returns the name of the defendant
|
|
Returns the name of the defendant
|
|
|
|
+ Plaintiff[s]?.*v[s]?\.(.*?)Defendant[s]?\.
|
|
"""
|
|
"""
|
|
- regex = r"Plaintiff[s]?.*v[s]?\.(.*?)Defendant[s]?\."
|
|
|
|
|
|
+ regex = r"Plaintiff[s]?\,.*?[v|V]\.(.*?)Defendant[s]?\.?"
|
|
try:
|
|
try:
|
|
return re.search(regex, content).groups()[0]
|
|
return re.search(regex, content).groups()[0]
|
|
except:
|
|
except:
|
|
@@ -72,12 +62,15 @@ def extract_defendent(content):
|
|
def extract_acronyms(content):
|
|
def extract_acronyms(content):
|
|
"""
|
|
"""
|
|
Returns the list of all the acronyms present
|
|
Returns the list of all the acronyms present
|
|
|
|
+ \(["|“](\w{1,10})
|
|
"""
|
|
"""
|
|
- regex = r"\(“(\w{3})”\)"
|
|
|
|
|
|
+ regex = r'\(["|“](\w{1,10})["|”]\)'
|
|
results = []
|
|
results = []
|
|
plaintiff = re.compile(regex, re.IGNORECASE)
|
|
plaintiff = re.compile(regex, re.IGNORECASE)
|
|
for current in plaintiff.finditer(content):
|
|
for current in plaintiff.finditer(content):
|
|
- results.append(current.group().replace(",", ""))
|
|
|
|
|
|
+ results.append(current.groups()[0].replace(",", ""))
|
|
|
|
+ if len(results) == 0:
|
|
|
|
+ return "None"
|
|
return list(set(results))
|
|
return list(set(results))
|
|
|
|
|
|
|
|
|
|
@@ -101,3 +94,15 @@ def extract_filing_date(content):
|
|
return re.search(r"(Filed)\s(\d{2}\/\d{2}\/\d{2})", content).groups()[1]
|
|
return re.search(r"(Filed)\s(\d{2}\/\d{2}\/\d{2})", content).groups()[1]
|
|
except:
|
|
except:
|
|
return "None"
|
|
return "None"
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+def extract_attorney_name(content):
|
|
|
|
+ """
|
|
|
|
+ Returns the name of the attorney/attorneys.
|
|
|
|
+ """
|
|
|
|
+ regex = r"\/s\/\s\w+\s\w\.\s\w+"
|
|
|
|
+ results = []
|
|
|
|
+ attorney = re.compile(regex, re.IGNORECASE)
|
|
|
|
+ for current in attorney.finditer(content):
|
|
|
|
+ results.append(current.group().replace(",", ""))
|
|
|
|
+ return list(set(results))
|