|
@@ -0,0 +1,103 @@
|
|
|
|
+import re
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def extract_patent_number(content):
    """
    Return the unique patent numbers found in the document.

    A patent number is recognised as three comma-separated digit groups,
    e.g. ``7,654,321`` or ``10,123,456``. The commas are stripped from
    every hit before it is returned.

    Args:
        content: Text of the document to scan.

    Returns:
        A list of digit-only strings without duplicates, in order of first
        appearance in ``content``. Empty when nothing matches.
    """
    # NOTE(review): the middle group accepts 1-3 digits, so "1,2,345" also
    # matches -- confirm whether that looseness is intended.
    patent = re.compile(r"\d{1,3}\,\d{1,3}\,\d{3}")
    numbers = (hit.group().replace(",", "") for hit in patent.finditer(content))
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # result is stable across runs (list(set(...)) depends on the hash seed).
    return list(dict.fromkeys(numbers))
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def extract_case_number(content):
    """
    Return the first case number found in the document.

    The pattern looks for ``<digits>:<digits>-<word>-<5+ digits>-<word>``
    (e.g. ``2:17-cv-00123-JRG``) and returns the part after the colon.

    Args:
        content: Text of the document to scan.

    Returns:
        The case number as a string, or the literal string ``"None"`` when
        no case number is present or ``content`` is not text.
    """
    regex = r"(\d{1,})\:(\d{1,}\-\w{1,}\-\d{5,}\-\w+)+"
    try:
        found = re.search(regex, content)
    except TypeError:
        # Non-text input (e.g. None) keeps the historical "None" result.
        return "None"
    if found is None:
        return "None"
    return found.group(2)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def extract_hourly_compensation(content):
    """
    Return the unique dollar amounts found in the document.

    Amounts are a ``$`` followed by digits, optionally with thousands
    separators (``$450``, ``$1,200``). Separators are removed from the
    returned value; any decimal part is not captured.

    Args:
        content: Text of the document to scan.

    Returns:
        A list of strings such as ``"$450"`` without duplicates, in order of
        first appearance in ``content``. Empty when nothing matches.
    """
    # Raw string avoids the invalid "\$" escape. The optional ",ddd" groups
    # keep "$1,200" from being cut short at "$1"; the lookahead stops a group
    # from swallowing the first three digits of a longer run.
    hourly_comp_re = re.compile(r"\$\d{1,20}(?:,\d{3}(?!\d))*")
    amounts = (
        hit.group().replace(",", "") for hit in hourly_comp_re.finditer(content)
    )
    # Order-preserving de-duplication gives a deterministic result.
    return list(dict.fromkeys(amounts))
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def extract_expert_name(content):
    """
    Return the unique "REPORT OF ..." phrases that name the expert.

    Each hit starts at the words ``REPORT OF`` and runs to the last period
    on the same line that directly follows a non-space character. The
    ``REPORT OF`` prefix is part of the returned text and commas are
    removed.

    Args:
        content: Text of the document to scan.

    Returns:
        A list of strings without duplicates, in order of first appearance
        in ``content``. Empty when nothing matches.
    """
    exp_name = re.compile(r"\b(REPORT OF ).*\S[.]")
    phrases = (hit.group().replace(",", "") for hit in exp_name.finditer(content))
    # Order-preserving de-duplication; list(set(...)) varied between runs.
    return list(dict.fromkeys(phrases))
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def extract_plaintiff(content):
    """
    Return the name of the plaintiff.

    The pattern skips ``OF <word>`` and captures the text that follows, up
    to and including the first comma, provided the word ``Plaintiff``
    appears later on the same line. The captured text keeps its leading
    whitespace and trailing comma.

    Args:
        content: Text of the document to scan.

    Returns:
        The captured text, or the literal string ``"None"`` when the
        pattern does not match or ``content`` is not text.
    """
    # Earlier pattern, kept for reference: (OF\s\w{1,})(.*)Plaintiff[s]?,
    regex = r"OF\s\w+(\s.*?\,).*?Plaintiff"
    try:
        found = re.search(regex, content)
    except TypeError:
        # Non-text input (e.g. None) keeps the historical "None" result.
        return "None"
    if found is None:
        return "None"
    return found.group(1)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def extract_defendent(content):
    """
    Return the name of the defendant.

    The pattern captures the text between ``v.`` / ``vs.`` (appearing after
    ``Plaintiff``) and the following ``Defendant.`` / ``Defendants.`` on the
    same line. Surrounding whitespace and punctuation inside that span are
    kept as-is.

    Args:
        content: Text of the document to scan.

    Returns:
        The captured text, or the literal string ``"None"`` when the
        pattern does not match or ``content`` is not text.
    """
    regex = r"Plaintiff[s]?.*v[s]?\.(.*?)Defendant[s]?\."
    try:
        found = re.search(regex, content)
    except TypeError:
        # Non-text input (e.g. None) keeps the historical "None" result.
        return "None"
    if found is None:
        return "None"
    return found.group(1)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def extract_acronyms(content):
    """
    Return the unique three-character acronym definitions in the document.

    A definition is three word characters wrapped in curly double quotes
    and parentheses, e.g. ``(“ABC”)``.

    Args:
        content: Text of the document to scan.

    Returns:
        A list of the matched definitions without duplicates, in order of
        first appearance in ``content``. Each entry is the whole match, so
        it still carries the parentheses and quotes. Empty when nothing
        matches.
    """
    # NOTE(review): only exactly-three-character acronyms are recognised and
    # the wrapper characters are returned too -- confirm callers expect that.
    acronym = re.compile(r"\(“(\w{3})”\)")
    definitions = (hit.group().replace(",", "") for hit in acronym.finditer(content))
    # Order-preserving de-duplication; list(set(...)) varied between runs.
    return list(dict.fromkeys(definitions))
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def extract_firm_name(content):
    """
    Return the unique firm entries found in the document.

    An entry is the header ``(Firm Name, Address, and Telephone Number)``
    (matched case-insensitively) together with the first non-empty line
    that follows it.

    Args:
        content: Text of the document to scan.

    Returns:
        A list of strings without duplicates, in order of first appearance
        in ``content``. Each entry is the whole match with commas removed,
        so it includes the header, the line break(s) and the following
        line. Empty when nothing matches.
    """
    regex = r"(\(Firm\sName\,\sAddress\,\sand\sTelephone\sNumber\))([\r\n]+([^\r\n]+))"
    firm = re.compile(regex, re.IGNORECASE)
    entries = (hit.group().replace(",", "") for hit in firm.finditer(content))
    # Order-preserving de-duplication; list(set(...)) varied between runs.
    return list(dict.fromkeys(entries))
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def extract_filing_date(content):
    """
    Return the filing date.

    Looks for the word ``Filed`` followed by one whitespace character and a
    date written as ``NN/NN/NN`` (two digits per field).

    Args:
        content: Text of the document to scan.

    Returns:
        The date text of the first match (e.g. ``"03/15/19"``), or the
        literal string ``"None"`` when there is no match or ``content`` is
        not text.
    """
    try:
        found = re.search(r"(Filed)\s(\d{2}\/\d{2}\/\d{2})", content)
    except TypeError:
        # Non-text input (e.g. None) keeps the historical "None" result.
        return "None"
    if found is None:
        return "None"
    return found.group(2)
|