|
@@ -4,25 +4,89 @@ from distutils.filelist import findall
|
|
|
|
|
|
|
|
|
|
def case_number_extraction(content):
    """Return the distinct case numbers that follow "Case No." in *content*.

    The pattern is matched case-insensitively and expects numbers shaped
    like ``2:15-cv-01234-JRG``: one digit, a colon, two digits, then
    dash-separated groups of two word characters, five digits and three
    word characters.

    Args:
        content: Text to search.

    Returns:
        A list of unique case-number strings in order of first appearance;
        an empty list when nothing matches.
    """
    case_number = re.compile(
        r"Case\sNo\.\s(\d\:\d{2}\-\w{2}\-\d{5}\-\w{3})", re.IGNORECASE
    )
    matches = (match.group(1) for match in case_number.finditer(content))
    # dict.fromkeys removes duplicates while keeping first-seen order;
    # list(set(...)) could return the same values in a different order on
    # every run.
    return list(dict.fromkeys(matches))
|
|
|
|
|
|
|
|
|
|
def expert_name_extraction(content):
    """Return the distinct names that follow "EXPERT REPORT OF" in *content*.

    The name is the text between "EXPERT REPORT OF " and the first comma on
    the same line; matching is case-insensitive.

    Args:
        content: Text to search.

    Returns:
        A list of unique captured names in order of first appearance; an
        empty list when nothing matches.
    """
    # The trailing `.*(REGARDING|Invalidity)?` only consumes the rest of the
    # line after the comma; the returned value is always capture group 1.
    regex = r"\bEXPERT\sREPORT\sOF\s(.*?),.*(REGARDING|Invalidity)?"
    expert_name = re.compile(regex, re.IGNORECASE)
    names = (match.group(1) for match in expert_name.finditer(content))
    # Order-preserving de-duplication keeps the output stable between runs,
    # which list(set(...)) does not guarantee.
    return list(dict.fromkeys(names))
|
|
|
|
|
|
|
|
|
|
|
|
def plaintiff_extraction(content):
    """Return the text found between "DIVISION" and "Plaintiff," in *content*.

    Matching is case-insensitive and may span several lines. Each captured
    span is stripped of surrounding whitespace.

    Args:
        content: Text to search.

    Returns:
        A list with one stripped string per match, in document order
        (duplicates are kept); an empty list when nothing matches.
    """
    # [\s\S] is used instead of "." so the capture can cross line breaks.
    plaintiff = re.compile(r"\bDIVISION([\s\S]*?)Plaintiff\,", re.IGNORECASE)
    return [match.group(1).strip() for match in plaintiff.finditer(content)]
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def defendent_extraction(content):
    """Return the text found between "v." and "Defendant" in *content*.

    Matching is case-insensitive. "Plaintiff" must appear earlier on the
    same line as the "v." because "." in the pattern does not match a line
    break; the captured span itself may cross lines.

    Args:
        content: Text to search.

    Returns:
        A list with one whitespace-stripped string per match, in document
        order (duplicates are kept); an empty list when nothing matches.
    """
    # NOTE(review): if "Plaintiff" and "v." sit on different lines in the
    # input, this pattern finds nothing -- confirm against real documents.
    defendent = re.compile(r"Plaintiff.*v\.([\s\S]*?)Defendant", re.IGNORECASE)
    return [match.group(1).strip() for match in defendent.finditer(content)]
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def patent_extraction(content):
    """Return every "U.S. PATENT NO. d,ddd,ddd" reference found in *content*.

    Matching is case-sensitive and expects a seven-digit number written with
    comma separators (for example ``7,123,456``).

    Args:
        content: Text to search.

    Returns:
        A list of the matched reference strings in document order
        (duplicates are kept); an empty list when nothing matches.
    """
    # NOTE(review): the "." after "NO" is unescaped, so it accepts any single
    # character there, not only a literal full stop. Left unchanged so that
    # existing matches are not lost.
    regex = r"(U\.S\.\sPATENT\sNO.\s\d\,\d{3}\,\d{3})"
    return re.findall(regex, content)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def law_firm_extraction(content):
    """Return law-firm names found in *content*.

    No law-firm pattern has been defined yet, so this currently returns an
    empty list for every input.

    Args:
        content: Text to search.

    Returns:
        A list of whitespace-stripped captures (first group of each match),
        or an empty list while the pattern is undefined.
    """
    regex = r""  # TODO: supply a pattern with one capture group
    # An empty pattern matches at every position but has no capture group,
    # so reading group 1 would raise for any input. Bail out instead.
    if not regex:
        return []
    firm = re.compile(regex, re.IGNORECASE)
    return [match.group(1).strip() for match in firm.finditer(content)]
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def on_behalf_of_extraction(content):
    """Return the text between "on behalf of" and the next "Case"/"case".

    Only the first occurrence is used. The search is case-sensitive apart
    from the leading letter of "case", and the whole phrase must sit on one
    line because "." in the pattern does not match a line break.

    Args:
        content: Text to search.

    Returns:
        The captured text with surrounding whitespace stripped, or an empty
        string when the phrase is not found.
    """
    # Raw string: "\s" in a normal string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    regex = r"on\sbehalf\sof(.*?)(C|c)ase"
    match = re.search(regex, content)
    # re.search returns None when nothing matches; guard against it rather
    # than failing with AttributeError.
    if match is None:
        return ""
    return match.group(1).strip()
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def hourly_compensation(content):
    """Placeholder for hourly-compensation extraction; not implemented yet.

    Args:
        content: Text to search (currently unused).

    Returns:
        Always ``None``.
    """
    return None
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def ref_patents(content):
    """Placeholder for referenced-patent extraction; not implemented yet.

    Args:
        content: Text to search (currently unused).

    Returns:
        Always ``None``.
    """
    return None
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def acronym_extraction(content):
    """Return the distinct parenthesised upper-case acronyms in *content*.

    An acronym is one or more ASCII capital letters wrapped in parentheses,
    e.g. ``(PTO)``. The parentheses are part of each returned string.

    Args:
        content: Text to search.

    Returns:
        A list of unique matches in order of first appearance; an empty list
        when nothing matches.
    """
    acronym = re.findall(r"\([A-Z]+\)", content)
    # Order-preserving de-duplication keeps the output stable between runs,
    # which list(set(...)) does not guarantee.
    return list(dict.fromkeys(acronym))
|