|
@@ -87,11 +87,11 @@ def patent_extraction(content):
|
|
Extracts patent numbers from the document
|
|
Extracts patent numbers from the document
|
|
"""
|
|
"""
|
|
regex = r"\d{1,3}\,\d{1,3}\,\d{3}\,?"
|
|
regex = r"\d{1,3}\,\d{1,3}\,\d{3}\,?"
|
|
- results = []
|
|
|
|
|
|
+ result = set()
|
|
patent = re.compile(regex, re.IGNORECASE)
|
|
patent = re.compile(regex, re.IGNORECASE)
|
|
for current in patent.finditer(content):
|
|
for current in patent.finditer(content):
|
|
- results.append(current.group().replace(",", ""))
|
|
|
|
- return list(set(results))
|
|
|
|
|
|
+ result.add(current.group().replace(",", ""))
|
|
|
|
+ return list(result)
|
|
|
|
|
|
|
|
|
|
def law_firm_extraction(content):
|
|
def law_firm_extraction(content):
|
|
@@ -113,9 +113,11 @@ def hourly_compensation(content):
|
|
"""
|
|
"""
|
|
Returns the hourly compensation of the expert.
|
|
Returns the hourly compensation of the expert.
|
|
"""
|
|
"""
|
|
- regex = "\$\s?\d+"
|
|
|
|
- pay = re.findall(regex, content)
|
|
|
|
- return pay
|
|
|
|
|
|
+ regex = "\$\s?\d{3,4}"
|
|
|
|
+ try:
|
|
|
|
+ return re.search(regex, content).group(0)
|
|
|
|
+ except:
|
|
|
|
+ return "None"
|
|
|
|
|
|
|
|
|
|
def ref_patents(content):
|
|
def ref_patents(content):
|