|
@@ -5,7 +5,7 @@ def extract_filing_date(content):
|
|
|
"""
|
|
|
extracts filing date from the documents.
|
|
|
"""
|
|
|
- pattern = r"((FILING\sDATE.*?)\d{1,2}\/\d{1,2}\/\d{2,4}|(Date\:\s)\w{1,9}\s\d{1,2}\,\s\d{4}|(Entered\:\s.*?)\w.*?\d{1,2}\,\s\d{4}|(Filed.*?)\d{1,2}\/\d{1,2}\/\d{1,2})" # "((FILING\sDATE.*?)\d{1,2}\/\d{1,2}\/\d{2,4}|(Date\:\s)\w{1,9}\s\d{1,2}\,\s\d{4})"
|
|
|
+ pattern = r"((FILING\sDATE.*?)\d{1,2}\/\d{1,2}\/\d{2,4}|(Date\:\s)\w{1,9}\s\d{1,2}\,\s\d{4}|(Entered\:\s.*?)\w.*?\d{1,2}\,\s\d{4}|(Filed.*?)\d{1,2}\/\d{1,2}\/\d{1,2}|(Filed\:.+?)\w.+\d{1,2}\,\s\d{4}([\s\S].*?Paper))"
|
|
|
try:
|
|
|
return re.search(pattern, content).groups()[0]
|
|
|
except:
|
|
@@ -16,7 +16,7 @@ def address_extraction(content):
|
|
|
"""
|
|
|
extracts address from the documents.
|
|
|
"""
|
|
|
- regex = r"OfficeAddress\:\s([\s\S].*)www"
|
|
|
+ regex = r"(OfficeAddress\:\s([\s\S].*)www|(A|a)ddress.+?\sof.+?Fax\:\s\(\d{3}\)\s\d{3}\-\d+\s)"
|
|
|
try:
|
|
|
print(re.search(regex, content).groups()[0])
|
|
|
except:
|
|
@@ -27,7 +27,7 @@ def refer_exteraction(content):
|
|
|
"""
|
|
|
extracts referrals from the documents.
|
|
|
"""
|
|
|
- regex = r"by\sreference.+?\d{1,2}\,\d{3}\,\d{3}|In\sre.+?\)"
|
|
|
+ regex = r"((by\sreference\sU\.S\.\sPatent\sNo.\s\d{1,2}\,\d{3}\,\d{3})|(In\sre\s\w+.+?\,?\s\d{2,3}\sF\.\dd\s\d{0,4}\,?\s?\d{0,4}\s?\(?.+?\)))" # |In\sre.+?\)|In\sre.+?\)"
|
|
|
# 1. by reference
|
|
|
# 2. In re
|
|
|
# 3. in quotes ""
|