|
@@ -5,7 +5,8 @@ def extract_filing_date(content):
|
|
|
"""
|
|
|
extracts filing date from the documents.
|
|
|
"""
|
|
|
- pattern = r"((FILING\sDATE.*?)\d{1,2}\/\d{1,2}\/\d{2,4}|(Date\:\s)\w{1,9}\s\d{1,2}\,\s\d{4}|(Entered\:\s.*?)\w.*?\d{1,2}\,\s\d{4}|(Filed.*?)\d{1,2}\/\d{1,2}\/\d{1,2}|(Filed\:.+?)\w.+\d{1,2}\,\s\d{4}([\s\S].*?Paper))"
|
|
|
+ # pattern = r"((FILING\sDATE.*?)\d{1,2}\/\d{1,2}\/\d{2,4}|(Date\:\s)\w{1,9}\s\d{1,2}\,\s\d{4}|(Entered\:\s.*?)\w.*?\d{1,2}\,\s\d{4}|(Filed.*?)\d{1,2}\/\d{1,2}\/\d{1,2}|(Filed\:.+?)\w.+\d{1,2}\,\s\d{4}([\s\S].*?Paper))"
|
|
|
+ pattern = r"\w+\.?\s\d{1,2}\,\s\d{4}|\d{2}\/\d{2}\/\d{4}"
|
|
|
try:
|
|
|
return re.search(pattern, content).groups()[0]
|
|
|
except:
|
|
@@ -18,7 +19,7 @@ def address_extraction(content):
|
|
|
"""
|
|
|
regex = r"(OfficeAddress\:\s([\s\S].*)www|(A|a)ddress.+?\sof.+?Fax\:\s\(\d{3}\)\s\d{3}\-\d+\s)"
|
|
|
try:
|
|
|
- print(re.search(regex, content).groups()[0])
|
|
|
+ re.search(regex, content).groups()[0]
|
|
|
except:
|
|
|
return "None"
|
|
|
|
|
@@ -40,13 +41,16 @@ def refer_exteraction(content):
|
|
|
|
|
|
def email_extraction(content):
    """Return every e-mail address found in ``content``.

    Args:
        content: Text to scan. Empty or ``None`` input yields an empty list.

    Returns:
        A list of the matched addresses, in order of appearance, with the
        casing they have in ``content``.
    """
    if not content:
        return []
    # The local part may contain dots, plus signs and hyphens; the domain may
    # have several dot-separated labels (with digits or hyphens) and a
    # top-level label of any length >= 2. The earlier pattern allowed only a
    # single alphabetic label and a 2-3 letter suffix, which truncated
    # addresses such as "user@mail.example.com" to "user@mail.exa".
    # Only non-capturing groups are used so that group() is the whole address.
    regex = r"\w[\w.+-]*@[a-z0-9-]+(?:\.[a-z0-9-]+)*\.[a-z]{2,}"
    emails = re.compile(regex, re.IGNORECASE)
    return [match.group() for match in emails.finditer(content)]
|
|
|
|
|
|
|
|
|
def telephone_number_extraction(content):
    """Return the first ``Telephone: ... (ddd) ddd-dddd`` span in ``content``.

    Args:
        content: Text to scan.

    Returns:
        The matched text, starting at the "Telephone:" label and ending at
        the last digit of the number, or the string ``"None"`` when nothing
        matches or ``content`` is not a string.
    """
    regex = r"Telephone\:.+?\(\d{3}\)\s\d{3}\-\d{4}"
    # Non-string input previously fell into a bare ``except``; keep returning
    # the same sentinel without hiding unrelated errors.
    if not isinstance(content, str):
        return "None"
    # Match the label case-insensitively, as the earlier compiled pattern
    # did, so "TELEPHONE:" and "telephone:" are found too.
    match = re.search(regex, content, re.IGNORECASE)
    if match is None:
        return "None"
    return match.group()
|