|
@@ -1,19 +1,27 @@
|
|
|
-from collections import defaultdict
|
|
|
import re
|
|
|
-from distutils.filelist import findall
|
|
|
|
|
|
|
|
|
def date_extraction(content):
|
|
|
- regex = r"((\d{2}\/\d{3}\,\d{3}\s)\d{1,2}\/\d{2}\/\d{4}|(Entered\:\s|:Filing\sDate\:\s|Date\:\s)[A-Z]\w+\s\d{1,2}\,\s\d{4})"
|
|
|
- date = re.search(regex, content).groups()[0]
|
|
|
- return date.strip()
|
|
|
+ """
|
|
|
+ extracts filing date from the documents.
|
|
|
+ """
|
|
|
+ pattern = r"((FILING\sDATE.*?)\d{1,2}\/\d{1,2}\/\d{2,4}|(Date\:\s)\w{1,9}\s\d{1,2}\,\s\d{4}|(Entered\:\s.*?)\w.*?\d{1,2}\,\s\d{4}|(Filed.*?)\d{1,2}\/\d{1,2}\/\d{1,2})"
|
|
|
+ try:
|
|
|
+ return re.search(pattern, content).groups()[0]
|
|
|
+ except:
|
|
|
+ return "None"
|
|
|
+
|
|
|
|
|
|
def address_extraction(content):
|
|
|
- regex = r"Address\:([\s\S].*)\s\d{6}\-\d{4}\swww"
|
|
|
- data = re.search(regex, content).groups()[0]
|
|
|
- if data == None:
|
|
|
- print("N")
|
|
|
- return data.strip()
|
|
|
+ """
|
|
|
+ extracts address from the documents.
|
|
|
+ """
|
|
|
+ regex = r"Address\:\s([\s\S].*)www"
|
|
|
+ try:
|
|
|
+ print(re.search(regex, content).groups()[0])
|
|
|
+ except:
|
|
|
+ return "None"
|
|
|
+
|
|
|
|
|
|
def case_number_extraction(content):
|
|
|
|