|
@@ -21,7 +21,7 @@ def main(PATH):
|
|
|
data_dict = defaultdict(list)
|
|
|
required_files = [file for file in os.listdir(PATH) if file.find(".pdf") != -1]
|
|
|
for file in required_files:
|
|
|
- content = parser.from_file(file)["content"].strip().replace("\n", "")
|
|
|
+ content = parser.from_file(file)["content"]
|
|
|
data_dict["document_name"].append(file)
|
|
|
data_dict["filing_date"].append(extract_filing_date(content))
|
|
|
data_dict["address"].append(address_extraction(content))
|