|
@@ -6,7 +6,7 @@ from tika import parser
|
|
from utils import (
|
|
from utils import (
|
|
extract_filing_date,
|
|
extract_filing_date,
|
|
address_extraction,
|
|
address_extraction,
|
|
- refer_exteraction,
|
|
|
|
|
|
+ extract_reference,
|
|
email_extraction,
|
|
email_extraction,
|
|
telephone_number_extraction,
|
|
telephone_number_extraction,
|
|
)
|
|
)
|
|
@@ -25,7 +25,7 @@ def main(PATH):
|
|
data_dict["document_name"].append(file)
|
|
data_dict["document_name"].append(file)
|
|
data_dict["filing_date"].append(extract_filing_date(content))
|
|
data_dict["filing_date"].append(extract_filing_date(content))
|
|
data_dict["address"].append(address_extraction(content))
|
|
data_dict["address"].append(address_extraction(content))
|
|
- data_dict["refer"].append(refer_exteraction(content))
|
|
|
|
|
|
+ data_dict["references"].append(extract_reference(content))
|
|
data_dict["email"].append(email_extraction(content))
|
|
data_dict["email"].append(email_extraction(content))
|
|
data_dict["telephone_number"].append(telephone_number_extraction(content))
|
|
data_dict["telephone_number"].append(telephone_number_extraction(content))
|
|
|
|
|