|
@@ -1,20 +1,17 @@
|
|
|
|
+from collections import defaultdict
|
|
|
|
+import os
|
|
|
|
+import tika
|
|
|
|
+from tika import parser
|
|
|
|
+import pandas as pd
|
|
from parse_expert_pdf_utils import (
|
|
from parse_expert_pdf_utils import (
|
|
- defendent_extraction,
|
|
|
|
plaintiff_extraction,
|
|
plaintiff_extraction,
|
|
defendent_extraction,
|
|
defendent_extraction,
|
|
expert_name_extraction,
|
|
expert_name_extraction,
|
|
case_number_extraction,
|
|
case_number_extraction,
|
|
)
|
|
)
|
|
-import tika
|
|
|
|
-import os
|
|
|
|
-from tika import parser
|
|
|
|
-import pandas as pd
|
|
|
|
-from collections import defaultdict
|
|
|
|
|
|
|
|
-tika.initVM()
|
|
|
|
-import warnings
|
|
|
|
|
|
|
|
-warnings.filterwarnings("ignore")
|
|
|
|
|
|
+tika.initVM()
|
|
|
|
|
|
|
|
|
|
def main(path: str) -> pd.DataFrame:
|
|
def main(path: str) -> pd.DataFrame:
|
|
@@ -33,8 +30,8 @@ def main(path: str) -> pd.DataFrame:
|
|
data["defendant"].append(defendent_extraction(content))
|
|
data["defendant"].append(defendent_extraction(content))
|
|
data["case_number"].append(case_number_extraction(content))
|
|
data["case_number"].append(case_number_extraction(content))
|
|
|
|
|
|
- df = pd.DataFrame(data)
|
|
|
|
- return df
|
|
|
|
|
|
+ data_expert = pd.DataFrame(data)
|
|
|
|
+ return data_expert
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
if __name__ == "__main__":
|