2 jaren geleden · 92e83c5949
--- a/document_download_from_server/db_utils.py
+++ b/document_download_from_server/db_utils.py
@@ -0,0 +1,16 @@
 
				+from utils import Base, metadata_rc, session_rc
			
 
				+from sqlalchemy import Table
			
 
				+
			
 
				+
			
 
				+class Documents(Base):
			
 
				+    __table__ = Table("core_document", metadata_rc)
			
 
				+
			
 
				+
			
 
				+def get_documents():
			
 
				+    documents = session_rc.query(Documents).all()
			
 
				+    for document in documents:
			
 
				+        yield document
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    get_documents()
			
--- a/document_download_from_server/documents_download.py
+++ b/document_download_from_server/documents_download.py
@@ -0,0 +1,24 @@
 
				+from asyncio.subprocess import PIPE
			
 
				+import os
			
 
				+import subprocess
			
 
				+from subprocess import Popen
			
 
				+from db_utils import get_documents
			
 
				+
			
 
				+
			
 
				+def main(PATH):
			
 
				+    itr = 0
			
 
				+    for document in get_documents():
			
 
				+        url = document.url
			
 
				+        args = ["wget", "-r", "-l", "1", "-p", "-P", PATH, url]
			
 
				+        Popen(args, stdout=PIPE)
			
 
				+        itr += 1
			
 
				+        if itr == 1:
			
 
				+            break
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    HOME_DIR = os.path.expanduser("~")
			
 
				+    BASE_DIR = "Code/pdf_parser/document_download_from_server"
			
 
				+
			
 
				+    PATH = os.path.join(HOME_DIR, BASE_DIR)
			
 
				+    main(PATH)
			
--- a/document_download_from_server/utils.py
+++ b/document_download_from_server/utils.py
@@ -0,0 +1,14 @@
 
				+import os
			
 
				+
			
 
				+from sqlalchemy import create_engine, MetaData, orm
			
 
				+from sqlalchemy.ext.declarative import declarative_base
			
 
				+
			
 
				+pengine_rc = create_engine("postgresql://xpertconnect:123@localhost:5432/rsa_crawling")
			
 
				+
			
 
				+Base = declarative_base()
			
 
				+metadata_rc = MetaData(pengine_rc)
			
 
				+metadata_rc.reflect()
			
 
				+
			
 
				+Session_rc = orm.sessionmaker(pengine_rc)
			
 
				+
			
 
				+session_rc = Session_rc()