documents_download.py 566 B

123456789101112131415161718192021222324
  1. from asyncio.subprocess import PIPE
  2. import os
  3. import subprocess
  4. from subprocess import Popen
  5. from db_utils import get_documents
  6. def main(PATH):
  7. itr = 0
  8. for document in get_documents():
  9. url = document.url
  10. args = ["wget", "-r", "-l", "1", "-p", "-P", PATH, url]
  11. Popen(args, stdout=PIPE)
  12. itr += 1
  13. if itr == 1:
  14. break
  15. if __name__ == "__main__":
  16. HOME_DIR = os.path.expanduser("~")
  17. BASE_DIR = "Code/pdf_parser/document_download_from_server"
  18. PATH = os.path.join(HOME_DIR, BASE_DIR)
  19. main(PATH)