Source code for bsync.sync

from pathlib import Path
import csv
import os
import asyncio
from concurrent.futures import ThreadPoolExecutor as Executor

from click import UsageError
from boxsdk.object.folder import Folder

from bsync.settings import PATH_SEP


class BoxSync:
    """
    Syncs the parent folder files to Box.

    Compares current files, checks for any missing in Box or any changed
    locally, creates directory structure and finally uploads all files.
    """

    def __init__(self, api, logger, concurrency, box_folder_id, source_folder_paths):
        """
        Stores the collaborators and resolves the local source folder.

        :param api: wrapper object exposing ``client``, ``create_folder``,
            ``upload`` and ``update`` (as used by the methods below)
        :param logger: logger used for progress/debug messages
        :param concurrency: number of worker threads used for uploads
        :param box_folder_id: ID of the destination Box folder (coerced to int)
        :param source_folder_paths: local folder, optionally followed by
            ``PATH_SEP`` and a glob pattern restricting which paths are synced
        :raises UsageError: if the resolved source folder is not a directory
        """
        self.api = api
        self.logger = logger
        self.box_folder_id = int(box_folder_id)
        self.glob = '*'
        # Work on the string form so Path arguments are accepted as well, and
        # split only once so a glob containing the separator does not break
        # the two-way unpacking.
        source_spec = str(source_folder_paths)
        if PATH_SEP in source_spec:
            folder, self.glob = source_spec.split(PATH_SEP, 1)
        else:
            folder = source_spec
        self.source_folder = Path(folder).expanduser()
        if not self.source_folder.is_dir():
            raise UsageError(f'Source folder {self.source_folder} is not a directory')
        self.logger.debug(f'Scanning paths matching {self.glob} in folder {self.source_folder}')
        self.changes = []
        self._parent = None
        # Populated by prepare(); initialised here so the attributes always exist
        self.local_files = []
        self.local_dirs = []
        self.new_dirs = {}
        self.box_paths = {}
        self.executor = Executor(concurrency)
        try:
            self.loop = asyncio.get_event_loop()
        except RuntimeError:
            # Newer Python versions no longer create a loop implicitly when
            # none is set for the current thread.
            self.loop = asyncio.new_event_loop()
            asyncio.set_event_loop(self.loop)

    @property
    def parent_folder(self):
        """
        Gets the parent folder in Box via API GET

        The result is cached after the first successful request.
        """
        if self._parent is not None:
            return self._parent
        self._parent = self.api.client.folder(self.box_folder_id).get()
        return self._parent

    def to_path(self, item):
        """
        Converts a Box File/Folder to a filepath from the parent folder

        :raises ValueError: if the parent folder does not appear in the
            item's path collection
        """
        path_collection = item.get(fields=['path_collection']).path_collection
        parent_folder = self.parent_folder
        item_path = None
        for entry in path_collection['entries']:
            if entry == parent_folder:
                # Everything after the parent folder maps below source_folder
                item_path = self.source_folder
            elif item_path is not None:
                item_path = item_path / entry.name
        if item_path is None:
            raise ValueError(
                f'Item {item["name"]} is not located under Box folder {self.box_folder_id}')
        return item_path / item['name']

    def get_box_paths(self, folder_id=None):
        """
        Yields all paths recursively from the parent folder ID

        Each yielded value is a ``(local_path, box_item)`` tuple.
        """
        if folder_id is None:
            folder_id = self.box_folder_id
        for item in self.api.client.folder(folder_id).get_items():
            yield self.to_path(item), item
            if isinstance(item, Folder):
                yield from self.get_box_paths(item._object_id)

    def prepare(self):
        """
        Loads entries from local filesystem and Box

        Used to decide later which items to sync
        """
        self.logger.info('Loading local path info')
        # Sorted so parent directories are seen before their children
        local_paths = sorted(self.source_folder.rglob(self.glob))
        self.local_files = [path for path in local_paths if path.is_file()]
        self.local_dirs = [path for path in local_paths if path.is_dir()]
        self.new_dirs = {}
        self.logger.info('Loading Box.com path info')
        self.box_paths = dict(self.get_box_paths())

    def get_parent(self, path):
        """
        Returns the Box Folder object for the parent folder of path

        :raises ValueError: if the parent is neither the source folder, a
            folder already on Box, nor a folder created during this run
        """
        parent = path.parent
        if parent == self.source_folder:
            return self.parent_folder
        if parent in self.box_paths:
            return self.box_paths[parent]
        if parent in self.new_dirs:
            return self.new_dirs[parent]
        raise ValueError(f'Unable to resolve folder path: {parent}')

    def sync_folders(self):
        """
        Creates the subfolders in Box.com to match local filesystem

        Runs before new files are updated/uploaded
        """
        for path in self.local_dirs:
            if path in self.box_paths:
                continue
            parent = self.get_parent(path)
            subfolder = self.api.create_folder(parent._object_id, path.name)
            self.new_dirs[path] = subfolder
            self.changes.append((parent, subfolder))

    def has_changed(self, boxfile, path):
        """
        Compares the file on Box with the path on disk

        Used to see if the local file has changed. Only the sizes are compared.
        """
        # TODO: compare sha1? expensive for large local files
        return boxfile.get(fields=['size']).size != os.stat(path).st_size

    def _run_upload(self, method, object_id, path):
        """
        Schedules ``method(object_id, path)`` on the thread pool and returns its future
        """
        return self.loop.run_in_executor(self.executor, method, object_id, path)

    async def sync_files(self):
        """
        Uploads the new or updated files to Box.com

        Folder structure must be created before running
        """
        parents, tasks = [], []
        for path in self.local_files:
            parent = self.get_parent(path)
            if path not in self.box_paths:
                task = self._run_upload(self.api.upload, parent._object_id, path)
            else:
                boxfile = self.box_paths[path]
                if not self.has_changed(boxfile, path):
                    continue
                task = self._run_upload(self.api.update, boxfile._object_id, path)
            # Record the parent only for scheduled uploads so that parents and
            # tasks stay index-aligned.
            parents.append(parent)
            tasks.append(task)
        if not tasks:
            # Nothing to transfer; waiting on an empty collection would fail
            return
        # gather() returns results in the order the tasks were passed in
        results = await asyncio.gather(*tasks)
        self.changes.extend(zip(parents, results))

    def run(self):
        """
        Main method that finds local files and matching files on Box.

        Then syncs the folder/subfolder structure and finally syncs any files
        to Box from the local machine
        """
        self.prepare()
        self.logger.info(f'Syncing {len(self.local_files)} files in '
                         f'{len(self.local_dirs) + 1} folders from {self.source_folder}')
        self.sync_folders()
        self.loop.run_until_complete(self.sync_files())
        if not self.changes:
            self.logger.warning('No changes detected')

    def output(self, filename):
        """
        Writes output CSV of what files are synced and their destinations in Box
        """
        header = ('Item Type', 'Parent Folder ID', 'Parent Folder Name', 'Item ID', 'Item Name')
        # newline='' lets the csv module control line endings itself
        with open(filename, 'w', newline='') as outfile:
            writer = csv.writer(outfile)
            writer.writerow(header)
            for parent, item in self.changes:
                writer.writerow([item.__class__.__name__, parent._object_id,
                                 parent.name, item._object_id, item.name])