# Source code for bsync.sync
from pathlib import Path
import csv
import os
from click import UsageError
from boxsdk.object.folder import Folder
from bsync.settings import PATH_SEP
class BoxSync:
    """
    Syncs the parent folder files to Box.

    Compares current files, checks for any missing in Box or any changed locally,
    creates directory structure and finally uploads all files
    """

    def __init__(self, api, logger, box_folder_id, source_folder_paths):
        """
        Stores the collaborators and resolves the local folder and glob pattern

        :param api: object providing ``client``, ``create_folder``, ``upload`` and ``update``
        :param logger: logger receiving the debug/info/warning messages
        :param box_folder_id: ID of the destination Box folder, coerced with ``int``
        :param source_folder_paths: local folder (``str`` or ``Path``), optionally
            followed by ``PATH_SEP`` and a glob pattern. The pattern defaults to ``*``
        :raises UsageError: if more than one ``PATH_SEP`` is present or the folder
            is not an existing directory
        """
        self.api = api
        self.logger = logger
        self.box_folder_id = int(box_folder_id)
        self.glob = '*'
        # Work on the string form so Path arguments are split the same way as str ones
        source = str(source_folder_paths)
        if PATH_SEP in source:
            parts = source.split(PATH_SEP)
            if len(parts) != 2:
                raise UsageError(
                    f'Expected FOLDER or FOLDER{PATH_SEP}GLOB but got {source}')
            source, pattern = parts
            # An empty pattern would make rglob fail, keep the default instead
            if pattern:
                self.glob = pattern
        self.source_folder = Path(source).expanduser()
        if not self.source_folder.is_dir():
            raise UsageError(f'Source folder {self.source_folder} is not a directory')
        self.logger.debug(f'Scanning paths matching {self.glob} in folder {self.source_folder}')
        self.changes = []
        self._parent = None
        # Filled in by prepare(); defined here so the sync methods never hit AttributeError
        self.local_files = []
        self.local_dirs = []
        self.new_dirs = {}
        self.box_paths = {}

    @property
    def parent_folder(self):
        """
        Gets the parent folder in Box via API GET

        The result is cached after the first call.
        """
        if self._parent is None:
            self._parent = self.api.client.folder(self.box_folder_id).get()
        return self._parent

    def to_path(self, item):
        """
        Converts a Box File/Folder to a filepath from the parent folder

        Issues an API GET for the item's ``path_collection``.

        :param item: Box item to locate
        :returns: local path made of the source folder, the names of the folders
            listed after the parent folder and the item name
        :raises ValueError: if the parent folder is not part of the item's path
        """
        path_collection = item.get(fields=['path_collection']).path_collection
        item_path = None
        for entry in path_collection['entries']:
            if entry == self.parent_folder:
                # Everything after the parent folder is relative to the source folder
                item_path = self.source_folder
            elif item_path is not None:
                item_path = item_path / entry.name
        if item_path is None:
            raise ValueError(
                f"Item {item['name']} is not inside Box folder {self.box_folder_id}")
        return item_path / item['name']

    def get_box_paths(self, folder_id=None, base_path=None):
        """
        Yields all paths recursively from the parent folder ID

        :param folder_id: Box folder to list, defaults to the parent folder
        :param base_path: local path matching ``folder_id``. When it is known the
            item paths are built by joining names, otherwise ``to_path`` is used
        :returns: generator of ``(path, item)`` tuples
        """
        if folder_id is None:
            folder_id = self.box_folder_id
        if base_path is None and str(folder_id) == str(self.box_folder_id):
            # The parent folder maps onto the local source folder
            base_path = self.source_folder
        for item in self.api.client.folder(folder_id).get_items():
            if base_path is None:
                # Unknown location: ask Box for the full path (one extra GET)
                item_path = self.to_path(item)
            else:
                item_path = base_path / item.name
            yield item_path, item
            if isinstance(item, Folder):
                yield from self.get_box_paths(item._object_id, item_path)

    def prepare(self):
        """
        Loads entries from local filesystem and Box
        Used to decide later which items to sync

        ``local_dirs`` holds the matched directories plus every directory between
        the source folder and a matched path, ordered so parents come first.
        """
        matched = list(self.source_folder.rglob(self.glob))
        self.local_files = [path for path in matched if path.is_file()]
        folders = {path for path in matched if path.is_dir()}
        # A pattern such as "*.csv" matches nested files but not the folders that
        # hold them; those folders still have to exist in Box before the upload
        for path in matched:
            for ancestor in path.parents:
                if ancestor == self.source_folder:
                    break
                folders.add(ancestor)
        # Tuple ordering places a folder ahead of anything nested inside it
        self.local_dirs = sorted(folders, key=lambda folder: folder.parts)
        self.new_dirs = {}
        self.box_paths = dict(self.get_box_paths())

    def get_parent(self, path):
        """
        Returns the Box Folder object for the parent folder of path

        :param path: local path whose parent is looked up
        :raises ValueError: if the parent is neither the source folder, a path
            found in Box nor a folder created during this sync
        """
        parent = path.parent
        if parent == self.source_folder:
            return self.parent_folder
        if parent in self.box_paths:
            return self.box_paths[parent]
        if parent in self.new_dirs:
            return self.new_dirs[parent]
        raise ValueError(f'Unable to resolve folder path: {parent}')

    def sync_folders(self):
        """
        Creates the subfolders in Box.com to match local filesystem
        Runs before new files are updated/uploaded
        """
        for path in self.local_dirs:
            if path in self.box_paths:
                continue
            parent = self.get_parent(path)
            subfolder = self.api.create_folder(parent._object_id, path.name)
            # Remember the new folder so nested paths can resolve it as their parent
            self.new_dirs[path] = subfolder
            self.changes.append((parent, subfolder))

    def has_changed(self, boxfile, path):
        """
        Compares the file on Box with the path on disk
        Used to see if the local file has changed

        Only the sizes are compared.
        """
        # TODO: compare sha1? expensive for large local files
        return boxfile.get(fields=['size']).size != os.stat(path).st_size

    def sync_files(self):
        """
        Uploads the new or updated files to Box.com
        Folder structure must be created before running
        """
        for path in self.local_files:
            parent = self.get_parent(path)
            if path not in self.box_paths:
                new_file = self.api.upload(parent._object_id, path.resolve())
                self.changes.append((parent, new_file))
                continue
            boxfile = self.box_paths[path]
            if self.has_changed(boxfile, path):
                updated_file = self.api.update(boxfile._object_id, path.resolve())
                self.changes.append((parent, updated_file))

    def run(self):
        """
        Main method that finds local files and matching files on Box.
        Then syncs the folder/subfolder structure and finally syncs any files to Box from the local machine
        """
        self.prepare()
        self.logger.info(f'Syncing {len(self.local_files)} files in '
                         f'{len(self.local_dirs) + 1} folders from {self.source_folder}')
        self.sync_folders()
        self.sync_files()
        if not self.changes:
            self.logger.warning('No changes detected')

    def output(self, filename):
        """
        Writes output CSV of what files are synced and their destinations in Box

        :param filename: path of the CSV file to write, overwritten if present
        """
        header = ('Item Type', 'Parent Folder ID', 'Parent Folder Name', 'Item ID', 'Item Name')
        # newline='' lets the csv module control line endings (no blank rows on Windows)
        with open(filename, 'w', newline='') as outfile:
            writer = csv.writer(outfile)
            writer.writerow(header)
            for parent, item in self.changes:
                writer.writerow([item.__class__.__name__, parent._object_id, parent.name,
                                 item._object_id, item.name])