API details: Module for defining configuration and downloading data from various sources such as Kaggle, fastai, and GitHub
from nbdev import *
# %nbdev_default_export data.external
# %nbdev_default_export data.external
True

Extend Configuration

class Config[source]

Config()

Set up the config at ~/.fastai unless it already exists.

fname = URLs.PETS.split("/")[-1]
c_key='model'
local_path = URLs.LOCAL_PATH/('model' if c_key=='model' else 'data')/fname

local_path
Path('/Landmark2/pdo/Code/ALDAOps/nlphero/nbs/model/oxford-iiit-pet.tgz')
Config().d.keys()
dict_keys(['archive_path', 'data_path', 'model_path', 'storage_path', 'version'])
Config()[c_key]
Path('/Landmark2/pdo/.nlphero/models')
Config()['data']
Path('/Landmark2/pdo/.nlphero/data')
URLs.path()
Path('/home/ubuntu/.fastai/archive')
os.environ["FASTAI_HOME"] = str(Config().config_path.absolute())
URLs.path()
Path('/home/ubuntu/.fastai/archive')
URLs.path()
Path('/home/ubuntu/.fastai/archive')

Extend URL

class URLs[source]

URLs()

Global constants for dataset and model URLs.

URLs.path()
Path('/Landmark2/pdo/.nlphero/archive')

Kaggle Dataset Utilities

This is a helper class for downloading Kaggle datasets in the required directory structure.

class KAGGLEs[source]

KAGGLEs()

KAGGLEs.path()
Path('/Landmark2/pdo/.nlphero/archive')
URLs.path(URLs.PETS)
Path('/Landmark2/pdo/.nlphero/archive/oxford-iiit-pet.tgz')
URLs.path(URLs.PETS, c_key="data")
Path('/Landmark2/pdo/.nlphero/data/oxford-iiit-pet.tgz')
KAGGLEs.path(KAGGLEs.SPOOKY_AUTHOR_IDENTIFICATION)
Path('/Landmark2/pdo/.nlphero/archive/spooky-author-identification')
KAGGLEs.path(KAGGLEs.SPOOKY)
Path('/Landmark2/pdo/.nlphero/archive/spooky')
KAGGLEs.ktype(KAGGLEs.SPOOKY)
'datasets'
!cat /Landmark2/pdo/.nlphero/config.yml
archive_path: /Landmark2/pdo/.nlphero/archive
data_path: /Landmark2/pdo/.nlphero/data
model_path: /Landmark2/pdo/.nlphero/models
storage_path: /tmp
version: 2

download_kaggle[source]

download_kaggle(url, dest, overwrite=False)

Download `url` to `dest`, skipping the download if the file already exists and `overwrite` is False.

download_data[source]

download_data(url, fname=None, c_key='archive', force_download=False, timeout=4)

Download url to fname.

 
 
 
KAGGLEs.SPOOKY
'kaggle_datasets::abhishek/spooky'
KAGGLEs.SPOOKY.startswith("kaggle")
True

untar_data[source]

untar_data(url, fname=None, dest=None, c_key='data', force_download=False, extract_func=file_extract, timeout=4)

Download `url` to `fname` if `dest` doesn't exist, and un-tgz or unzip it into the folder `dest`.

 
is_kaggle = KAGGLEs.SPOOKY.startswith("kaggle")
T = [URLs, KAGGLEs][is_kaggle]
T
__main__.KAGGLEs
URLs
__main__.URLs
T.path()
Path('/Landmark2/pdo/.nlphero/archive')
c_key='data'
# url = KAGGLEs.SPOOKY
url = URLs.PETS
is_kaggle =url.startswith("kaggle")
T = [URLs, KAGGLEs][is_kaggle]
dest=None
fname = None
default_dest = T.path(url, c_key=c_key).with_suffix('')
dest = default_dest if dest is None else Path(dest)/default_dest.name
fname = Path(fname or T.path(url))
default_dest
Path('/Landmark2/pdo/.nlphero/data/oxford-iiit-pet')
dest.parent
Path('/Landmark2/pdo/.nlphero/data')
fname.suffix
'.tgz'

list_checked_data[source]

list_checked_data()

 
# pd.DataFrame([path.name for path in KAGGLEs.path(c_key='data').ls()])
print("Check")
Check

Bing Image Dataset Utilities

search_images_bing[source]

search_images_bing(key, term, count=150, min_sz=128)

('/Landmark2/pdo/.nlphero/data', '/Landmark2/pdo/.nlphero/data')

construct_image_dataset[source]

construct_image_dataset(clstypes, dest, key=None, loc=None, count=150)

list_ds[source]

list_ds(loc=None)

# loc =os.path.join(os.getenv("NLPHERO_HOME"), "data")
# if Path(loc).exists(): print(list_ds(loc))
clstypes = "Sarah Shahi", "Mercedes Masohn", "Jeffrey Dean Morgan", "Javier Bardem", "Benicio Del Toro", "Brad Pitt"
dest = "DoppelgangerMixed"
count=150
 
 
 

get_ds[source]

get_ds(name, loc=None)