web-dev-qa-db-fra.com

Filtrage des fichiers et des répertoires avec os.walk()

Je cherche un moyen d'inclure/exclure des motifs de fichiers et d'exclure des répertoires lors d'un appel à os.walk().

Voici ce que je fais maintenant:

import fnmatch
import os

includes = ['*.doc', '*.odt']
excludes = ['/home/paulo-freitas/Documents']

def _filter(paths):
    matches = []

    for path in paths:
        append = None

        for include in includes:
            if os.path.isdir(path):
                append = True
                break

            if fnmatch.fnmatch(path, include):
                append = True
                break

        for exclude in excludes:
            if os.path.isdir(path) and path == exclude:
                append = False
                break

            if fnmatch.fnmatch(path, exclude):
                append = False
                break

        if append:
            matches.append(path)

    return matches

# Walk the home directory top-down and print every file that survives
# the include/exclude rules implemented by _filter().
for root, dirs, files in os.walk('/home/paulo-freitas'):
    # _filter() works on full paths, but os.walk expects bare directory names
    # in ``dirs``; assign through dirs[:] so the walk itself is pruned, and
    # convert back to names so this also works with a relative walk root.
    dirs[:] = [
        os.path.basename(path)
        for path in _filter(os.path.join(root, d) for d in dirs)
    ]
    files[:] = _filter(os.path.join(root, f) for f in files)

    for filename in files:
        # ``files`` already holds full paths at this point.
        # print() with a single argument behaves the same on Python 2 and 3.
        print(filename)

La question est: y a-t-il une meilleure façon de faire cela? Comment?

38
Paulo Freitas

Cette solution utilise fnmatch.translate pour convertir les motifs glob en expressions régulières (elle suppose que les motifs d'inclusion ne s'appliquent qu'aux fichiers) :

import fnmatch
import os
import os.path
import re

includes = ['*.doc', '*.odt'] # for files only
excludes = ['/home/paulo-freitas/Documents'] # for dirs and files

# transform glob patterns to regular expressions
includes = r'|'.join([fnmatch.translate(x) for x in includes])
# r'$.' can never match, so an empty exclude list excludes nothing
excludes = r'|'.join([fnmatch.translate(x) for x in excludes]) or r'$.'

# compile once instead of handing the pattern strings to re.match per path
is_included = re.compile(includes).match
is_excluded = re.compile(excludes).match

for root, dirs, files in os.walk('/home/paulo-freitas'):

    # exclude dirs: test the full path, but keep bare names in ``dirs`` so
    # os.walk can keep descending even when the walk root is relative
    dirs[:] = [d for d in dirs if not is_excluded(os.path.join(root, d))]

    # exclude/include files
    files = [os.path.join(root, f) for f in files]
    files = [f for f in files if is_included(f) and not is_excluded(f)]

    for fname in files:
        # print() with a single argument works on Python 2 and Python 3
        print(fname)
46
Oben Sonne

De docs.python.org

os.walk(top[, topdown=True[, onerror=None[, followlinks=False]]])

Lorsque topdown vaut True, l'appelant peut modifier la liste des noms de répertoires sur place… ceci peut être utilisé pour élaguer la recherche…

for root, dirs, files in os.walk('/home/paulo-freitas', topdown=True):
    # excludes can be done with fnmatch.filter and complementary set,
    # but it's more annoying to read.
    # Slice assignment mutates the list os.walk holds, so excluded
    # directories are never descended into.
    dirs[:] = [d for d in dirs if d not in excludes]
    for pat in includes:
        for f in fnmatch.filter(files, pat):
            # print() with a single argument works on Python 2 and Python 3
            print(os.path.join(root, f))

Je dois signaler que le code ci-dessus suppose que excludes contient des noms de répertoires (des motifs), pas des chemins complets. Pour correspondre au cas de l'auteur de la question, il faut adapter la compréhension de liste afin de filtrer avec « if os.path.join(root, d) not in excludes ».

21
kojiro

pourquoi fnmatch?

import os
# Placeholder in the original answer ("...."): list the directory names to skip.
excludes = []
for ROOT, DIR, FILES in os.walk("/path"):
    for name in FILES:
        # Include the dot so that only real extensions match
        # ("mydoc" must not be treated as a .doc file).
        if name.endswith(('.doc', '.odt')):
            print(name)
    for directory in DIR:
        if directory not in excludes:
            print(directory)

pas testé de manière exhaustive

7
kurumi

dirtools est parfait pour votre cas d'utilisation:

from dirtools import Dir

# NOTE(review): presumably prints the files found under the current directory
# while skipping whatever the patterns in .gitignore exclude — confirm against
# the dirtools documentation.
print(Dir('.', exclude_file='.gitignore').files())
1
michaeljoseph

Voici un exemple d'exclusion de répertoires et de fichiers avec os.walk():

import os
import shutil

ignoreDirPatterns=[".git"]
ignoreFilePatterns=[".php"]


def _contains_any(text, patterns):
    """Return True when any of *patterns* occurs as a substring of *text*."""
    return any(pattern in text for pattern in patterns)


def copyTree(src, dest, onerror=None):
    """Copy the files under *src* into *dest*, skipping ignored entries.

    Directories whose path relative to *src* contains one of
    ``ignoreDirPatterns`` are skipped together with everything below them;
    files whose name contains one of ``ignoreFilePatterns`` are skipped.
    Destination directories are created on demand, so directories without
    any file to copy are not created.

    :param src: root of the tree to copy.
    :param dest: directory that receives the copy.
    :param onerror: optional callable taking one ``OSError``; it is handed to
        ``os.walk`` and is also called when a destination directory cannot be
        created.  The files of such a directory are skipped.
    """
    src = os.path.abspath(src)
    for root, dirs, files in os.walk(src, onerror=onerror):
        # Match the patterns against the path *below* src only; matching the
        # absolute path would ignore the whole tree whenever src itself lives
        # under a directory whose name contains a pattern.
        rel_root = os.path.relpath(root, src)
        if rel_root == os.curdir:
            rel_root = ""

        if _contains_any(rel_root, ignoreDirPatterns):
            # Everything below an ignored directory is ignored as well,
            # so there is no point in descending into it.
            dirs[:] = []
            continue

        wanted = [name for name in files
                  if not _contains_any(name, ignoreFilePatterns)]
        if not wanted:
            continue

        dirpath = os.path.join(dest, rel_root)
        try:
            os.makedirs(dirpath, exist_ok=True)
        except OSError as e:
            if onerror is not None:
                onerror(e)
            # Without the target directory the copies below cannot succeed.
            continue

        for name in wanted:
            shutil.copy(os.path.join(root, name), dirpath)

Python >= 3.2 requis, en raison du paramètre exist_ok de makedirs.

0
Jahid

Voici un moyen de le faire

import fnmatch
import os

excludes = ['/home/paulo-freitas/Documents']
matches = []
for path, dirs, files in os.walk(os.getcwd()):
    # Skip a directory as soon as its path contains one of the excluded
    # paths.  (A ``continue`` inside an inner ``for ... else`` loop only
    # advances that inner loop, so it cannot be used to skip here.)
    if any(excluded in path for excluded in excludes):
        # Every descendant would be skipped too, so do not walk into them.
        dirs[:] = []
        continue
    for filename in files:
        if fnmatch.fnmatch(filename, '*.doc') or fnmatch.fnmatch(filename, '*.odt'):
            matches.append(os.path.abspath(os.path.join(path, filename)))
# print() with a single argument works on Python 2 and Python 3
print(matches)
0
Senthil Kumaran
import os
includes = ['*.doc', '*.odt']
excludes = ['/home/paulo-freitas/Documents']


def file_search(path, exe):
    """Print the full path of every file under *path* whose name ends with *exe*.

    :param path: directory to walk.
    :param exe: file-name suffix such as ``'.doc'``; a leading ``*`` (as in
        the ``includes`` globs) is ignored.
    """
    suffix = exe.lstrip('*')
    for root, _dirs, names in os.walk(path):
        for name in names:
            if name.endswith(suffix):
                print(os.path.join(root, name))


# The search is driven from here rather than from inside file_search(),
# where the call recursed without end.
# NOTE(review): the original call searches excludes[0] — confirm that this is
# the intended start directory.
for pattern in includes:
    file_search(excludes[0], pattern)
0
juniour