java.lang.Object
fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils

public final class PathUtils extends Object
This class defines utility methods to manipulate Hadoop Path objects.
Since:
1.0
Author:
Laurent Jourdren
  • Nested Class Summary

    Nested Classes
    Modifier and Type
    Class
    Description
    static final class 
    Simple PathFilter to filter Paths with their beginning
    static final class 
    Simple PathFilter to filter Paths with their suffix
  • Method Summary

    Modifier and Type
    Method
    Description
    static void
    checkExistingDirectoryFile(org.apache.hadoop.fs.Path directory, org.apache.hadoop.conf.Configuration conf, String msgFileType)
    Check if a directory exists
    static void
    checkExistingFile(org.apache.hadoop.fs.Path file, org.apache.hadoop.conf.Configuration conf, String msgFileType)
    Check if a file exists
    static void
    checkExistingStandardFile(org.apache.hadoop.fs.Path file, org.apache.hadoop.conf.Configuration conf, String msgFileType)
    Check if a file exists
    static void
    checkExistingStandardFileOrDirectory(org.apache.hadoop.fs.Path file, org.apache.hadoop.conf.Configuration conf, String msgFileType)
    Check if a file exists
    static boolean
    concat(List<org.apache.hadoop.fs.Path> paths, org.apache.hadoop.fs.Path dstPath, boolean deleteSource, boolean overwrite, org.apache.hadoop.conf.Configuration conf)
    Copy all files in a directory to one output file (merge).
    static boolean
    concat(List<org.apache.hadoop.fs.Path> paths, org.apache.hadoop.fs.Path dstPath, boolean deleteSource, boolean overwrite, org.apache.hadoop.conf.Configuration conf, String addString)
    Copy all files in a directory to one output file (merge).
    static boolean
    concat(List<org.apache.hadoop.fs.Path> paths, org.apache.hadoop.fs.Path dstPath, org.apache.hadoop.conf.Configuration conf)
    Copy all files in a directory to one output file (merge).
    static boolean
    copy(org.apache.hadoop.fs.Path srcPath, org.apache.hadoop.fs.Path destPath, boolean overwrite, org.apache.hadoop.conf.Configuration conf)
    Copy file from a path to another path.
    static boolean
    copy(org.apache.hadoop.fs.Path srcPath, org.apache.hadoop.fs.Path destPath, org.apache.hadoop.conf.Configuration conf)
    Copy file from a path to another path.
    static boolean
    copyAndCompressInputStreamToPath(InputStream is, org.apache.hadoop.fs.Path destPath, org.apache.hadoop.conf.Configuration conf)
    Copy bytes from an InputStream to a path.
    static boolean
    copyAndCompressLocalFileToPath(File srcFile, org.apache.hadoop.fs.Path destPath, boolean removeSrcFile, org.apache.hadoop.conf.Configuration conf)
    Copy a local file to a path
    static boolean
    copyAndCompressLocalFileToPath(File srcFile, org.apache.hadoop.fs.Path destPath, org.apache.hadoop.conf.Configuration conf)
    Copy a local file to a path
    static boolean
    copyFromPathToLocalFile(org.apache.hadoop.fs.Path srcPath, File destFile, boolean removeOriginalFile, org.apache.hadoop.conf.Configuration conf)
    Copy a file from a path to a local file
    static boolean
    copyFromPathToLocalFile(org.apache.hadoop.fs.Path srcPath, File destFile, org.apache.hadoop.conf.Configuration conf)
    Copy a file from a path to a local file.
    static long
    copyInputStreamToPath(InputStream is, org.apache.hadoop.fs.Path destPath, org.apache.hadoop.conf.Configuration conf)
    Copy bytes from an InputStream to a path.
    static boolean
    copyLocalFileToPath(File srcFile, org.apache.hadoop.fs.Path destPath, boolean removeSrcFile, org.apache.hadoop.conf.Configuration conf)
    Copy a local file to a path
    static boolean
    copyLocalFileToPath(File srcFile, org.apache.hadoop.fs.Path destPath, org.apache.hadoop.conf.Configuration conf)
    Copy a local file to a path
    static void
    copyMerge(org.apache.hadoop.fs.Path srcPath, org.apache.hadoop.fs.Path destPath, boolean deleteSource, org.apache.hadoop.conf.Configuration conf)
    Merge several files of a directory into one file.
    static void
    copyMerge(org.apache.hadoop.fs.Path srcPath, org.apache.hadoop.fs.Path destPath, boolean deleteSource, org.apache.hadoop.conf.Configuration conf, String addString)
    Merge several files of a directory into one file.
    static void
    copyMerge(org.apache.hadoop.fs.Path srcPath, org.apache.hadoop.fs.Path destPath, org.apache.hadoop.conf.Configuration conf)
    Merge several files of a directory into one file.
    static InputStream
    createInputStream(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration conf)
    Create an input stream from a path.
    static OutputStream
    createOutputStream(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration conf)
    Create an output stream from a path.
    static org.apache.hadoop.fs.Path
    createTempPath(org.apache.hadoop.fs.Path directory, String prefix, String suffix, org.apache.hadoop.conf.Configuration conf)
    Create a new temporary path.
    static boolean
    exists(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration conf)
    Test if a path exists
    static boolean
    fullyDelete(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration conf)
    Fully delete a file or the content of a directory
    static long
    getSize(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration conf)
    Get the length of a file.
    static boolean
    isExistingDirectoryFile(org.apache.hadoop.fs.Path directory, org.apache.hadoop.conf.Configuration conf)
    Check if a directory exists
    static boolean
    isFile(org.apache.hadoop.fs.Path file, org.apache.hadoop.conf.Configuration conf)
    Check if a file exists
    static List<org.apache.hadoop.fs.Path>
    listPathsByPrefix(org.apache.hadoop.fs.Path dir, String prefix, boolean allowCompressedExtension, org.apache.hadoop.conf.Configuration conf)
    Return a list of the files of a path
    static List<org.apache.hadoop.fs.Path>
    listPathsByPrefix(org.apache.hadoop.fs.Path dir, String prefix, org.apache.hadoop.conf.Configuration conf)
    Return a list of the files of a path
    static List<org.apache.hadoop.fs.Path>
    listPathsBySuffix(org.apache.hadoop.fs.Path dir, String suffix, boolean allowCompressedExtension, org.apache.hadoop.conf.Configuration conf)
    Return a list of the files of a path
    static List<org.apache.hadoop.fs.Path>
    listPathsBySuffix(org.apache.hadoop.fs.Path dir, String suffix, org.apache.hadoop.conf.Configuration conf)
    Return a list of the files of a path
    static boolean
    mkdirs(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration conf)
    Create a directory.
    static boolean
    move(org.apache.hadoop.fs.Path srcPath, org.apache.hadoop.fs.Path destPath, boolean overwrite, org.apache.hadoop.conf.Configuration conf)
    Move file from a path to another path.
    static boolean
    move(org.apache.hadoop.fs.Path srcPath, org.apache.hadoop.fs.Path destPath, org.apache.hadoop.conf.Configuration conf)
    Move file from a path to another path.
    static org.apache.hadoop.fs.Path
    newPathWithOtherExtension(org.apache.hadoop.fs.Path path, String extension)
    Create a new path with the same parent directory and basename but with another extension.
    static void
    unZipPathToLocalFile(org.apache.hadoop.fs.Path srcPath, File outputDir, boolean removeOriginalZipFile, org.apache.hadoop.conf.Configuration conf)
    Unzip a zip file on local file system.
    static void
    unZipPathToLocalFile(org.apache.hadoop.fs.Path path, File outputDir, org.apache.hadoop.conf.Configuration conf)
    Unzip a zip file on local file system.

    Methods inherited from class java.lang.Object

    equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
  • Method Details

    • createInputStream

      public static InputStream createInputStream(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration conf) throws IOException
      Create an input stream from a path.
      Parameters:
      path - Path of the file to open
      conf - configuration
      Returns:
      an InputStream
      Throws:
      IOException - if an error occurs while creating InputStream
    • createOutputStream

      public static OutputStream createOutputStream(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration conf) throws IOException
      Create an output stream from a path.
      Parameters:
      path - Path of the file to open
      conf - configuration
      Returns:
      an OutputStream
      Throws:
      IOException - if an error occurs while creating OutputStream
    • getSize

      public static long getSize(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration conf) throws IOException
      Get the length of a file.
      Parameters:
      path - Path of the file
      conf - configuration
      Returns:
      the length of the file
      Throws:
      IOException - if an error occurs while getting the length of the file
    • copyFromPathToLocalFile

      public static boolean copyFromPathToLocalFile(org.apache.hadoop.fs.Path srcPath, File destFile, org.apache.hadoop.conf.Configuration conf) throws IOException
      Copy a file from a path to a local file. Don't remove original file.
      Parameters:
      srcPath - Path of the file to copy
      destFile - Destination file
      conf - Configuration object
      Returns:
      true if the copy is successful
      Throws:
      IOException - if an error occurs while copying file
    • copyFromPathToLocalFile

      public static boolean copyFromPathToLocalFile(org.apache.hadoop.fs.Path srcPath, File destFile, boolean removeOriginalFile, org.apache.hadoop.conf.Configuration conf) throws IOException
      Copy a file from a path to a local file
      Parameters:
      srcPath - Path of the file to copy
      destFile - Destination file
      removeOriginalFile - true if the original file must be deleted
      conf - Configuration object
      Returns:
      true if the copy is successful
      Throws:
      IOException - if an error occurs while copying file
    • copyLocalFileToPath

      public static boolean copyLocalFileToPath(File srcFile, org.apache.hadoop.fs.Path destPath, org.apache.hadoop.conf.Configuration conf) throws IOException
      Copy a local file to a path
      Parameters:
      srcFile - source file
      destPath - destination path
      conf - Configuration object
      Returns:
      true if the copy is successful
      Throws:
      IOException - if an error occurs while copying file
    • copyLocalFileToPath

      public static boolean copyLocalFileToPath(File srcFile, org.apache.hadoop.fs.Path destPath, boolean removeSrcFile, org.apache.hadoop.conf.Configuration conf) throws IOException
      Copy a local file to a path
      Parameters:
      srcFile - source file
      destPath - destination path
      removeSrcFile - true if the source file must be removed
      conf - Configuration object
      Returns:
      true if the copy is successful
      Throws:
      IOException - if an error occurs while copying file
    • copyInputStreamToPath

      public static long copyInputStreamToPath(InputStream is, org.apache.hadoop.fs.Path destPath, org.apache.hadoop.conf.Configuration conf) throws IOException
      Copy bytes from an InputStream to a path.
      Parameters:
      is - the InputStream to read from
      destPath - destination path
      conf - Configuration object
      Returns:
      the number of bytes copied
      Throws:
      IOException - In case of an I/O problem
    • copyAndCompressLocalFileToPath

      public static boolean copyAndCompressLocalFileToPath(File srcFile, org.apache.hadoop.fs.Path destPath, org.apache.hadoop.conf.Configuration conf) throws IOException
      Copy a local file to a path
      Parameters:
      srcFile - source file
      destPath - destination path
      conf - Configuration object
      Returns:
      true if the copy is successful
      Throws:
      IOException - if an error occurs while copying file
    • copyAndCompressLocalFileToPath

      public static boolean copyAndCompressLocalFileToPath(File srcFile, org.apache.hadoop.fs.Path destPath, boolean removeSrcFile, org.apache.hadoop.conf.Configuration conf) throws IOException
      Copy a local file to a path
      Parameters:
      srcFile - source file
      destPath - destination path
      removeSrcFile - true if the source file must be removed
      conf - Configuration object
      Returns:
      true if the copy is successful
      Throws:
      IOException - if an error occurs while copying file
    • copyAndCompressInputStreamToPath

      public static boolean copyAndCompressInputStreamToPath(InputStream is, org.apache.hadoop.fs.Path destPath, org.apache.hadoop.conf.Configuration conf) throws IOException
      Copy bytes from an InputStream to a path.
      Parameters:
      is - the InputStream to read from
      destPath - destination path
      conf - Configuration object
      Returns:
      true if the copy is successful
      Throws:
      IOException - In case of an I/O problem
    • unZipPathToLocalFile

      public static void unZipPathToLocalFile(org.apache.hadoop.fs.Path path, File outputDir, org.apache.hadoop.conf.Configuration conf) throws IOException
      Unzip a zip file on local file system. Don't remove original zip file.
      Parameters:
      path - Path of the zip file
      outputDir - Output directory of the content of the zip file
      conf - Configuration object
      Throws:
      IOException - if an error occurs while unzipping the file
    • unZipPathToLocalFile

      public static void unZipPathToLocalFile(org.apache.hadoop.fs.Path srcPath, File outputDir, boolean removeOriginalZipFile, org.apache.hadoop.conf.Configuration conf) throws IOException
      Unzip a zip file on local file system.
      Parameters:
      srcPath - Path of the zip file
      outputDir - Output directory of the content of the zip file
      removeOriginalZipFile - true if the original zip file must be removed
      conf - Configuration object
      Throws:
      IOException - if an error occurs while unzipping the file
    • fullyDelete

      public static boolean fullyDelete(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration conf) throws IOException
      Fully delete a file or the content of a directory
      Parameters:
      path - Path of the file
      conf - Configuration Object
      Returns:
      true if the Path is successfully removed
      Throws:
      IOException - if cannot delete the file
    • copyMerge

      public static void copyMerge(org.apache.hadoop.fs.Path srcPath, org.apache.hadoop.fs.Path destPath, org.apache.hadoop.conf.Configuration conf) throws IOException
      Merge several files of a directory into one file.
      Parameters:
      srcPath - source directory path
      destPath - destination path
      conf - Configuration object
      Throws:
      IOException - if an error occurs while merging files
    • copyMerge

      public static void copyMerge(org.apache.hadoop.fs.Path srcPath, org.apache.hadoop.fs.Path destPath, boolean deleteSource, org.apache.hadoop.conf.Configuration conf) throws IOException
      Merge several files of a directory into one file.
      Parameters:
      srcPath - source directory path
      destPath - destination path
      deleteSource - delete source files
      conf - Configuration object
      Throws:
      IOException - if an error occurs while merging files
    • copyMerge

      public static void copyMerge(org.apache.hadoop.fs.Path srcPath, org.apache.hadoop.fs.Path destPath, boolean deleteSource, org.apache.hadoop.conf.Configuration conf, String addString) throws IOException
      Merge several files of a directory into one file.
      Parameters:
      srcPath - source directory path
      destPath - destination path
      deleteSource - delete source files
      conf - Configuration object
      addString - string to add
      Throws:
      IOException - if an error occurs while merging files
    • newPathWithOtherExtension

      public static org.apache.hadoop.fs.Path newPathWithOtherExtension(org.apache.hadoop.fs.Path path, String extension)
      Create a new path with the same parent directory and basename but with another extension.
      Parameters:
      path - base path to use
      extension - extension to add
      Returns:
      a new Path object
    • listPathsByPrefix

      public static List<org.apache.hadoop.fs.Path> listPathsByPrefix(org.apache.hadoop.fs.Path dir, String prefix, org.apache.hadoop.conf.Configuration conf) throws IOException
      Return a list of the files of a path
      Parameters:
      dir - Path of the directory
      prefix - filter on prefix
      conf - Configuration
      Returns:
      a list of Path
      Throws:
      IOException - if an error occurs while listing the directory
    • listPathsByPrefix

      public static List<org.apache.hadoop.fs.Path> listPathsByPrefix(org.apache.hadoop.fs.Path dir, String prefix, boolean allowCompressedExtension, org.apache.hadoop.conf.Configuration conf) throws IOException
      Return a list of the files of a path
      Parameters:
      dir - Path of the directory
      prefix - filter on prefix
      allowCompressedExtension - Allow compressed extensions
      conf - Configuration
      Returns:
      a list of Path
      Throws:
      IOException - if an error occurs while listing the directory
    • listPathsBySuffix

      public static List<org.apache.hadoop.fs.Path> listPathsBySuffix(org.apache.hadoop.fs.Path dir, String suffix, org.apache.hadoop.conf.Configuration conf) throws IOException
      Return a list of the files of a path
      Parameters:
      dir - Path of the directory
      suffix - filter on suffix
      conf - Configuration
      Returns:
      a list of Path
      Throws:
      IOException - if an error occurs while listing the directory
    • listPathsBySuffix

      public static List<org.apache.hadoop.fs.Path> listPathsBySuffix(org.apache.hadoop.fs.Path dir, String suffix, boolean allowCompressedExtension, org.apache.hadoop.conf.Configuration conf) throws IOException
      Return a list of the files of a path
      Parameters:
      dir - Path of the directory
      suffix - filter on suffix
      allowCompressedExtension - Allow compressed extensions
      conf - Configuration
      Returns:
      a list of Path
      Throws:
      IOException - if an error occurs while listing the directory
    • createTempPath

      public static org.apache.hadoop.fs.Path createTempPath(org.apache.hadoop.fs.Path directory, String prefix, String suffix, org.apache.hadoop.conf.Configuration conf) throws IOException
      Create a new temporary path. Nothing is created on the file system.
      Parameters:
      directory - parent directory of the temporary file to create
      prefix - Prefix of the temporary file
      suffix - suffix of the temporary file
      conf - Configuration
      Returns:
      the new temporary file
      Throws:
      IOException - if there is an error creating the temporary directory
    • concat

      public static boolean concat(List<org.apache.hadoop.fs.Path> paths, org.apache.hadoop.fs.Path dstPath, org.apache.hadoop.conf.Configuration conf) throws IOException
      Copy all files in a directory to one output file (merge).
      Parameters:
      paths - list of path files to concat
      dstPath - destination path
      conf - Configuration
      Returns:
      true if the concatenation is successful
      Throws:
      IOException - if an error occurs
    • concat

      public static boolean concat(List<org.apache.hadoop.fs.Path> paths, org.apache.hadoop.fs.Path dstPath, boolean deleteSource, boolean overwrite, org.apache.hadoop.conf.Configuration conf) throws IOException
      Copy all files in a directory to one output file (merge).
      Parameters:
      paths - list of path files to concat
      dstPath - destination path
      deleteSource - true if the original files must be deleted
      overwrite - true if an existing destination file must be deleted
      conf - Configuration
      Returns:
      true if the concatenation is successful
      Throws:
      IOException - if an error occurs
    • concat

      public static boolean concat(List<org.apache.hadoop.fs.Path> paths, org.apache.hadoop.fs.Path dstPath, boolean deleteSource, boolean overwrite, org.apache.hadoop.conf.Configuration conf, String addString) throws IOException
      Copy all files in a directory to one output file (merge).
      Parameters:
      paths - list of path files to concat
      dstPath - destination path
      deleteSource - true if the original files must be deleted
      overwrite - true if an existing destination file must be deleted
      conf - Configuration
      addString - string to add
      Returns:
      true if the concatenation is successful
      Throws:
      IOException - if an error occurs
    • checkExistingFile

      public static void checkExistingFile(org.apache.hadoop.fs.Path file, org.apache.hadoop.conf.Configuration conf, String msgFileType) throws IOException
      Check if a file exists
      Parameters:
      file - File to test
      conf - Configuration
      msgFileType - message for the description of the file
      Throws:
      IOException - if the file doesn't exist
    • checkExistingDirectoryFile

      public static void checkExistingDirectoryFile(org.apache.hadoop.fs.Path directory, org.apache.hadoop.conf.Configuration conf, String msgFileType) throws IOException
      Check if a directory exists
      Parameters:
      directory - directory to test
      conf - the configuration object
      msgFileType - message for the description of the file
      Throws:
      IOException - if the directory doesn't exist
    • isExistingDirectoryFile

      public static boolean isExistingDirectoryFile(org.apache.hadoop.fs.Path directory, org.apache.hadoop.conf.Configuration conf) throws IOException
      Check if a directory exists
      Parameters:
      directory - directory to test
      conf - the configuration object
      Returns:
      true if the directory exists
      Throws:
      IOException - if an error occurs
    • isFile

      public static boolean isFile(org.apache.hadoop.fs.Path file, org.apache.hadoop.conf.Configuration conf) throws IOException
      Check if a file exists
      Parameters:
      file - file to test
      conf - Configuration
      Returns:
      true if the file exists
      Throws:
      IOException - if an unexpected error occurs
    • checkExistingStandardFile

      public static void checkExistingStandardFile(org.apache.hadoop.fs.Path file, org.apache.hadoop.conf.Configuration conf, String msgFileType) throws IOException
      Check if a file exists
      Parameters:
      file - File to test
      conf - Configuration
      msgFileType - message for the description of the file
      Throws:
      IOException - if the file doesn't exist
    • checkExistingStandardFileOrDirectory

      public static void checkExistingStandardFileOrDirectory(org.apache.hadoop.fs.Path file, org.apache.hadoop.conf.Configuration conf, String msgFileType) throws IOException
      Check if a file exists
      Parameters:
      file - File to test
      conf - Configuration
      msgFileType - message for the description of the file
      Throws:
      IOException - if the file doesn't exist
    • copy

      public static boolean copy(org.apache.hadoop.fs.Path srcPath, org.apache.hadoop.fs.Path destPath, org.apache.hadoop.conf.Configuration conf) throws IOException
      Copy file from a path to another path.
      Parameters:
      srcPath - source path
      destPath - destination path
      conf - Configuration
      Returns:
      true if the copy is successful
      Throws:
      IOException - if an error occurs while copying
    • copy

      public static boolean copy(org.apache.hadoop.fs.Path srcPath, org.apache.hadoop.fs.Path destPath, boolean overwrite, org.apache.hadoop.conf.Configuration conf) throws IOException
      Copy file from a path to another path.
      Parameters:
      srcPath - source path
      destPath - destination path
      overwrite - true if existing files must be overwritten
      conf - Configuration
      Returns:
      true if the copy is successful
      Throws:
      IOException - if an error occurs while copying
    • move

      public static boolean move(org.apache.hadoop.fs.Path srcPath, org.apache.hadoop.fs.Path destPath, org.apache.hadoop.conf.Configuration conf) throws IOException
      Move file from a path to another path.
      Parameters:
      srcPath - source path
      destPath - destination path
      conf - Configuration
      Returns:
      true if the move is successful
      Throws:
      IOException - if an error occurs while moving
    • move

      public static boolean move(org.apache.hadoop.fs.Path srcPath, org.apache.hadoop.fs.Path destPath, boolean overwrite, org.apache.hadoop.conf.Configuration conf) throws IOException
      Move file from a path to another path.
      Parameters:
      srcPath - source path
      destPath - destination path
      overwrite - true if existing files must be overwritten
      conf - Configuration
      Returns:
      true if the move is successful
      Throws:
      IOException - if an error occurs while moving
    • mkdirs

      public static boolean mkdirs(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration conf) throws IOException
      Create a directory. If parent directories don't exist, they are created.
      Parameters:
      path - Path of the directory to create
      conf - Configuration
      Returns:
      true if the directory is successfully created
      Throws:
      IOException - if an error occurs while creating the directory
    • exists

      public static boolean exists(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration conf) throws IOException
      Test if a path exists
      Parameters:
      path - Path to test
      conf - Configuration
      Returns:
      true if the path exists
      Throws:
      IOException - if an error occurs while testing the path