############################################################################## # @file ExternalData.cmake # @brief Manage data files stored outside the source tree. # # Copyright 2010-2011 Kitware, Inc. All rights reserved. # File modified by Andreas Schuh. # # @ingroup CMakeTools ############################################################################## # - Manage data files stored outside source tree # Use this module to unambiguously reference data files stored outside the # source tree and fetch them at build time from arbitrary local and remote # content-addressed locations. Functions provided by this module recognize # arguments with the syntax "DATA{<name>}" as references to external data, # replace them with full paths to local copies of those data, and create build # rules to fetch and update the local copies. # # The DATA{} syntax is literal and the <name> is a full or relative path # within the source tree. The source tree must contain either a real data # file at <name> or a "content link" at <name><ext> containing a hash of the # real file using a hash algorithm corresponding to <ext>. For example, the # argument "DATA{img.png}" may be satisfied by either a real "img.png" file in # the current source directory or a "img.png.md5" file containing its MD5 sum. # # The 'ExternalData_Expand_Arguments' function evaluates DATA{} references # in its arguments and constructs a new list of arguments: # ExternalData_Expand_Arguments( # <target> # Name of data management target # <outVar> # Output variable # [args...] # Input arguments, DATA{} allowed # ) # It replaces each DATA{} reference argument with the full path of a real # data file on disk that will exist after the <target> builds. # # The 'ExternalData_Add_Test' function wraps around the CMake add_test() # command but supports DATA{} reference arguments: # ExternalData_Add_Test( # <target> # Name of data management target # ... # Arguments of add_test(), DATA{} allowed # ) # It passes its arguments through ExternalData_Expand_Arguments and then # invokes add_test() using the results. # # The 'ExternalData_Add_Target' function creates a custom target to manage # local instances of data files stored externally: # ExternalData_Add_Target( # <target> # Name of data management target # ) # It creates custom commands in the target as necessary to make data files # available for each DATA{} reference previously evaluated by other functions # provided by this module. A list of URL templates must be provided in the # variable ExternalData_URL_TEMPLATES using the placeholders "%(algo)" and # "%(hash)" in each template. Data fetch rules try each URL template in order # by substituting the hash algorithm name for "%(algo)" and the hash value for # "%(hash)". # # The following hash algorithms are supported: # %(algo) <ext> Description # ------- ----- ----------- # MD5 .md5 Message-Digest Algorithm 5, RFC 1321 # Note that the hashes are used only for unique data identification and # download verification. This is not security software. # # Example usage: # include(ExternalData) # set(ExternalData_URL_TEMPLATES "file:///local/%(algo)/%(hash)" # "http://data.org/%(algo)/%(hash)") # ExternalData_Add_Test(MyData # NAME MyTest # COMMAND MyExe DATA{MyInput.png} # ) # ExternalData_Add_Target(MyData) # When test "MyTest" runs the "DATA{MyInput.png}" argument will be replaced by # the full path to a real instance of the data file "MyInput.png" on disk. If # the source tree contains a content link such as "MyInput.png.md5" then the # "MyData" target creates a real "MyInput.png" in the build tree. # # The DATA{} syntax can automatically recognize and fetch a file series. If # the source tree contains a group of files or content links named like a # series then a DATA{} reference to one member adds rules to fetch all of # them. Although all members of a series are fetched, only the file # originally named by the DATA{} argument is substituted for it. Two # variables configure recognition of a series from DATA{<name>}. First, # ExternalData_SERIES_PARSE is a regex of the form "^(...)(...)(...)$" to # parse <prefix>, <number>, and <suffix> parts from <name>. Second, # ExternalData_SERIES_MATCH is a regex matching the <number> part of series # members named <prefix><number><suffix>. Note that the <suffix> of a series # does not include a hash-algorithm extension. Both series configuration # variables have default values that work well for common cases. # # The variable ExternalData_LINK_CONTENT may be set to the name of a supported # hash algorithm to enable automatic conversion of real data files referenced # by the DATA{} syntax into content links. For each such <file> a content # link named "<file><ext>" is created. The original file is renamed to the # form ".ExternalData_<algo>_<hash>" to stage it for future transmission to # one of the locations in the list of URL templates (by means outside the # scope of this module). The data fetch rule created for the content link # will use the staged object if it cannot be found using any URL template. # # The variable ExternalData_SOURCE_ROOT may be set to the highest source # directory containing any path named by a DATA{} reference. The default is # CMAKE_SOURCE_DIR. ExternalData_SOURCE_ROOT and CMAKE_SOURCE_DIR must refer # to directories within a single source distribution (e.g. they come together # in one tarball). #============================================================================= # Copyright 2010-2011 Kitware, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # * Neither the names of Kitware, Inc., the Insight Software Consortium, # nor the names of their contributors may be used to endorse or promote # products derived from this software without specific prior written # permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #============================================================================= ############################################################################## # @brief @todo Document function. # # @param [in] target Name of the test. function(ExternalData_add_test target) ExternalData_expand_arguments("${target}" testArgs ${ARGN}) add_test(${testArgs}) endfunction() ############################################################################## # @brief @todo Document funtion. # # @param [in] target Name of the external data target. function(ExternalData_add_target target) if(NOT ExternalData_URL_TEMPLATES) message(FATAL_ERROR "ExternalData_URL_TEMPLATES is not set!") endif() set(config ${CMAKE_CURRENT_BINARY_DIR}/${target}_config.cmake) configure_file(${_ExternalData_SELF_DIR}/ExternalData_config.cmake.in ${config} @ONLY) set(files "") # Set "_ExternalData_FILE_${file}" for each output file to avoid duplicate # rules. Use local data first to prefer real files over content links. # Custom commands to copy or link local data. get_property(data_local GLOBAL PROPERTY _ExternalData_${target}_LOCAL) foreach(entry IN LISTS data_local) string(REPLACE "|" ";" tuple "${entry}") list(GET tuple 0 file) list(GET tuple 1 name) if(NOT DEFINED "_ExternalData_FILE_${file}") set("_ExternalData_FILE_${file}" 1) add_custom_command( COMMENT "Generating ${file}" OUTPUT "${file}" COMMAND ${CMAKE_COMMAND} -Drelative_top=${CMAKE_BINARY_DIR} -Dfile=${file} -Dname=${name} -DExternalData_ACTION=local -DExternalData_CONFIG=${config} -P ${_ExternalData_SELF} DEPENDS "${name}" ) list(APPEND files "${file}") endif() endforeach() # Custom commands to fetch remote data. get_property(data_fetch GLOBAL PROPERTY _ExternalData_${target}_FETCH) foreach(entry IN LISTS data_fetch) string(REPLACE "|" ";" tuple "${entry}") list(GET tuple 0 file) list(GET tuple 1 name) list(GET tuple 2 ext) if(NOT DEFINED "_ExternalData_FILE_${file}") set("_ExternalData_FILE_${file}" 1) add_custom_command( # Users care about the data file, so hide the hash/timestamp file. COMMENT "Generating ${file}" # The hash/timestamp file is the output from the build perspective. # List the real file as a second output in case it is a broken link. # The files must be listed in this order so CMake can hide from the # make tool that a symlink target may not be newer than the input. OUTPUT "${file}${ext}" "${file}" # Run the data fetch/update script. COMMAND ${CMAKE_COMMAND} -DExternalData_OBJECT_DIR=${CMAKE_BINARY_DIR}/ExternalData/Objects -Drelative_top=${CMAKE_BINARY_DIR} -Dfile=${file} -Dname=${name} -Dext=${ext} -DExternalData_ACTION=fetch -DExternalData_CONFIG=${config} -P ${_ExternalData_SELF} # Update whenever the object hash changes. DEPENDS "${name}${ext}" ) list(APPEND files "${file}${ext}") endif() endforeach() # Custom target to drive all update commands. add_custom_target(${target} ALL DEPENDS ${files}) endfunction() ############################################################################## # @brief Replace DATA{} references with real arguments. # # @param [in] target Name of the external data target. # @param [out] outArgsVar List of expanded arguments. function(ExternalData_expand_arguments target outArgsVar) # Replace DATA{} references with real arguments. set(data_regex "^xDATA{([^{}\r\n]*)}$") set(outArgs "") foreach(arg IN LISTS ARGN) if("x${arg}" MATCHES "${data_regex}") string(REGEX REPLACE "${data_regex}" "\\1" data "x${arg}") _ExternalData_arg("${target}" "${arg}" "${data}" file) list(APPEND outArgs "${file}") else() list(APPEND outArgs "${arg}") endif() endforeach() set("${outArgsVar}" "${outArgs}" PARENT_SCOPE) endfunction() #----------------------------------------------------------------------------- # Private helper interface set(_ExternalData_SELF "${CMAKE_CURRENT_LIST_FILE}") get_filename_component(_ExternalData_SELF_DIR "${_ExternalData_SELF}" PATH) function(_ExternalData_compute_hash var_hash algo file) if("${algo}" STREQUAL "MD5") # TODO: Errors execute_process(COMMAND "${CMAKE_COMMAND}" -E md5sum "${file}" OUTPUT_VARIABLE output) string(SUBSTRING ${output} 0 32 hash) set("${var_hash}" "${hash}" PARENT_SCOPE) else() # TODO: Other hashes. message(FATAL_ERROR "Hash algorithm ${algo} unimplemented.") endif() endfunction() function(_ExternalData_atomic_write file content) string(RANDOM LENGTH 6 random) set(tmp "${file}.tmp${random}") file(WRITE "${tmp}" "${content}") file(RENAME "${tmp}" "${file}") endfunction() function(_ExternalData_link_content name var_ext) if("${ExternalData_LINK_CONTENT}" MATCHES "^(MD5)$") set(algo "${ExternalData_LINK_CONTENT}") else() message(FATAL_ERROR "Unknown hash algorithm specified by ExternalData_LINK_CONTENT:\n" " ${ExternalData_LINK_CONTENT}") endif() _ExternalData_compute_hash(hash "${algo}" "${name}") get_filename_component(dir "${name}" PATH) set(staged "${dir}/.ExternalData_${algo}_${hash}") set(ext ".md5") _ExternalData_atomic_write("${name}${ext}" "${hash}\n") file(RENAME "${name}" "${staged}") set("${var_ext}" "${ext}" PARENT_SCOPE) file(RELATIVE_PATH relname "${ExternalData_SOURCE_ROOT}" "${name}${ext}") message(STATUS "Linked ${relname} to ExternalData ${algo}/${hash}") endfunction() function(_ExternalData_arg target arg data var_file) # Convert to full path. if(IS_ABSOLUTE "${data}") set(absdata "${data}") else() # TODO: If ${data} does not start in "./" or "../" then use search path? get_filename_component(absdata "${CMAKE_CURRENT_SOURCE_DIR}/${data}" ABSOLUTE) endif() # Convert to relative path under the source tree. if(NOT ExternalData_SOURCE_ROOT) set(ExternalData_SOURCE_ROOT "${CMAKE_SOURCE_DIR}") endif() set(top_src "${ExternalData_SOURCE_ROOT}") file(RELATIVE_PATH reldata "${top_src}" "${absdata}") if(IS_ABSOLUTE "${reldata}" OR "${reldata}" MATCHES "^\\.\\./") message(FATAL_ERROR "Data file referenced by argument\n" " ${arg}\n" "does not lie under the top-level source directory\n" " ${top_src}\n") endif() set(top_bin "${CMAKE_BINARY_DIR}/ExternalData") # TODO: .../${target} ? # Configure series parsing and matching. if(ExternalData_SERIES_PARSE) if(NOT "${ExternalData_SERIES_PARSE}" MATCHES "^\\^\\([^()]*\\)\\([^()]*\\)\\([^()]*\\)\\$$") message(FATAL_ERROR "ExternalData_SERIES_PARSE is set to\n" " ${ExternalData_SERIES_PARSE}\n" "which is not of the form\n" " ^(...)(...)(...)$\n") endif() set(series_parse "${ExternalData_SERIES_PARSE}") else() set(series_parse "^(.*[A-Za-z_.-])([0-9]*)(\\.[^.]*)$") endif() if(ExternalData_SERIES_MATCH) set(series_match "${ExternalData_SERIES_MATCH}") else() set(series_match "[_.]?[0-9]*") endif() # Parse the base, number, and extension components of the series. string(REGEX REPLACE "${series_parse}" "\\1;\\2;\\3" tuple "${reldata}") list(LENGTH tuple len) if(NOT "${len}" EQUAL 3) message(FATAL_ERROR "Data file referenced by argument\n" " ${arg}\n" "corresponds to path\n" " ${reldata}\n" "that does not match regular expression\n" " ${series_parse}") endif() # Glob files that might match the series. list(GET tuple 0 relbase) list(GET tuple 2 ext) set(pattern "${relbase}*${ext}*") file(GLOB globbed RELATIVE "${top_src}" "${top_src}/${pattern}") # Match base, number, and extension perhaps followed by a hash ext. string(REGEX REPLACE "([][+.*()^])" "\\\\\\1" series_base "${relbase}") string(REGEX REPLACE "([][+.*()^])" "\\\\\\1" series_ext "${ext}") set(series_regex "^(${series_base}${series_match}${series_ext})(\\.[^.]*|)$") set(external "") # Entries external to the source tree. set(internal "") # Entries internal to the source tree. set(have_original 0) foreach(entry IN LISTS globbed) string(REGEX REPLACE "${series_regex}" "\\1;\\2" tuple "${entry}") list(LENGTH tuple len) if("${len}" EQUAL 2) list(GET tuple 0 relname) list(GET tuple 1 alg) set(name "${top_src}/${relname}") set(file "${top_bin}/${relname}") if(alg) list(APPEND external "${file}|${name}|${alg}") elseif(ExternalData_LINK_CONTENT) _ExternalData_link_content("${name}" alg) list(APPEND external "${file}|${name}|${alg}") else() list(APPEND internal "${file}|${name}") endif() if("${relname}" STREQUAL "${reldata}") set(have_original 1) endif() endif() endforeach() if(NOT have_original) message(FATAL_ERROR "Data file referenced by argument\n" " ${arg}\n" "corresponds to source tree path\n" " ${reldata}\n" "that does not exist (with or without an extension)!") endif() if(external) # Make the series available in the build tree. set_property(GLOBAL APPEND PROPERTY _ExternalData_${target}_FETCH "${external}") set_property(GLOBAL APPEND PROPERTY _ExternalData_${target}_LOCAL "${internal}") set("${var_file}" "${top_bin}/${reldata}" PARENT_SCOPE) else() # The whole series is in the source tree. set("${var_file}" "${top_src}/${reldata}" PARENT_SCOPE) endif() endfunction() #----------------------------------------------------------------------------- # Private script mode interface if(CMAKE_GENERATOR OR NOT ExternalData_ACTION) return() endif() if(ExternalData_CONFIG) include(${ExternalData_CONFIG}) endif() if(NOT ExternalData_URL_TEMPLATES) message(FATAL_ERROR "No ExternalData_URL_TEMPLATES set!") endif() function(_ExternalData_link_or_copy src dst) # Create a temporary file first. get_filename_component(dst_dir "${dst}" PATH) file(MAKE_DIRECTORY "${dst_dir}") string(RANDOM LENGTH 6 random) set(tmp "${dst}.tmp${random}") if(UNIX) # Create a symbolic link. set(tgt "${src}") if(relative_top) # Use relative path if files are close enough. file(RELATIVE_PATH relsrc "${relative_top}" "${src}") file(RELATIVE_PATH relfile "${relative_top}" "${dst}") if(NOT IS_ABSOLUTE "${relsrc}" AND NOT "${relsrc}" MATCHES "^\\.\\./" AND NOT IS_ABSOLUTE "${reldst}" AND NOT "${reldst}" MATCHES "^\\.\\./") file(RELATIVE_PATH tgt "${dst_dir}" "${src}") endif() endif() execute_process(COMMAND "${CMAKE_COMMAND}" -E create_symlink "${tgt}" "${tmp}" RESULT_VARIABLE result) else() # Create a copy. execute_process(COMMAND "${CMAKE_COMMAND}" -E copy "${src}" "${tmp}" RESULT_VARIABLE result) endif() if(result) file(REMOVE "${tmp}") message(FATAL_ERROR "Failed to create\n ${tmp}\nfrom\n ${obj}") endif() # Atomically create/replace the real destination. file(RENAME "${tmp}" "${dst}") endfunction() function(_ExternalData_download_object name hash algo var_obj) set(obj "${ExternalData_OBJECT_DIR}/${algo}/${hash}") if(EXISTS "${obj}") message(STATUS "Found object: \"${obj}\"") set("${var_obj}" "${obj}" PARENT_SCOPE) return() endif() string(RANDOM LENGTH 6 random) set(tmp "${obj}.tmp${random}") set(found 0) set(tried "") foreach(url_template IN LISTS ExternalData_URL_TEMPLATES) string(REPLACE "%(hash)" "${hash}" url_tmp "${url_template}") string(REPLACE "%(algo)" "${algo}" url "${url_tmp}") message(STATUS "Fetching \"${url}\"") file(DOWNLOAD "${url}" "${tmp}" STATUS status SHOW_PROGRESS) # TODO: timeout set(tried "${tried}\n ${url}") list(GET status 0 err) if(err) list(GET status 1 errMsg) set(tried "${tried} (${errMsg})") else() # Verify downloaded object. _ExternalData_compute_hash(dl_hash "${algo}" "${tmp}") if("${dl_hash}" STREQUAL "${hash}") set(found 1) break() else() set(tried "${tried} (wrong hash ${algo}=${dl_hash})") endif() endif() file(REMOVE "${tmp}") endforeach() get_filename_component(dir "${name}" PATH) set(staged "${dir}/.ExternalData_${algo}_${hash}") if(found) file(RENAME "${tmp}" "${obj}") message(STATUS "Downloaded object: \"${obj}\"") elseif(EXISTS "${staged}") set(obj "${staged}") message(STATUS "Staged object: \"${obj}\"") else() message(FATAL_ERROR "Object ${algo}=${hash} not found at:${tried}") endif() set("${var_obj}" "${obj}" PARENT_SCOPE) endfunction() if("${ExternalData_ACTION}" STREQUAL "fetch") foreach(v ExternalData_OBJECT_DIR file name ext) if(NOT DEFINED "${v}") message(FATAL_ERROR "No \"-D${v}=\" value provided!") endif() endforeach() file(READ "${name}${ext}" hash) string(STRIP "${hash}" hash) if("${ext}" STREQUAL ".md5") set(algo "MD5") else() message(FATAL_ERROR "Unknown hash algorithm extension \"${ext}\"") endif() _ExternalData_download_object("${name}" "${hash}" "${algo}" obj) # Check if file already corresponds to the object. set(file_up_to_date 0) if(EXISTS "${file}" AND EXISTS "${file}${ext}") file(READ "${file}${ext}" f_hash) string(STRIP "${f_hash}" f_hash) if("${f_hash}" STREQUAL "${hash}") #message(STATUS "File already corresponds to object") set(file_up_to_date 1) endif() endif() if(file_up_to_date) # Touch the file to convince the build system it is up to date. execute_process(COMMAND "${CMAKE_COMMAND}" -E touch "${file}") else() _ExternalData_link_or_copy("${obj}" "${file}") endif() # Atomically update the hash/timestamp file to record the object referenced. _ExternalData_atomic_write("${file}${ext}" "${hash}\n") elseif("${ExternalData_ACTION}" STREQUAL "local") foreach(v file name) if(NOT DEFINED "${v}") message(FATAL_ERROR "No \"-D${v}=\" value provided!") endif() endforeach() _ExternalData_link_or_copy("${name}" "${file}") elseif("${ExternalData_ACTION}" STREQUAL "store") foreach(v dir file) if(NOT DEFINED "${v}") message(FATAL_ERROR "No \"-D${v}=\" value provided!") endif() endforeach() if(NOT DEFINED algo) set(algo "MD5") endif() _ExternalData_compute_hash(hash "${algo}" "${file}") else() message(FATAL_ERROR "Unknnown ExternalData_ACTION=[${ExternalData_ACTION}]") endif()