Skip to content

Commit

Permalink
Merge branch 'develop' into wx_1819_reference_disks_in_batch
Browse files Browse the repository at this point in the history
  • Loading branch information
mcovarr committed Aug 21, 2024
2 parents 4f2a8cd + b1d661b commit 922ee93
Show file tree
Hide file tree
Showing 37 changed files with 33 additions and 1,343 deletions.
24 changes: 0 additions & 24 deletions backend/src/main/scala/cromwell/backend/WriteFunctions.scala
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
package cromwell.backend

import java.util.UUID

import better.files.File.OpenOptions
import cats.implicits._
import common.util.StringUtil._
import cromwell.core.io.AsyncIoFunctions
import cromwell.core.path.{Path, PathFactory}
import wom.expression.IoFunctionSet
Expand All @@ -27,13 +24,6 @@ trait WriteFunctions extends PathFactory with IoFunctionSet with AsyncIoFunction
private lazy val _writeDirectory =
if (isDocker) writeDirectory.createPermissionedDirectories() else writeDirectory.createDirectories()

override def createTemporaryDirectory(name: Option[String]) = {
val tempDirPath = _writeDirectory / name.getOrElse(UUID.randomUUID().toString)
// This is evil, but has the added advantage to work both for cloud and local
val tempDirHiddenFile = tempDirPath / ".file"
asyncIo.writeAsync(tempDirHiddenFile, "", OpenOptions.default) as tempDirPath.pathAsString
}

protected def writeAsync(file: Path, content: String) = asyncIo.writeAsync(file, content, OpenOptions.default)

override def writeFile(path: String, content: String): Future[WomSingleFile] = {
Expand All @@ -43,18 +33,4 @@ trait WriteFunctions extends PathFactory with IoFunctionSet with AsyncIoFunction
case true => Future.successful(WomSingleFile(file.pathAsString))
}
}

private val relativeToLocal = System.getProperty("user.dir").ensureSlashed

def relativeToAbsolutePath(pathFrom: String): String =
if (buildPath(pathFrom).isAbsolute) pathFrom else relativeToLocal + pathFrom

override def copyFile(pathFrom: String, targetName: String): Future[WomSingleFile] = {
val source = buildPath(relativeToAbsolutePath(pathFrom))
val destination = _writeDirectory / targetName

asyncIo.copyAsync(source, destination).as(WomSingleFile(destination.pathAsString)) recoverWith { case e =>
Future.failed(new Exception(s"Could not copy ${source.toAbsolutePath} to ${destination.toAbsolutePath}", e))
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,12 @@ package cromwell.backend.io
import cats.implicits._
import common.util.StringUtil._
import common.validation.ErrorOr._
import common.validation.Validation._
import cromwell.backend.BackendJobDescriptor
import cromwell.backend.io.DirectoryFunctions.listFiles
import cromwell.core.io.AsyncIoFunctions
import cromwell.core.path.{Path, PathFactory}
import wom.expression.IoFunctionSet.{IoDirectory, IoElement, IoFile}
import wom.expression.{IoFunctionSet, IoFunctionSetAdapter}
import wom.graph.CommandCallNode
import wom.values.{
WomFile,
WomGlobFile,
WomMaybeListedDirectory,
WomMaybePopulatedFile,
WomSingleFile,
WomUnlistedDirectory
}

import scala.concurrent.Future
import scala.util.Try
import wom.values.{WomFile, WomGlobFile, WomSingleFile, WomUnlistedDirectory}

trait DirectoryFunctions extends IoFunctionSet with PathFactory with AsyncIoFunctions {

Expand All @@ -40,50 +27,10 @@ trait DirectoryFunctions extends IoFunctionSet with PathFactory with AsyncIoFunc
}
}
}

override def isDirectory(path: String) = asyncIo.isDirectory(buildPath(path))

/*
* Several things are wrong here.
* 1) None of this is going through the I/O Actor: https://github.com/broadinstitute/cromwell/issues/3133
* which means no instrumentation, no throttling, no batching, and no custom retries.
* 2) The NIO implementation of "list" in GCS will list all objects with the prefix "path", unlike the unix
* implementation which lists files and directories children. What we need is the unix behavior, even for cloud filesystems.
* 3) It uses the isDirectory function directly on the path, which cannot be trusted for GCS paths. It should use asyncIo.isDirectory instead.
*/
override def listDirectory(path: String)(visited: Vector[String] = Vector.empty): Future[Iterator[IoElement]] =
Future.fromTry(Try {
val visitedPaths = visited.map(buildPath)
val cromwellPath = buildPath(path.ensureSlashed)

// To prevent infinite recursion through symbolic links make sure we don't visit the same directory twice
def hasBeenVisited(other: Path) = visitedPaths.exists(_.isSameFileAs(other))

cromwellPath.list.collect {
case directory
if directory.isDirectory &&
!cromwellPath.isSamePathAs(directory) &&
!hasBeenVisited(directory) =>
IoDirectory(directory.pathAsString)
case file => IoFile(file.pathAsString)
}
})

override def listAllFilesUnderDirectory(dirPath: String): Future[Seq[String]] =
temporaryImplListPaths(dirPath)

// TODO: WOM: WOMFILE: This will likely use a Tuple2(tar file, dir list file) for each dirPath.
final private def temporaryImplListPaths(dirPath: String): Future[Seq[String]] = {
val errorOrPaths = for {
dir <- validate(buildPath(dirPath.ensureSlashed))
files <- listFiles(dir)
} yield files.map(_.pathAsString)
Future.fromTry(errorOrPaths.toTry(s"Error listing files under $dirPath"))
}
}

object DirectoryFunctions {
def listFiles(path: Path): ErrorOr[List[Path]] = path.listRecursively.filterNot(_.isDirectory).toList.validNel
private def listFiles(path: Path): ErrorOr[List[Path]] = path.listRecursively.filterNot(_.isDirectory).toList.validNel

def listWomSingleFiles(womFile: WomFile, pathFactory: PathFactory): ErrorOr[List[WomSingleFile]] = {
def listWomSingleFiles(womFile: WomFile): ErrorOr[List[WomSingleFile]] =
Expand All @@ -94,21 +41,7 @@ object DirectoryFunctions {
val errorOrListPaths = listFiles(pathFactory.buildPath(womUnlistedDirectory.value.ensureSlashed))
errorOrListPaths.map(_.map(path => WomSingleFile(path.pathAsString)))

case womMaybePopulatedFile: WomMaybePopulatedFile =>
val allFiles: List[WomFile] =
womMaybePopulatedFile.valueOption.toList.map(WomSingleFile) ++ womMaybePopulatedFile.secondaryFiles
allFiles.traverse(listWomSingleFiles).map(_.flatten)

case w: WomMaybeListedDirectory =>
(w.valueOption, w.listingOption) match {
case (None, None) => Nil.valid
case (Some(value), None) => listWomSingleFiles(WomUnlistedDirectory(value))
case (None, Some(listing)) => listing.toList.traverse(listWomSingleFiles).map(_.flatten)
// TODO: WOM: WOMFILE: This is a special case where files from a different path are supposed to end up under the directory. If this implementation is correct, remove this TODO.
case (Some(_), Some(listing)) => listing.toList.traverse(listWomSingleFiles).map(_.flatten)
}
// TODO: WOM: WOMFILE: How did a glob get here? Should this link into glob functions to list the globs?

case _: WomGlobFile => s"Unexpected glob / unable to list glob files at this time: $womFile".invalidNel
}

Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,19 @@ import cromwell.core.path.PathFactory
import cromwell.core.path.PathFactory.PathBuilders
import wom.expression.{IoFunctionSet, PathFunctionSet}

import scala.util.Try

class WorkflowCorePathFunctionSet(override val pathBuilders: PathBuilders) extends PathFunctionSet with PathFactory {
private def fail(name: String) = throw new UnsupportedOperationException(
s"$name is not implemented at the workflow level"
)
override def sibling(of: String, path: String): String = buildPath(of).sibling(path).pathAsString
override def isAbsolute(path: String): Boolean = Try(buildPath(path)).map(_.isAbsolute).toOption.contains(true)
override def name(path: String) = buildPath(path).name

// Call level functions
override def relativeToHostCallRoot(path: String): String = fail("relativeToHostCallRoot")
override def stdout: String = fail("stdout")
override def stderr: String = fail("stderr")
}

class CallCorePathFunctionSet(pathBuilders: PathBuilders, callContext: CallContext)
extends WorkflowCorePathFunctionSet(pathBuilders) {
override def relativeToHostCallRoot(path: String) =
if (isAbsolute(path)) path else callContext.root.resolve(path).pathAsString
override def stdout = callContext.standardPaths.output.pathAsString
override def stderr = callContext.standardPaths.error.pathAsString
}
Expand Down
65 changes: 0 additions & 65 deletions core/src/main/scala/cromwell/core/simpleton/WomValueBuilder.scala
Original file line number Diff line number Diff line change
Expand Up @@ -101,39 +101,6 @@ object WomValueBuilder {
case MapElementPattern("right", more) => PairRight -> component.copy(path = more)
}

def toWomFile(components: Iterable[SimpletonComponent]) =
// If there's just one simpleton, it's a primitive (file or directory)
if (components.size == 1) components.asPrimitive
else {
// Otherwise make a map of the components and detect the type of file from the class field
val groupedListing = components.asMap

def isClass(className: String) =
groupedListing
.get(ClassKey)
/* If the class field is in an array it will be prefixed with a ':', so check for that as well.
* e.g: secondaryFiles[0]:class -> "File"
* secondaryFiles[0]:value -> "file/path"
* would produce a Map(
* ":class" -> List(Simpleton("File")),
* ":value" -> List(Simpleton("file/path"))
* )
*/
.orElse(groupedListing.get(s":$ClassKey"))
.map(_.asPrimitive.valueString)
.contains(className)

def isDirectory = isClass(WomValueSimpleton.DirectoryClass)
def isFile = isClass(WomValueSimpleton.FileClass)

if (isDirectory) toWomValue(WomMaybeListedDirectoryType, components)
else if (isFile) toWomValue(WomMaybePopulatedFileType, components)
else
throw new IllegalArgumentException(
s"There is no WomFile that can be built from simpletons: ${groupedListing.toList.mkString(", ")}"
)
}

outputType match {
case _: WomPrimitiveType =>
components.asPrimitive
Expand Down Expand Up @@ -175,38 +142,6 @@ object WomValueBuilder {
k -> toWomValue(valueType, ss)
}
WomObject.withTypeUnsafe(map, composite)
case WomMaybeListedDirectoryType =>
val directoryValues = components.asMap

val value = directoryValues.get("value").map(_.asString)
val listing = directoryValues
.get("listing")
.map(_.asArray.map(toWomFile).collect { case womFile: WomFile => womFile })

WomMaybeListedDirectory(value, listing)
case WomMaybePopulatedFileType =>
val populatedValues = components.asMap

val value = populatedValues.get("value").map(_.asString)
val checksum = populatedValues.get("checksum").map(_.asString)
val size = populatedValues.get("size").map(_.asString.toLong)
val format = populatedValues.get("format").map(_.asString)
val contents = populatedValues.get("contents").map(_.asString)
val secondaryFiles = populatedValues.get("secondaryFiles").toList.flatMap {
_.asArray.map(toWomFile).collect { case womFile: WomFile => womFile }
}

WomMaybePopulatedFile(
valueOption = value,
checksumOption = checksum,
sizeOption = size,
formatOption = format,
contentsOption = contents,
secondaryFiles = secondaryFiles
)
case coproductType: WomCoproductType =>
// We don't currently record the actual type of the coproduct value so use the same heuristics as for Any.
WomCoproductValue(coproductType, toWomValue(WomAnyType, components))

case WomAnyType =>
// Ok, we're going to have to guess, but the keys should give us some clues:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,10 @@ object WomValueSimpleton {
case class SimplifyMode(forCaching: Boolean)

implicit class WomValueSimplifier(womValue: WomValue) {
private def toStringSimpleton(key: String)(value: String) = WomValueSimpleton(key, WomString(value))
private def toNumberSimpleton(key: String)(value: Long) = WomValueSimpleton(key, WomInteger(value.toInt))

// Pass the simplifyMode down to recursive calls without having to sling the parameter around explicitly.
def simplify(
name: String
)(implicit simplifyMode: SimplifyMode = SimplifyMode(forCaching = false)): Iterable[WomValueSimpleton] = {
def suffix(suffix: String) = s"$name:$suffix"
val fileValueSimplifier: String => String => WomValueSimpleton =
if (simplifyMode.forCaching) key => value => WomValueSimpleton(key, WomSingleFile(value)) else toStringSimpleton
// What should this even do? Maybe just pick out the last bit of the path and store that as a String?
val directoryValueSimplifier: String => String => WomValueSimpleton =
if (simplifyMode.forCaching)
key => value => WomValueSimpleton(key, WomString(value.substring(value.lastIndexOf("/") + 1)))
else toStringSimpleton

)(implicit simplifyMode: SimplifyMode = SimplifyMode(forCaching = false)): Iterable[WomValueSimpleton] =
womValue match {
case prim: WomPrimitive => List(WomValueSimpleton(name, prim))
case opt: WomOptionalValue => opt.value.map(_.simplify(name)).getOrElse(Seq.empty)
Expand All @@ -59,33 +47,8 @@ object WomValueSimpleton {
womObjectLike.values flatMap { case (key, value) =>
value.simplify(s"$name:${key.escapeMeta}")
}
case WomMaybeListedDirectory(valueOption, listingOption, _, _) =>
// This simpleton is not strictly part of the WomFile but is used to record the type of this WomValue so it can
// be re-built appropriately in the WomValueBuilder
val classSimpleton = Option(toStringSimpleton(suffix(ClassKey))(DirectoryClass))
val valueSimpleton = valueOption.map(directoryValueSimplifier(suffix("value")))
val listingSimpletons = listingOption.toList.flatMap(files =>
files.zipWithIndex flatMap { case (arrayItem, index) => arrayItem.simplify(suffix(s"listing[$index]")) }
)
classSimpleton ++ listingSimpletons ++ valueSimpleton
case womMaybePopulatedFile: WomMaybePopulatedFile =>
// This simpleton is not strictly part of the WomFile but is used to record the type of this WomValue so it can
// be re-built appropriately in the WomValueBuilder
val classSimpleton = Option(toStringSimpleton(suffix(ClassKey))(FileClass))
val valueSimpleton = womMaybePopulatedFile.valueOption.map(fileValueSimplifier(suffix("value")))
val checksumSimpleton = womMaybePopulatedFile.checksumOption.map(toStringSimpleton(suffix("checksum")))
val contentsSimpleton = womMaybePopulatedFile.contentsOption.map(toStringSimpleton(suffix("contents")))
val sizeSimpleton = womMaybePopulatedFile.sizeOption.map(toNumberSimpleton(suffix("size")))
val formatSimpleton = womMaybePopulatedFile.formatOption.map(toStringSimpleton(suffix("format")))
val secondaryFilesSimpletons = womMaybePopulatedFile.secondaryFiles.toList.zipWithIndex flatMap {
case (arrayItem, index) => arrayItem.simplify(suffix(s"secondaryFiles[$index]"))
}

classSimpleton ++ valueSimpleton ++ checksumSimpleton ++ contentsSimpleton ++ sizeSimpleton ++ formatSimpleton ++ secondaryFilesSimpletons
case womCoproduct: WomCoproductValue => womCoproduct.womValue.simplify(name)
case other => throw new Exception(s"Cannot simplify wdl value $other of type ${other.womType}")
}
}
}

implicit class WomValuesSimplifier(womValues: Map[String, WomValue]) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ object WomValueJsonFormatter extends DefaultJsonProtocol {
case q: WomPair => new JsObject(Map("left" -> write(q.left), "right" -> write(q.right)))
case WomOptionalValue(_, Some(innerValue)) => write(innerValue)
case WomOptionalValue(_, None) => JsNull
case WomCoproductValue(_, innerValue) => write(innerValue)
case WomEnumerationValue(_, innerValue) => JsString(innerValue)
// handles WdlExpression
case v: WomValue => JsString(v.toWomString)

Expand Down
Loading

0 comments on commit 922ee93

Please sign in to comment.