Skip to content

Adds license parsing to main pipeline #257

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions ci/src/Registry/API.purs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import Registry.PackageUpload as Upload
import Registry.RegistryM (Env, RegistryM, closeIssue, comment, commitToTrunk, readPackagesMetadata, runRegistryM, throwWithComment, updatePackagesMetadata, uploadPackage)
import Registry.Schema (Manifest, Metadata, Operation(..), Repo(..), addVersionToMetadata, mkNewMetadata)
import Registry.Scripts.LegacyImport as LegacyImport
import Registry.Scripts.LegacyImport.Bowerfile as Bowerfile
import Registry.Types (bowerToManifestFields)
import Sunde as Process
import Text.Parsing.StringParser as StringParser

Expand Down Expand Up @@ -151,7 +151,7 @@ addOrUpdate { ref, fromBower, packageName } metadata = do
let absoluteTarballPath = tmpDir <> "/" <> tarballName
let archiveUrl = "https://github.com/" <> owner <> "/" <> repo <> "/archive/" <> tarballName
log $ "Fetching tarball from GitHub: " <> archiveUrl
wget archiveUrl absoluteTarballPath
wget archiveUrl absoluteTarballPath ------------------------------ we have the git repo
log $ "Tarball downloaded in " <> absoluteTarballPath
liftEffect (Tar.getToplevelDir absoluteTarballPath) >>= case _ of
Nothing ->
Expand Down Expand Up @@ -179,7 +179,7 @@ addOrUpdate { ref, fromBower, packageName } metadata = do
Json.stringifyWithIndent 2 <<< Json.encodeJson <<< NEA.toArray

manifestFields =
Bowerfile.toManifestFields bowerfile
bowerToManifestFields bowerfile

runManifest =
Except.runExceptT <<< Except.mapExceptT (liftAff <<< map (lmap printErrors))
Expand Down
10 changes: 10 additions & 0 deletions ci/src/Registry/Error.purs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
module Registry.Error where

import Prelude

import Data.Array.NonEmpty (NonEmptyArray)
import Data.Array.NonEmpty as NEA
import Data.Either (Either(..))

mkError :: forall e a. e -> Either (NonEmptyArray e) a
mkError = Left <<< NEA.singleton
64 changes: 64 additions & 0 deletions ci/src/Registry/License.purs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
module Registry.License (LicenseError(..), produceLicense) where

import Prelude

import Data.Argonaut as Json
import Data.Argonaut.Decode.Generic as Json.Decode.Generic
import Data.Argonaut.Encode.Generic as Json.Encode.Generic
import Data.Argonaut.Types.Generic as Json.Generic
import Data.Array.NonEmpty (NonEmptyArray)
import Data.Array.NonEmpty as NEA
import Data.Either (Either(..))
import Data.Filterable (filter)
import Data.Generic.Rep (class Generic)
import Data.Maybe (Maybe(..))
import Data.String.NonEmpty (NonEmptyString)
import Data.String.NonEmpty as NES
import Foreign.SPDX (License)
import Foreign.SPDX as SPDX
import Registry.Prelude (genericShow, partitionEithers)

data LicenseError
= MissingLicense
| BadLicense (Array String)

derive instance eqLicenseError :: Eq LicenseError
derive instance ordLicenseError :: Ord LicenseError
derive instance genericLicenseError :: Generic LicenseError _
instance showLicenseError :: Show LicenseError where
show e = genericShow e

instance Json.EncodeJson LicenseError where
encodeJson = Json.Encode.Generic.genericEncodeJsonWith (Json.Generic.defaultEncoding { unwrapSingleArguments = true })

instance Json.DecodeJson LicenseError where
decodeJson = Json.Decode.Generic.genericDecodeJsonWith (Json.Generic.defaultEncoding { unwrapSingleArguments = true })

produceLicense :: Maybe (NonEmptyArray NonEmptyString) -> Either LicenseError License
produceLicense license = do
let
rewrite = case _ of
"Apache 2" -> "Apache-2.0"
"Apache-2" -> "Apache-2.0"
"Apache 2.0" -> "Apache-2.0"
"BSD" -> "BSD-3-Clause"
"BSD3" -> "BSD-3-Clause"
"BSD-3" -> "BSD-3-Clause"
"3-Clause BSD" -> "BSD-3-Clause"
other -> other

case license of
Nothing -> Left MissingLicense
Just licenses -> do
let
parsed =
map (SPDX.parse <<< rewrite)
$ filter (_ /= "LICENSE")
$ map NES.toString
$ NEA.toArray licenses
{ fail, success } = partitionEithers parsed

case fail, success of
[], [] -> Left MissingLicense
[], _ -> Right $ SPDX.joinWith SPDX.Or success
_, _ -> Left $ BadLicense fail
176 changes: 176 additions & 0 deletions ci/src/Registry/Manifest.purs
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
module Registry.Manifest where

import Registry.Prelude

import Affjax as Http
import Affjax.ResponseFormat as ResponseFormat
import Affjax.StatusCode (StatusCode(..))
import Control.Monad.Except as Except
import Control.Parallel (parallel, sequential)
import Data.Argonaut as Json
import Data.Array as Array
import Data.Array.NonEmpty as NEA
import Data.Compactable (compact)
import Data.Interpolate (i)
import Data.Maybe (maybe)
import Data.String as String
import Data.String.NonEmpty as NES
import Foreign.Dhall as Dhall
import Foreign.GitHub as GitHub
import Foreign.Jsonic as Jsonic
import Foreign.Licensee as Licensee
import Foreign.Tmp as Tmp
import Node.FS.Aff as FS
import Registry.Process as Process
import Registry.Types (FileResource(..), ImportError(..), ManifestFields, RawPackageName(..), RawVersion(..), RemoteResource(..), RequestError(..), SpagoJson, bowerToManifestFields, fileResourcePath, spagoToManifestFields)

-- | Attempt to construct the basic fields necessary for a manifest file by reading
-- | the package version's bower.json, spago.dhall, package.json, and LICENSE
-- | files, if present.
constructManifestFields
:: RawPackageName
-> RawVersion
-> GitHub.Address
-> ExceptT ImportError Aff ManifestFields
constructManifestFields package version address = do
let cacheKey = i "manifest-fields__" (un RawPackageName package) "__" (un RawVersion version)
Process.withCache Process.jsonSerializer cacheKey Nothing do
-- We can construct a manifest from a package's bowerfile, package.json file,
-- spago.dhall file, and/or LICENSE files. A package doesn't need to have all
-- of these files; several of these files duplicate information. We try to
-- fetch all files but won't throw an exception (yet) if they're missing.
log $ "Constructing manifest fields for " <> un RawPackageName package <> " " <> un RawVersion version
let mkRequest file = parallel $ Except.runExceptT $ fileRequest file Process.stringSerializer
files <- liftAff $ sequential ado
licenseFile <- mkRequest LicenseFile
bowerJson <- mkRequest BowerJson
packageJson <- mkRequest PackageJson
in { licenseFile, bowerJson, packageJson }

-- TODO: Improve this heuristic by checking the Bower _and_ Spago files.
--
-- We can pull dependencies from the bower.json or spago.dhall files. If both
-- files are present, but their dependencies differ, then we should use the
-- file with newer dependencies; presumably, it's the more up-to-date file.
--
-- Since Bower users typically use ranges, but package sets use precise
-- versions, we could check to see whether one uses later major versions
-- than the other does; checking minor or patch versions will be inaccurate.
--
-- If the files differ but it isn't clear which file is newer, then we should
-- prefer the Bower file since it's the legacy format used for package p
-- publishing.
--
-- For now, that's exactly what we do: use the Bower file if it is present,
-- and otherwise fall back to the Spago file.
bowerManifest <- Except.runExceptT do
result <- Except.except files.bowerJson
case Jsonic.parseJson result >>= Json.decodeJson of
Left err -> do
let printed = Json.printJsonDecodeError err
log $ "Could not decode returned bower.json. " <> printed
log result
throwError $ ResourceError { resource: FileResource BowerJson, error: DecodeError printed }
Right bowerfile ->
pure $ bowerToManifestFields bowerfile

spagoJson <- liftAff $ Except.runExceptT requestSpagoJson
let spagoManifest = map spagoToManifestFields spagoJson

{ dependencies, devDependencies } <- case bowerManifest, spagoManifest of
Left _, Left _ -> do
-- TODO: We may want to report a `NonEmptyArray ImportError` so as to
-- report on multiple errors, such as the multiple missing files in this
-- situation.
throwError NoDependencyFiles
Left _, Right { dependencies, devDependencies } ->
pure { dependencies, devDependencies }
Right { dependencies, devDependencies }, _ ->
pure { dependencies, devDependencies }

-- We can detect the license for the project using a combination of `licensee`
-- and reading the license directly out of the Spago and Bower files (the
-- CLI tool will not read from either file).
licenseeOutput <- detectLicense files ------------------ pull this out

let
spagoLicenses = maybe [] NEA.toArray $ _.license =<< hush spagoManifest
bowerLicenses = maybe [] NEA.toArray $ _.license =<< hush bowerManifest
licenseeLicenses = compact $ map NES.fromString licenseeOutput
license = NEA.fromArray $ Array.nub $ Array.concat [ licenseeLicenses, spagoLicenses, bowerLicenses ]

when (license == Nothing) do
log $ "No license available for " <> un RawPackageName package <> " " <> un RawVersion version

pure { license, dependencies, devDependencies }
where
detectLicense { licenseFile, packageJson } = do
licenseeResult <- liftAff $ Licensee.detectFiles $ compact $ map hush
-- Detection only works on these files, and won't work on Spago files,
-- Bower files, or the JSON produced by the dhall-to-json result of
-- converting the Spago file.
[ packageJson <#> { name: "package.json", contents: _ }
, licenseFile <#> { name: "LICENSE", contents: _ }
]

detectedLicenses <- case licenseeResult of
Left err -> do
log $ "Licensee decoding error, ignoring: " <> err
pure []
Right licenses ->
pure licenses

pure detectedLicenses

-- Attempt to construct a Spago JSON file by fetching the spago.dhall and
-- packages.dhall files and converting them to JSON with dhall-to-json.
requestSpagoJson :: ExceptT ImportError Aff SpagoJson
requestSpagoJson = do
files <- sequential ado
spagoDhall <- parallel $ fileRequest SpagoDhall Process.stringSerializer
packagesDhall <- parallel $ fileRequest PackagesDhall Process.stringSerializer
in { spagoDhall, packagesDhall }

tmp <- liftEffect Tmp.mkTmpDir
liftAff $ FS.writeTextFile UTF8 (tmp <> "/packages.dhall") files.packagesDhall

spagoJson <- do
let
mkError = ResourceError <<< { resource: FileResource SpagoDhall, error: _ } <<< DecodeError
runDhallJson = Dhall.dhallToJson { dhall: files.spagoDhall, cwd: Just tmp }

Except.mapExceptT (map (lmap mkError))
$ Except.ExceptT
$ map (_ >>= (Json.decodeJson >>> lmap Json.printJsonDecodeError)) runDhallJson

pure spagoJson

-- Request a file from the remote repository associated with the package
-- version. Files will be cached using the provided serializer and
-- will be read from the cache up to the cache expiry time given in `Hours`.
fileRequest :: FileResource -> Process.Serialize String String -> ExceptT ImportError Aff String
fileRequest resource serialize = do
let
name = un RawPackageName package
tag = un RawVersion version
filePath = fileResourcePath resource
url = i "https://raw.githubusercontent.com/" address.owner "/" address.repo "/" tag "/" filePath
fileCacheName = String.replace (String.Pattern ".") (String.Replacement "-") filePath
cacheKey = i fileCacheName "__" name "__" tag
mkError = ResourceError <<< { resource: FileResource resource, error: _ }

Process.withCache serialize cacheKey Nothing do
liftAff (Http.get ResponseFormat.string url) >>= case _ of
Left error -> do
let printed = Http.printError error
log $ i "Unable to retrieve " filePath " because the request failed: " printed
throwError $ mkError BadRequest
Right { status: StatusCode status, body }
| status == 404 -> do
log $ i "Unable to retrieve " filePath " because none exists (404 error)."
throwError $ mkError $ BadStatus status
| status /= 200 -> do
log $ i "Unable to retrieve " filePath " because of a bad status code: " body
throwError $ mkError $ BadStatus status
| otherwise ->
pure body
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
module Registry.Scripts.LegacyImport.Process where
module Registry.Process where

import Registry.Prelude

Expand All @@ -22,8 +22,8 @@ import Foreign.SemVer (SemVer)
import Node.FS.Aff as FS
import Node.FS.Stats (Stats(..))
import Registry.PackageName (PackageName)
import Registry.Scripts.LegacyImport.Error (ImportError(..), ImportErrorKey, PackageFailures(..), RawPackageName, RawVersion, RequestError(..))
import Registry.Scripts.LegacyImport.Error as LegacyImport.Error
import Registry.Types (ImportError(..), ImportErrorKey, PackageFailures(..), RawPackageName, RawVersion, RequestError(..))
import Registry.Types as Registry.Types

type ProcessedPackages k a =
{ failures :: PackageFailures
Expand All @@ -47,7 +47,7 @@ forPackage input f = do
Except.runExceptT (f name value) >>= case _ of
Left err -> do
let
errorType = LegacyImport.Error.printImportErrorKey err
errorType = Registry.Types.printImportErrorKey err
failure = Map.singleton name (Left err)
var # modifyAVar \state -> state { failures = insertFailure errorType failure state.failures }
Right (Tuple newKey result) -> do
Expand Down Expand Up @@ -83,7 +83,7 @@ forPackageVersion input f = do
Except.runExceptT (f k1 k2 value) >>= case _ of
Left err -> do
let
errorType = LegacyImport.Error.printImportErrorKey err
errorType = Registry.Types.printImportErrorKey err
failure = Map.singleton name $ Right $ Map.singleton tag err
var # modifyAVar \state -> state { failures = insertFailure errorType failure state.failures }
Right result -> do
Expand Down Expand Up @@ -124,7 +124,7 @@ forPackageVersionKeys input f = do
Except.runExceptT (f k1 tag) >>= case _ of
Left err -> do
let
errorType = LegacyImport.Error.printImportErrorKey err
errorType = Registry.Types.printImportErrorKey err
failure = Map.singleton name $ Right $ Map.singleton tag err
var # modifyAVar \state -> state { failures = insertFailure errorType failure state.failures }
Right (Tuple k3 k4) -> do
Expand Down
Loading