aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTony Aiuto <aiuto@google.com>2022-11-29 17:29:12 -0500
committerTony Aiuto <aiuto@google.com>2022-11-30 00:09:56 -0500
commitefcc6e1a02887389c96b2322a6741677a8c99423 (patch)
treebb52046c2998df563cbb6699eb402de18e49544f
parentae2f8a528e14bd545ff5835f60592c6e148ef57e (diff)
downloadbazelbuild-rules_license-efcc6e1a02887389c96b2322a6741677a8c99423.tar.gz
Add package_info rule and a new gatherer to manage it.
- Add rules/package_info.bzl - Refactor get_transitive_licenses to get_transitive_metadata. - Take a list of providers to gather. - Some hackery for Bazel 5.x support. This can be fixed if starlark visibility gets backported to Bazel 5. - Add gather_metadata.bzl. This is so we can freely experiment on techniques for multi provider support in OSS land, without impacting existing users in Google. We can merge them some day in the future. - Create a dummy sbom writer. There is also experimental code to show a different design choice for new types of Metadata. I want to preserve both for a while to have a broader design discussion over the next month.
-rw-r--r--rules/gather_licenses_info.bzl4
-rw-r--r--rules/gather_metadata.bzl303
-rw-r--r--rules/licenses_core.bzl44
-rw-r--r--rules/package_info.bzl106
-rw-r--r--rules/providers.bzl38
-rw-r--r--rules/sbom.bzl159
-rw-r--r--tools/BUILD19
-rw-r--r--tools/write_sbom.py117
8 files changed, 777 insertions, 13 deletions
diff --git a/rules/gather_licenses_info.bzl b/rules/gather_licenses_info.bzl
index a5f1a41..b676972 100644
--- a/rules/gather_licenses_info.bzl
+++ b/rules/gather_licenses_info.bzl
@@ -16,7 +16,7 @@
load(
"@rules_license//rules:licenses_core.bzl",
"TraceInfo",
- "gather_licenses_info_common",
+ "gather_metadata_info_common",
"should_traverse",
)
load(
@@ -41,7 +41,7 @@ def _strip_null_repo(label):
return s
def _gather_licenses_info_impl(target, ctx):
- return gather_licenses_info_common(target, ctx, TransitiveLicensesInfo, NAMESPACES, should_traverse)
+ return gather_metadata_info_common(target, ctx, TransitiveLicensesInfo, NAMESPACES, [], should_traverse)
gather_licenses_info = aspect(
doc = """Collects LicenseInfo providers into a single TransitiveLicensesInfo provider.""",
diff --git a/rules/gather_metadata.bzl b/rules/gather_metadata.bzl
new file mode 100644
index 0000000..4fc0bbd
--- /dev/null
+++ b/rules/gather_metadata.bzl
@@ -0,0 +1,303 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Rules and macros for collecting LicenseInfo and other package metadata providers."""
+
+load(
+ "@rules_license//rules:licenses_core.bzl",
+ "TraceInfo",
+ "gather_metadata_info_common",
+ "should_traverse",
+)
+load(
+ "@rules_license//rules:providers.bzl",
+ "MetadataInfo",
+ "PackageInfo",
+ "TransitiveMetadataInfo",
+)
+
+# Definition for compliance namespace, used for filtering licenses
+# based on the namespace to which they belong. The list is handed to
+# gather_metadata_info_common by the aspect implementation in this file.
+NAMESPACES = ["compliance"]
+
+def _strip_null_repo(label):
+    """Renders a label as a string without the null repo prefix.
+
+    This makes str(label) compatible between bazel 5.x and 6.x.
+
+    Args:
+      label: a Label, or any other value that str() can render.
+
+    Returns:
+      str(label) with a leading "@" or "@@" dropped when it is immediately
+      followed by "//"; otherwise str(label) unchanged.
+    """
+    text = str(label)
+    for null_prefix in ("@@//", "@//"):
+        if text.startswith(null_prefix):
+            # Keep the trailing "//" of the prefix, drop only the "@" signs.
+            return text[len(null_prefix) - 2:]
+    return text
+
+def _bazel_package(label):
+    """Returns the label string with its trailing target name removed.
+
+    Args:
+      label: a Label; its `name` field determines how much is cut off.
+
+    Returns:
+      The output of _strip_null_repo(label) minus its last len(label.name) + 1
+      characters (the name plus, presumably, the ":" separator).
+    """
+    rendered = _strip_null_repo(label)
+    suffix_len = len(label.name) + 1
+    return rendered[:-suffix_len]
+
+def _gather_metadata_info_impl(target, ctx):
+    """Aspect implementation that delegates to gather_metadata_info_common.
+
+    Args:
+      target: The target of the aspect.
+      ctx: The aspect evaluation context.
+
+    Returns:
+      Whatever gather_metadata_info_common returns when asked to build a
+      TransitiveMetadataInfo, filtering licenses by NAMESPACES and also
+      collecting MetadataInfo and PackageInfo providers.
+    """
+    return gather_metadata_info_common(target, ctx, TransitiveMetadataInfo, NAMESPACES, [MetadataInfo, PackageInfo], should_traverse)
+
+# Aspect that propagates along every attribute ("*") and declares that it
+# provides TransitiveMetadataInfo.
+gather_metadata_info = aspect(
+    doc = """Collects LicenseInfo providers into a single TransitiveMetadataInfo provider.""",
+    implementation = _gather_metadata_info_impl,
+    attr_aspects = ["*"],
+    attrs = {
+        # NOTE(review): presumably read by gather_metadata_info_common to
+        # decide whether to record traces - confirm in licenses_core.bzl.
+        "_trace": attr.label(default = "@rules_license//rules:trace_target"),
+    },
+    provides = [TransitiveMetadataInfo],
+    apply_to_generating_rules = True,
+)
+
+def _write_metadata_info_impl(target, ctx):
+    """Writes the transitive metadata of a target into a JSON file.
+
+    Args:
+      target: The target of the aspect.
+      ctx: The aspect evaluation context.
+
+    Returns:
+      A list with one OutputGroupInfo. Its `licenses` group holds the files
+      written here, and is empty when the target has nothing to report.
+    """
+    if TransitiveMetadataInfo not in target:
+        return [OutputGroupInfo(licenses = depset())]
+    metadata = target[TransitiveMetadataInfo]
+
+    # If the result doesn't contain licenses, there is nothing to write.
+    if not hasattr(metadata, "target_under_license"):
+        return [OutputGroupInfo(licenses = depset())]
+
+    # Write the output file for the target.
+    json_text = "[\n%s\n]\n" % ",\n".join(metadata_info_to_json(metadata))
+    json_file = ctx.actions.declare_file("%s_metadata_info.json" % ctx.label.name)
+    ctx.actions.write(
+        output = json_file,
+        content = json_text,
+    )
+    written = [json_file]
+
+    # Optionally emit the dependency traces next to the JSON file.
+    if ctx.attr._trace[TraceInfo].trace:
+        trace_file = ctx.actions.declare_file("%s_trace_info.json" % ctx.label.name)
+        ctx.actions.write(output = trace_file, content = "\n".join(metadata.traces))
+        written.append(trace_file)
+
+    return [OutputGroupInfo(licenses = depset(written))]
+
+# Aspect that runs on top of gather_metadata_info (see `requires`) and writes
+# the gathered metadata to files exposed through the "licenses" output group.
+gather_metadata_info_and_write = aspect(
+    doc = """Collects TransitiveMetadataInfo providers and writes JSON representation to a file.
+
+    Usage:
+      bazel build //some:target \
+          --aspects=@rules_license//rules:gather_metadata.bzl%gather_metadata_info_and_write \
+          --output_groups=licenses
+    """,
+    implementation = _write_metadata_info_impl,
+    attr_aspects = ["*"],
+    attrs = {
+        # Read by _write_metadata_info_impl through TraceInfo to decide
+        # whether a trace file is written as well.
+        "_trace": attr.label(default = "@rules_license//rules:trace_target"),
+    },
+    provides = [OutputGroupInfo],
+    requires = [gather_metadata_info],
+    apply_to_generating_rules = True,
+)
+
+def write_metadata_info(ctx, deps, json_out):
+    """Writes TransitiveMetadataInfo providers for a set of targets as JSON.
+
+    TODO(aiuto): Document JSON schema. But it is under development, so the current
+    best place to look is at tests/hello_licenses.golden.
+
+    Usage:
+      write_metadata_info must be called from a rule implementation, where the
+      rule has run the gather_metadata_info aspect on its deps to
+      collect the transitive closure of metadata into a
+      TransitiveMetadataInfo provider.
+
+      foo = rule(
+        implementation = _foo_impl,
+        attrs = {
+           "deps": attr.label_list(aspects = [gather_metadata_info])
+        }
+      )
+
+      def _foo_impl(ctx):
+        ...
+        metadata_file = ctx.actions.declare_file("%s_licenses.json" % ctx.label.name)
+        write_metadata_info(ctx, ctx.attr.deps, metadata_file)
+
+    Args:
+      ctx: context of the caller
+      deps: a list of deps which should have TransitiveMetadataInfo providers.
+            This requires that you have run the gather_metadata_info
+            aspect over them. Deps without the provider are skipped.
+      json_out: output handle to write the JSON info
+    """
+    rendered = []
+    for dep in deps:
+        if TransitiveMetadataInfo not in dep:
+            continue
+        rendered.extend(metadata_info_to_json(dep[TransitiveMetadataInfo]))
+    ctx.actions.write(
+        output = json_out,
+        content = "[\n%s\n]\n" % ",\n".join(rendered),
+    )
+
+def metadata_info_to_json(metadata_info):
+    """Render a single TransitiveMetadataInfo provider to JSON
+
+    Args:
+      metadata_info: A TransitiveMetadataInfo. Its deps, licenses and
+          other_metadata depsets and its target_under_license are read.
+
+    Returns:
+      [(str)] a one-element list holding the provider rendered as a JSON object.
+    """
+
+    main_template = """  {{
+    "top_level_target": "{top_level_target}",
+    "dependencies": [{dependencies}
+    ],
+    "licenses": [{licenses}
+    ],
+    "packages": [{packages}
+    ]\n  }}"""
+
+    dep_template = """
+      {{
+        "target_under_license": "{target_under_license}",
+        "licenses": [
+          {licenses}
+        ]
+      }}"""
+
+    license_template = """
+      {{
+        "label": "{label}",
+        "bazel_package": "{bazel_package}",
+        "license_kinds": [{kinds}
+        ],
+        "copyright_notice": "{copyright_notice}",
+        "package_name": "{package_name}",
+        "package_url": "{package_url}",
+        "package_version": "{package_version}",
+        "license_text": "{license_text}",
+        "used_by": [
+          {used_by}
+        ]
+      }}"""
+
+    kind_template = """
+          {{
+            "target": "{kind_path}",
+            "name": "{kind_name}",
+            "conditions": {kind_conditions}
+          }}"""
+
+    package_info_template = """
+      {{
+        "target": "{label}",
+        "bazel_package": "{bazel_package}",
+        "package_name": "{package_name}",
+        "package_url": "{package_url}",
+        "package_version": "{package_version}"
+      }}"""
+
+    # The dep list is needed twice below, so flatten the depset only once.
+    dep_infos = metadata_info.deps.to_list()
+
+    # Build reverse map of license to user
+    used_by = {}
+    for dep in dep_infos:
+        user = _strip_null_repo(dep.target_under_license)
+
+        # Undo the concatenation applied when stored in the provider.
+        for license in dep.licenses.split(","):
+            used_by.setdefault(license, []).append(user)
+
+    all_licenses = []
+    for license in sorted(metadata_info.licenses.to_list(), key = lambda x: x.label):
+        kinds = []
+        for kind in sorted(license.license_kinds, key = lambda x: x.name):
+            kinds.append(kind_template.format(
+                kind_name = kind.name,
+                kind_path = kind.label,
+                kind_conditions = kind.conditions,
+            ))
+
+        # Default to an empty path so that a license without license_text
+        # does not leave text_path unassigned when it is formatted below.
+        text_path = ""
+        if license.license_text:
+            # Special handling for synthetic LicenseInfo
+            if type(license.license_text) == "Label":
+                text_path = license.license_text.package + "/" + license.license_text.name
+            else:
+                text_path = license.license_text.path
+
+        # A license that no dep lists has no users; render an empty list
+        # instead of failing on the missing key.
+        users = used_by.get(str(license.label), [])
+        all_licenses.append(license_template.format(
+            copyright_notice = license.copyright_notice,
+            kinds = ",".join(kinds),
+            license_text = text_path,
+            package_name = license.package_name,
+            package_url = license.package_url,
+            package_version = license.package_version,
+            label = _strip_null_repo(license.label),
+            bazel_package = _bazel_package(license.label),
+            used_by = ",\n ".join(sorted(['"%s"' % x for x in users])),
+        ))
+
+    all_deps = []
+    for dep in sorted(dep_infos, key = lambda x: x.target_under_license):
+        # Undo the concatenation applied when stored in the provider.
+        dep_licenses = dep.licenses.split(",")
+        all_deps.append(dep_template.format(
+            target_under_license = _strip_null_repo(dep.target_under_license),
+            licenses = ",\n ".join(sorted(['"%s"' % _strip_null_repo(x) for x in dep_licenses])),
+        ))
+
+    all_packages = []
+    # We would use this if we had distinct depsets for every provider type.
+    #for package in sorted(metadata_info.package_info.to_list(), key = lambda x: x.label):
+    #    all_packages.append(package_info_template.format(
+    #        label = _strip_null_repo(package.label),
+    #        copyright_notice = package.copyright_notice,
+    #        package_name = package.package_name,
+    #        package_url = package.package_url,
+    #        package_version = package.package_version,
+    #    ))
+
+    for mi in sorted(metadata_info.other_metadata.to_list(), key = lambda x: x.label):
+        # Maybe use a map of provider class to formatter. A generic dict->json function
+        # in starlark would help
+
+        # This format is for using distinct providers. I like the compile time safety.
+        if mi.type == "package_info":
+            all_packages.append(package_info_template.format(
+                label = _strip_null_repo(mi.label),
+                bazel_package = _bazel_package(mi.label),
+                package_name = mi.package_name,
+                package_url = mi.package_url,
+                package_version = mi.package_version,
+            ))
+
+        # experimental: Support the MetadataInfo bag of data
+        elif mi.type == "package_info_alt":
+            all_packages.append(package_info_template.format(
+                label = _strip_null_repo(mi.label),
+                bazel_package = _bazel_package(mi.label),
+                # data is just a bag, so we need to use get() or ""
+                package_name = mi.data.get("package_name") or "",
+                package_url = mi.data.get("package_url") or "",
+                package_version = mi.data.get("package_version") or "",
+            ))
+
+    return [main_template.format(
+        top_level_target = _strip_null_repo(metadata_info.target_under_license),
+        dependencies = ",".join(all_deps),
+        licenses = ",".join(all_licenses),
+        packages = ",".join(all_packages),
+    )]
diff --git a/rules/licenses_core.bzl b/rules/licenses_core.bzl
index 42702bd..cf476a4 100644
--- a/rules/licenses_core.bzl
+++ b/rules/licenses_core.bzl
@@ -19,6 +19,7 @@ load(
"@rules_license//rules:providers.bzl",
"LicenseInfo",
"LicensedTargetInfo",
+ "TransitiveLicensesInfo",
)
@@ -66,7 +67,7 @@ def should_traverse(ctx, attr):
return True
-def _get_transitive_licenses(ctx, trans_licenses, trans_deps, traces, provider, filter_func):
+def _get_transitive_metadata(ctx, trans_licenses, trans_other_metadata, trans_package_info, trans_deps, traces, provider, filter_func):
attrs = [a for a in dir(ctx.rule.attr)]
for name in attrs:
if not filter_func(ctx, name):
@@ -96,8 +97,21 @@ def _get_transitive_licenses(ctx, trans_licenses, trans_deps, traces, provider,
for trace in info.traces:
traces.append("(" + ", ".join([str(ctx.label), ctx.rule.kind, name]) + ") -> " + trace)
-def gather_licenses_info_common(target, ctx, provider_factory, namespaces, filter_func):
- """Collect license info from myself and my deps.
+ # We only need one or the other of these stanzas.
+ # If we use a polymorphic approach to metadata providers, then
+ # this works.
+ if hasattr(info, "other_metadata"):
+ if info.other_metadata:
+ trans_other_metadata.append(info.other_metadata)
+ # But if we want more precise type safety, we would have a
+ # trans_* for each type of metadata. That is not user
+ # extensible.
+ if hasattr(info, "package_info"):
+ if info.package_info:
+ trans_package_info.append(info.package_info)
+
+def gather_metadata_info_common(target, ctx, provider_factory, namespaces, metadata_providers, filter_func):
+ """Collect license and other metadata info from myself and my deps.
Any single target might directly depend on a license, or depend on
something that transitively depends on a license, or neither.
@@ -116,6 +130,7 @@ def gather_licenses_info_common(target, ctx, provider_factory, namespaces, filte
ctx: The aspect evaluation context.
provider_factory: abstracts the provider returned by this aspect
namespaces: a list of namespaces licenses must match to be included
+ metadata_providers: a list of other providers of interest
filter_func: a function that returns true iff the dep edge should be ignored
Returns:
@@ -124,6 +139,8 @@ def gather_licenses_info_common(target, ctx, provider_factory, namespaces, filte
# First we gather my direct license attachments
licenses = []
+ other_metadata = []
+ package_info = []
if ctx.rule.kind == "_license":
# Don't try to gather licenses from the license rule itself. We'll just
# blunder into the text file of the license and pick up the default
@@ -144,14 +161,18 @@ def gather_licenses_info_common(target, ctx, provider_factory, namespaces, filte
licenses.append(lic)
else:
fail("should have a namespace")
-
+ for m_p in metadata_providers:
+ if m_p in dep:
+ other_metadata.append(dep[m_p])
# Now gather transitive collection of providers from the targets
# this target depends upon.
trans_licenses = []
+ trans_other_metadata = []
+ trans_package_info = []
trans_deps = []
traces = []
- _get_transitive_licenses(ctx, trans_licenses, trans_deps, traces, provider_factory, filter_func)
+ _get_transitive_metadata(ctx, trans_licenses, trans_other_metadata, trans_package_info, trans_deps, traces, provider_factory, filter_func)
if not licenses and not trans_licenses:
return [provider_factory(deps = depset(), licenses = depset(), traces = [])]
@@ -179,9 +200,22 @@ def gather_licenses_info_common(target, ctx, provider_factory, namespaces, filte
else:
direct_license_uses = None
+ # This is a bit of a hack for bazel 5.x. We can not pass extra fields to
+ # the provider constructor, so we need to do something special for each.
+ # In Bazel 6.x we can use a provider initializer function that would take
+ # all the args and only use the ones it wants.
+ if provider_factory == TransitiveLicensesInfo:
+ return [provider_factory(
+ target_under_license = target.label,
+ licenses = depset(tuple(licenses), transitive = trans_licenses),
+ deps = depset(direct = direct_license_uses, transitive = trans_deps),
+ traces = traces,
+ )]
+
return [provider_factory(
target_under_license = target.label,
licenses = depset(tuple(licenses), transitive = trans_licenses),
+ other_metadata = depset(tuple(other_metadata), transitive = trans_other_metadata),
deps = depset(direct = direct_license_uses, transitive = trans_deps),
traces = traces,
)]
diff --git a/rules/package_info.bzl b/rules/package_info.bzl
new file mode 100644
index 0000000..8f5460a
--- /dev/null
+++ b/rules/package_info.bzl
@@ -0,0 +1,106 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Rules for declaring metadata about a package."""
+
+load(
+ "@rules_license//rules:providers.bzl",
+ "MetadataInfo",
+ "PackageInfo",
+)
+
+#
+# package_info()
+#
+
+def _package_info_impl(ctx):
+    """Implementation of the _package_info rule.
+
+    Args:
+      ctx: the rule context.
+
+    Returns:
+      A list with a PackageInfo and a MetadataInfo describing the same package.
+    """
+
+    # Default the package name to the directory that holds the BUILD file.
+    # rpartition("/") is used instead of rstrip("/BUILD"): rstrip treats its
+    # argument as a set of characters, so it also removes trailing letters of
+    # the directory itself (e.g. "lib/UI/BUILD" -> "lib") and leaves a
+    # "BUILD.bazel" file name in place.
+    package_name = ctx.attr.package_name or ctx.build_file_path.rpartition("/")[0]
+
+    provider = PackageInfo(
+        # Metadata providers must include a type discriminator. We don't need it
+        # to collect the providers, but we do need it to write the JSON. We
+        # key on the type field to look up the correct block of code to pull
+        # data out and format it. We can't do the lookup on the provider class.
+        type = "package_info",
+        label = ctx.label,
+        package_name = package_name,
+        package_url = ctx.attr.package_url,
+        package_version = ctx.attr.package_version,
+    )
+
+    # Experimental alternate design, using a generic 'data' bag to hold things
+    generic_provider = MetadataInfo(
+        type = "package_info_alt",
+        label = ctx.label,
+        data = {
+            "package_name": package_name,
+            "package_url": ctx.attr.package_url,
+            "package_version": ctx.attr.package_version,
+        },
+    )
+    return [provider, generic_provider]
+
+# Rule behind the package_info() macro. Its implementation returns both a
+# PackageInfo and a MetadataInfo for the declared package.
+_package_info = rule(
+    implementation = _package_info_impl,
+    attrs = {
+        # NOTE: accepted here, but _package_info_impl does not read it.
+        "copyright_notice": attr.string(
+            doc = "Copyright notice.",
+        ),
+        "package_name": attr.string(
+            doc = "A human readable name identifying this package." +
+                  " This may be used to produce an index of OSS packages used by" +
+                  " an application.",
+        ),
+        "package_url": attr.string(
+            doc = "The URL this instance of the package was downloaded from." +
+                  " This may be used to produce an index of OSS packages used by" +
+                  " an application.",
+        ),
+        "package_version": attr.string(
+            doc = "A human readable version string identifying this package." +
+                  " This may be used to produce an index of OSS packages used" +
+                  " by an application. It should be a value that" +
+                  " increases over time, rather than a commit hash.",
+        ),
+    },
+)
+
+# buildifier: disable=function-docstring-args
+def package_info(
+        name,
+        copyright_notice = None,
+        package_name = None,
+        package_url = None,
+        package_version = None,
+        visibility = ["//visibility:public"]):
+    """Wrapper for package_info rule.
+
+    Args:
+      name: str target name.
+      copyright_notice: str Copyright notice associated with this package.
+      package_name : str A human readable name identifying this package. This
+                     may be used to produce an index of OSS packages used by
+                     an application.
+      package_url: str The URL this instance of the package was downloaded from.
+      package_version: str A human readable version string identifying this
+                       package.
+      visibility: list(str) visibility of the target. Defaults to public.
+    """
+    _package_info(
+        name = name,
+        copyright_notice = copyright_notice,
+        package_name = package_name,
+        package_url = package_url,
+        package_version = package_version,
+        # The remaining attributes are fixed; this wrapper does not let
+        # callers set them.
+        applicable_licenses = [],
+        visibility = visibility,
+        tags = [],
+        testonly = 0,
+    )
diff --git a/rules/providers.bzl b/rules/providers.bzl
index 8778fd7..b8e61ce 100644
--- a/rules/providers.bzl
+++ b/rules/providers.bzl
@@ -59,3 +59,41 @@ def licenses_info():
# This provider is used by the aspect that is used by manifest() rules.
TransitiveLicensesInfo = licenses_info()
+
+# This is one way to specify data: a dedicated provider with one named
+# field per value.
+PackageInfo = provider(
+    doc = """Provides information about a package.""",
+    fields = {
+        "type": "string: How to interpret data",
+        "label": "Label: label of the package_info rule",
+        "package_name": "string: Human readable package name",
+        "package_url": "string: URL from which this package was downloaded.",
+        "package_version": "string: Human readable version string",
+    },
+)
+
+# This is more extensible. Because of the provider implementation, having a big
+# dict of values rather than named fields is not much more costly.
+# Design choice: the alternative is to replace `data` with actual providers,
+# such as PackageInfo.
+MetadataInfo = provider(
+    doc = """Generic bag of metadata.""",
+    fields = {
+        "type": "string: How to interpret data",
+        "label": "Label: label of the metadata rule",
+        "data": "String->any: Map of names to values",
+    }
+)
+
+# Provider returned by the gather_metadata_info aspect. It carries the same
+# target_under_license / licenses / deps / traces fields that
+# gather_metadata_info_common fills in for license gathering, plus fields for
+# other kinds of metadata.
+TransitiveMetadataInfo = provider(
+    doc = """The transitive set of licenses and other metadata used by a target.""",
+    fields = {
+        "top_level_target": "Label: The top level target label.",
+        "other_metadata": "depset(MetadataInfo)",
+        "licenses": "depset(LicenseInfo)",
+        "package_info": "depset(PackageInfo)",
+
+        "target_under_license": "Label: The top level target label.",
+        "deps": "depset(LicensedTargetInfo): The transitive list of dependencies that have licenses.",
+        "traces": "list(string) - diagnostic for tracing a dependency relationship to a target.",
+    },
+)
diff --git a/rules/sbom.bzl b/rules/sbom.bzl
new file mode 100644
index 0000000..fb17adc
--- /dev/null
+++ b/rules/sbom.bzl
@@ -0,0 +1,159 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""SBOM generation"""
+
+load(
+ "@rules_license//rules:gather_metadata.bzl",
+ "gather_metadata_info",
+ "gather_metadata_info_and_write",
+ "write_metadata_info",
+)
+load(
+    "@rules_license//rules:providers.bzl",
+    "TransitiveLicensesInfo",
+    "TransitiveMetadataInfo",
+)
+
+# This rule is proof of concept, and may not represent the final
+# form of a rule for compliance validation.
+def _generate_sbom_impl(ctx):
+    """Writes the gathered metadata as JSON and runs the SBOM generator on it.
+
+    Args:
+      ctx: the rule context.
+
+    Returns:
+      A list with one DefaultInfo holding the SBOM file and the intermediate
+      JSON file.
+    """
+
+    # Gather all licenses and write information to one place
+    licenses_file = ctx.actions.declare_file("_%s_licenses_info.json" % ctx.label.name)
+    write_metadata_info(ctx, ctx.attr.deps, licenses_file)
+
+    # Now turn the big blob of data into something consumable.
+    sbom_file = ctx.outputs.out
+    args = ctx.actions.args()
+    args.add("--licenses_info", licenses_file.path)
+    args.add("--out", sbom_file.path)
+    ctx.actions.run(
+        mnemonic = "CreateSBOM",
+        progress_message = "Creating SBOM for %s" % ctx.label,
+        inputs = [licenses_file],
+        outputs = [sbom_file],
+        executable = ctx.executable._sbom_generator,
+        arguments = [args],
+    )
+
+    # Also make the json file available.
+    return [DefaultInfo(files = depset([sbom_file, licenses_file]))]
+
+# Rule behind generate_sbom().
+_generate_sbom = rule(
+    implementation = _generate_sbom_impl,
+    attrs = {
+        # The gather_metadata_info aspect runs over every dep.
+        "deps": attr.label_list(
+            aspects = [gather_metadata_info],
+        ),
+        # File the SBOM generator is told to write (passed as --out).
+        "out": attr.output(mandatory = True),
+        # Tool executed by _generate_sbom_impl.
+        "_sbom_generator": attr.label(
+            default = Label("@rules_license//tools:write_sbom"),
+            executable = True,
+            allow_files = True,
+            cfg = "exec",
+        ),
+    },
+)
+
+def generate_sbom(**kwargs):
+    """Public wrapper that forwards every argument to the _generate_sbom rule.
+
+    Args:
+      **kwargs: attributes for _generate_sbom (the rule declares `deps` and a
+          mandatory `out`).
+    """
+    _generate_sbom(**kwargs)
+
+def _manifest_impl(ctx):
+    """Writes a manifest of license files and returns those files.
+
+    Args:
+      ctx: the rule context.
+
+    Returns:
+      A list with one DefaultInfo whose files are the license text files
+      found by get_licenses_mapping.
+    """
+
+    # Gather all licenses and make it available as deps for downstream rules
+    # Additionally write the list of license filenames to a file that can
+    # also be used as an input to downstream rules.
+    licenses_file = ctx.actions.declare_file(ctx.attr.out.name)
+    mappings = get_licenses_mapping(ctx.attr.deps, ctx.attr.warn_on_legacy_licenses)
+    ctx.actions.write(
+        output = licenses_file,
+        # One "<license file path>,<package name>" line per entry.
+        content = "\n".join([",".join([f.path, p]) for (f, p) in mappings.items()]),
+    )
+
+    # NOTE(review): licenses_file itself is not part of the returned files;
+    # confirm that is intended.
+    return [DefaultInfo(files = depset(mappings.keys()))]
+
+# Rule behind the manifest() macro.
+_manifest = rule(
+    implementation = _manifest_impl,
+    doc = """Internal implementation method for manifest().""",
+    attrs = {
+        "deps": attr.label_list(
+            doc = """List of targets to collect license files for.""",
+            aspects = [gather_metadata_info],
+        ),
+        "out": attr.output(
+            doc = """Output file.""",
+            mandatory = True,
+        ),
+        # Forwarded to get_licenses_mapping as its `warn` argument.
+        "warn_on_legacy_licenses": attr.bool(default = False),
+    },
+)
+
+def manifest(name, deps, out = None, **kwargs):
+    """Declares a _manifest target, defaulting its output file name.
+
+    Args:
+      name: target name.
+      deps: targets to collect license files for.
+      out: output file name; "<name>.manifest" is used when it is not given.
+      **kwargs: further attributes passed through to _manifest.
+    """
+    manifest_out = out or (name + ".manifest")
+    _manifest(name = name, deps = deps, out = manifest_out, **kwargs)
+
+def _licenses_used_impl(ctx):
+    """Writes the metadata gathered from deps to the rule's `out` file.
+
+    Args:
+      ctx: the rule context.
+
+    Returns:
+      A list with one DefaultInfo holding the JSON output file.
+    """
+
+    # Gather all licenses and make it available as JSON
+    write_metadata_info(ctx, ctx.attr.deps, ctx.outputs.out)
+    return [DefaultInfo(files = depset([ctx.outputs.out]))]
+
+# Rule that writes the metadata of its deps as JSON. No public wrapper for it
+# is defined in this file.
+_licenses_used = rule(
+    implementation = _licenses_used_impl,
+    doc = """Internal implementation method for licenses_used().""",
+    attrs = {
+        "deps": attr.label_list(
+            doc = """List of targets to collect LicenseInfo for.""",
+            aspects = [gather_metadata_info_and_write],
+        ),
+        "out": attr.output(
+            doc = """Output file.""",
+            mandatory = True,
+        ),
+    },
+)
+
+def get_licenses_mapping(deps, warn = False):
+    """Creates list of entries representing all licenses for the deps.
+
+    Args:
+
+      deps: a list of deps which should have TransitiveMetadataInfo or
+            TransitiveLicensesInfo providers. This requires that you have
+            run the gather_metadata_info (or gather_licenses_info) aspect
+            over them. Deps carrying neither provider are skipped.
+
+      warn: boolean, if true, display output about legacy targets that need
+            update
+
+    Returns:
+      {File:package_name}
+    """
+    transitive_licenses = []
+    for dep in deps:
+        # The rules in this file attach gather_metadata_info, which provides
+        # TransitiveMetadataInfo, so look for that first. TransitiveLicensesInfo
+        # is still accepted for callers that gathered with the older aspect.
+        if TransitiveMetadataInfo in dep:
+            info = dep[TransitiveMetadataInfo]
+        elif TransitiveLicensesInfo in dep:
+            info = dep[TransitiveLicensesInfo]
+        else:
+            continue
+        if hasattr(info, "licenses"):
+            transitive_licenses.append(info.licenses)
+
+    all_licenses = depset(transitive = transitive_licenses)
+
+    # Ignore any legacy licenses that may be in the report
+    mappings = {}
+    for lic in all_licenses.to_list():
+        if type(lic.license_text) == "File":
+            mappings[lic.license_text] = lic.package_name
+        elif warn:
+            print("Legacy license %s not included, rule needs updating" % lic.license_text)
+
+    return mappings
diff --git a/tools/BUILD b/tools/BUILD
index 9be1c2d..bc4005f 100644
--- a/tools/BUILD
+++ b/tools/BUILD
@@ -21,6 +21,14 @@ package(
licenses(["notice"])
+filegroup(
+ name = "standard_package",
+ srcs = glob(["**"]),
+ visibility = ["//distro:__pkg__"],
+)
+
+exports_files(["diff_test.sh"])
+
py_binary(
name = "checker_demo",
srcs = ["checker_demo.py"],
@@ -28,10 +36,9 @@ py_binary(
visibility = ["//visibility:public"],
)
-exports_files(["diff_test.sh"])
-
-filegroup(
- name = "standard_package",
- srcs = glob(["**"]),
- visibility = ["//distro:__pkg__"],
+py_binary(
+ name = "write_sbom",
+ srcs = ["write_sbom.py"],
+ python_version = "PY3",
+ visibility = ["//visibility:public"],
)
diff --git a/tools/write_sbom.py b/tools/write_sbom.py
new file mode 100644
index 0000000..18286ab
--- /dev/null
+++ b/tools/write_sbom.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Proof of concept SBOM writer.
+
+This is only a demonstration. It will be replaced with other tools.
+"""
+
+import argparse
+import codecs
+import datetime
+import json
+import os
+
+
+# Identifier written to the "Creator: Tool:" line of the SBOM header.
+TOOL = 'https://github.com/bazelbuild/rules_license/tools:write_sbom'
+
+def _load_package_data(package_info):
+  """Parse the UTF-8 encoded JSON file at path `package_info`.
+
+  Args:
+    package_info: path of the JSON file to read.
+
+  Returns:
+    The decoded JSON value.
+  """
+  with codecs.open(package_info, encoding='utf-8') as json_file:
+    return json.load(json_file)
+
+def _write_sbom_header(out, package):
+  """Write the SPDX document header for `package`.
+
+  Args:
+    out: file object to write to
+    package: name used for the DocumentName and Package lines
+  """
+  # Local import keeps this function self-contained; getpass is stdlib.
+  import getpass
+
+  # os.getlogin() needs a controlling terminal and raises OSError without
+  # one (for example when run from a build action), so ask getpass instead
+  # and fall back to a placeholder if no user name can be determined.
+  try:
+    creator = getpass.getuser()
+  except (KeyError, OSError, ImportError):
+    creator = 'unknown'
+
+  # SPDX expects an ISO 8601 UTC timestamp: YYYY-MM-DDThh:mm:ssZ.
+  created = datetime.datetime.now(datetime.timezone.utc)
+  header = [
+    'SPDXVersion: SPDX-2.2',
+    'DataLicense: CC0-1.0',
+    'SPDXID: SPDXRef-DOCUMENT',
+    'DocumentName: %s' % package,
+    # TBD
+    # 'DocumentNamespace: https://swinslow.net/spdx-examples/example1/hello-v3
+    'Creator: Person: %s' % creator,
+    'Creator: Tool: %s' % TOOL,
+    created.strftime('Created: %Y-%m-%dT%H:%M:%SZ'),
+    '',
+    '##### Package: %s' % package,
+  ]
+  # No trailing newline here: _write_sbom starts each entry with one.
+  out.write('\n'.join(header))
+
+
+
+def _write_sbom(out, packages):
+  """Produce a basic SBOM
+
+  One entry is written per package. Fields that are missing or empty in the
+  package data are left out of its entry.
+
+  Args:
+    out: file object to write to
+    packages: package metadata. A big blob of JSON.
+  """
+  emit = out.write
+  for package in packages:
+    name = package.get('package_name') or '<unknown>'
+    emit('\n')
+    emit('SPDXID: "%s"\n' % name)
+    emit(' name: "%s"\n' % name)
+
+    version = package.get('package_version')
+    if version:
+      emit(' versionInfo: "%s"\n' % version)
+
+    # IGNORE_COPYRIGHT: Not a copyright notice. It is a variable holding one.
+    notice = package.get('copyright_notice')
+    if notice:
+      emit(' copyrightText: "%s"\n' % notice)
+
+    license_kinds = package.get('license_kinds')
+    if license_kinds:
+      declared = ','.join([kind['name'] for kind in license_kinds])
+      emit(' licenseDeclared: "%s"\n' % declared)
+
+    download_url = package.get('package_url')
+    if download_url:
+      emit(' downloadLocation: %s\n' % download_url)
+
+
+def main():
+  """Read the gathered metadata JSON and write a basic SBOM.
+
+  Returns:
+    0 on success. Argument problems and an empty input file are reported
+    through argparse, which exits the process with a usage error.
+  """
+  parser = argparse.ArgumentParser(
+      description='Demonstration SBOM writer')
+
+  parser.add_argument('--licenses_info', required=True,
+                      help='path to JSON file containing all license data')
+  parser.add_argument('--out', default='sbom.out', help='SBOM output')
+  args = parser.parse_args()
+
+  license_data = _load_package_data(args.licenses_info)
+  if not license_data:
+    parser.error('no targets found in %s' % args.licenses_info)
+  target = license_data[0]  # we assume only one target for the demo
+
+  top_level_target = target['top_level_target']
+  # It's not really packages, but this is close proxy for now
+  licenses = target['licenses']
+  package_infos = target['packages']
+
+  # These are similar dicts, so merge them by package. This is not
+  # strictly true, as different licenses can appear in the same
+  # package, but it is good enough for demonstrating the sbom.
+  by_package = {x['bazel_package']: x for x in licenses}
+  for pi in package_infos:
+    by_package.setdefault(pi['bazel_package'], {}).update(pi)
+
+  with codecs.open(args.out, mode='w', encoding='utf-8') as out:
+    _write_sbom_header(out, package=top_level_target)
+    _write_sbom(out, by_package.values())
+  return 0
+
+
+if __name__ == '__main__':
+  # Propagate main()'s return value as the process exit status.
+  raise SystemExit(main())