From 2c81f47508fa6bce9df84e3b43dfb16dffb742a0 Mon Sep 17 00:00:00 2001
From: Raphael Geissert <geissert@debian.org>
Date: Thu, 12 Sep 2024 15:51:05 +0300
Subject: [PATCH] Avoid directory traversal when extracting archives

Description of the vulnerability from the NIST CVE tracker [1]:

    Multiple directory traversal vulnerabilities in the (1)
    tar_extract_glob and (2) tar_extract_all functions in libtar 1.2.20
    and earlier allow remote attackers to overwrite arbitrary files via
    a .. (dot dot) in a crafted tar file.

Imported from the Debian libtar package 1.2.20-8 [2]. Original Debian
description:

    Author: Raphael Geissert <geissert@debian.org>
    Bug-Debian: https://bugs.debian.org/731860
    Description: Avoid directory traversal when extracting archives
    by skipping over leading slashes and any prefix containing ".." components.
    Forwarded: yes

meta-openembedded uses Debian's release tarball [3]. Debian uses
repo.or.cz/libtar.git as their upstream [4]. repo.or.cz/libtar.git has
been inactive since 2013 [5].

CVE: CVE-2013-4420

Upstream-Status: Inactive-Upstream [lastrelease: 2013 lastcommit: 2013]

Comments: Added the commit message

[1] https://nvd.nist.gov/vuln/detail/CVE-2013-4420
[2] https://sources.debian.org/patches/libtar/1.2.20-8/CVE-2013-4420.patch/
[3] https://git.openembedded.org/meta-openembedded/tree/meta-oe/recipes-support/libtar/libtar_1.2.20.bb?h=master#n8
[4] http://svn.kibibyte.se/libtar/trunk/debian/control (rev 51; not tagged)
[5] https://repo.or.cz/libtar.git/shortlog/refs/heads/master

Signed-off-by: Katariina Lounento <katariina.lounento@vaisala.com>
---
 lib/decode.c   | 33 +++++++++++++++++++++++++++++++--
 lib/extract.c  |  8 ++++----
 lib/internal.h |  1 +
 lib/output.c   |  4 ++--
 4 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/lib/decode.c b/lib/decode.c
index 35312be..edd5f2e 100644
--- a/lib/decode.c
+++ b/lib/decode.c
@@ -22,13 +22,42 @@
 # include <string.h>
 #endif
 
+char *	/* Return the tail of file_name after leading '/' and the last "../" component. */
+safer_name_suffix (char const *file_name)
+{
+	char const *p, *t;	/* p scans the name; t marks the start of the suffix to return */
+	p = t = file_name;
+	while (*p == '/') t = ++p;	/* drop every leading slash */
+	while (*p)
+	{
+		while (p[0] == '.' && p[0] == p[1] && p[2] == '/')	/* p sits at a component start; matches "../" only */
+		{	/* NOTE(review): a final ".." with no '/' after it does not match and is kept */
+			p += 3;
+			t = p;	/* everything up to and including this "../" is discarded */
+		}
+		/* advance pointer past the next slash (or stop at the terminating NUL) */
+		while (*p && (p++)[0] != '/');
+	}
+
+	if (!*t)
+	{
+		t = ".";	/* nothing left after stripping: return "." instead of an empty string */
+	}
+
+	if (t != file_name)
+	{
+		/* TODO: warn somehow that the path was modified */
+	}
+	return (char*)t;	/* points into file_name, or at the literal "."; const is cast away */
+}
+
 
 /* determine full path name */
 char *
 th_get_pathname(TAR *t)
 {
 	if (t->th_buf.gnu_longname)
-		return t->th_buf.gnu_longname;
+		return safer_name_suffix(t->th_buf.gnu_longname);
 
 	/* allocate the th_pathname buffer if not already */
 	if (t->th_pathname == NULL)
@@ -50,7 +79,7 @@ th_get_pathname(TAR *t)
 	}
 
 	/* will be deallocated in tar_close() */
-	return t->th_pathname;
+	return safer_name_suffix(t->th_pathname);
 }
 
 
diff --git a/lib/extract.c b/lib/extract.c
index 9fc6ad5..4ff1a95 100644
--- a/lib/extract.c
+++ b/lib/extract.c
@@ -302,14 +302,14 @@ tar_extract_hardlink(TAR * t, char *realname)
 	if (mkdirhier(dirname(filename)) == -1)
 		return -1;
 	libtar_hashptr_reset(&hp);
-	if (libtar_hash_getkey(t->h, &hp, th_get_linkname(t),
+	if (libtar_hash_getkey(t->h, &hp, safer_name_suffix(th_get_linkname(t)),
 			       (libtar_matchfunc_t)libtar_str_match) != 0)
 	{
 		lnp = (char *)libtar_hashptr_data(&hp);
 		linktgt = &lnp[strlen(lnp) + 1];
 	}
 	else
-		linktgt = th_get_linkname(t);
+		linktgt = safer_name_suffix(th_get_linkname(t));
 
 #ifdef DEBUG
 	printf("  ==> extracting: %s (link to %s)\n", filename, linktgt);
@@ -347,9 +347,9 @@ tar_extract_symlink(TAR *t, char *realname)
 
 #ifdef DEBUG
 	printf("  ==> extracting: %s (symlink to %s)\n",
-	       filename, th_get_linkname(t));
+	       filename, safer_name_suffix(th_get_linkname(t)));
 #endif
-	if (symlink(th_get_linkname(t), filename) == -1)
+	if (symlink(safer_name_suffix(th_get_linkname(t)), filename) == -1)
 	{
 #ifdef DEBUG
 		perror("symlink()");
diff --git a/lib/internal.h b/lib/internal.h
index da7be7f..f05ca4f 100644
--- a/lib/internal.h
+++ b/lib/internal.h
@@ -21,3 +21,4 @@
 #define TLS_THREAD
 #endif
 
+char* safer_name_suffix(char const*);	/* defined in decode.c; returns a pointer into its argument or to "." */
diff --git a/lib/output.c b/lib/output.c
index a5262ee..af754f1 100644
--- a/lib/output.c
+++ b/lib/output.c
@@ -124,9 +124,9 @@ th_print_long_ls(TAR *t)
 		else
 			printf(" link to ");
 		if ((t->options & TAR_GNU) && t->th_buf.gnu_longlink != NULL)
-			printf("%s", t->th_buf.gnu_longlink);
+			printf("%s", safer_name_suffix(t->th_buf.gnu_longlink));
 		else
-			printf("%.100s", t->th_buf.linkname);
+			printf("%.100s", safer_name_suffix(t->th_buf.linkname));
 	}
 
 	putchar('\n');
