ID3 Fix for Icecast MP3s

Posted on December 17, 2009

Icecast is a simple music server that can be run from the Linux command line. I use it for quick and simple streaming of my music library. The new version, using the “ices2” library, only reads Ogg Vorbis audio files. But there is an older, deprecated library (ices0) that reads MP3 files. This is what I use, since almost all my audio is in MP3 format.

Problem

Everything works pretty well, but there is one annoyance — the artist and song titles are sometimes displayed with garbage characters inside them, and are truncated. Curious about what was causing this, and to try and fix it, I investigated.

(If you don’t want to read my story about finding the solution, you can skip to the end for the fix.)

Debugger

To find the root of the problem, I modified the ices software so all it does is print out info about an MP3 file. Most of the time spent on this mini-project was actually just tracing the logic and finding a way to strip ices down to the ID3 routines.

You can download my files if you’re interested in following along with the process, but I’ll also post the code below. To use them, simply unzip and copy the files over the top of the ices0 source files. Update: All the files can now be found in a GitHub repo.

test.h

/*
 * test.h - hand-written configuration for the stripped-down ID3 test build.
 *
 * Stands in for the settings that the normal configure/compile step would
 * provide, and redirects the ices logging calls to the terminal.
 */
#ifndef TEST_H
#define TEST_H

/* The logging macros below expand to printf, so make this header
 * self-contained instead of relying on the includer for <stdio.h>. */
#include <stdio.h>

/* Feature-test macros normally set during configuration. */
#define HAVE_ERRNO_H
#define HAVE_UNISTD_H
#define HAVE_SYS_TYPES_H
#define HAVE_SYS_STAT_H
#define HAVE_FCNTL_H
#define HAVE_SHOUT_SHOUT_H

/*
 * Terminal replacements for the ices logging functions.
 *
 * Each message is prefixed with a newline. The prefix is glued on by string
 * literal concatenation, so the first argument (the format) MUST be a string
 * literal; passing a char* variable as the format will not compile.
 */
#define ices_log(...)  printf("\n" __VA_ARGS__)
#define ices_log_debug(...)  printf("\n" __VA_ARGS__)
#define ices_log_error(...)  printf("\n" __VA_ARGS__)

#endif // TEST_H

Test.h is the header file for my program, so it is a new file. It first manually defines some constants; normally these are set during configuration and compilation, but I didn’t want to get fancy. Of course, you’ll need to have the appropriate developer libraries on your machine. (The “shout” library was the only one I was missing, and it was available as Ubuntu package “libshout3-dev”.)

This file also replaces the ices logging macros with versions that print to the terminal. Ices already calls these functions throughout its code base, so that makes it easier for me.

test.c

#include <stdio.h>
#include "definitions.h"
#include "metadata.h"
#include "icestypes.h"
#include "id3.h"

#define INPUT_BUFSIZ 4096

/*
 * Open source->path, figure out what kind of input it is, and set up source.
 *
 * A path of "-" selects stdin (descriptor 0). The first INPUT_BUFSIZ bytes
 * (or fewer) are read and offered to each compiled-in format probe in turn.
 * A probe returning 0 accepts the input, a negative value aborts, and a
 * positive value lets the next probe try.
 *
 * Fields written here: filesize, bytes_read, channels, fd.
 *
 * Returns 0 when a probe accepted the input (descriptor left open in
 * source->fd), or -1 on any failure (the descriptor, if opened, is closed).
 */
static int
stream_open_source (input_stream_t* source)
{
  char buf[INPUT_BUFSIZ];
  ssize_t nread;
  size_t len;
  off_t end;
  int fd;
  int rc;

  source->filesize = 0;
  source->bytes_read = 0;
  source->channels = 2;

  if (source->path[0] == '-' && source->path[1] == '\0') {
    ices_log_debug("Reading audio from stdin");
    fd = 0;
  } else if ((fd = open (source->path, O_RDONLY)) < 0) {
    ices_util_strerror (errno, buf, sizeof (buf));
    ices_log_error ("Error opening: %s", buf);
    return -1;
  }
  source->fd = fd;

  /* Keep the offset in an off_t: squeezing it into an int would turn the
   * size of a large file into a bogus (possibly negative) value. When the
   * seek fails, filesize simply stays 0. */
  if ((end = lseek (fd, 0, SEEK_END)) >= 0) {
    source->filesize = end;
    lseek (fd, 0, SEEK_SET);
  }

  /* read() reports errors as -1, so its result must be held in a signed
   * type; stored directly in a size_t the error would look like a huge
   * successful read and the probes would run on an uninitialised buffer. */
  nread = read (fd, buf, sizeof (buf));
  if (nread <= 0) {
    if (nread < 0) {
      /* errno is only meaningful for a real error, not for end-of-file */
      ices_util_strerror (errno, buf, sizeof (buf));
      ices_log_error ("Error reading header of %s: %s", source->path, buf);
    } else {
      ices_log_error ("Error reading header: %s", source->path);
    }

    close (fd);
    return -1;
  }
  len = (size_t) nread;

#ifdef HAVE_LIBFLAC
  if (!(rc = ices_flac_open (source, buf, len)))
    return 0;
  if (rc < 0) {
    close (fd);
    return -1;
  }
#endif

#ifdef HAVE_LIBFAAD
  if (!(rc = ices_mp4_open (source, buf, len)))
    return 0;
  if (rc < 0) {
    close (fd);
    return -1;
  }
#endif

  if (!(rc = ices_mp3_open (source, buf, len)))
    return 0;
  if (rc < 0) {
    close (fd);
    return -1;
  }

#ifdef HAVE_LIBVORBISFILE
  if (!(rc = ices_vorbis_open (source, buf, len)))
    return 0;
#endif

  /* no probe recognised the input */
  close (fd);
  return -1;
}

/*
 * Entry point: open the file named by argv[1] through stream_open_source so
 * that the ices parsing code logs what it finds.
 *
 * Returns 0 on success, 1 when no filename was given or the file could not
 * be opened/recognised.
 */
int main(int argc, char* argv[])
{
  /* Zero every field so that code reading members this program never sets
   * does not see indeterminate values. */
  input_stream_t source = {0};
  int status = 0;

  if (argc < 2)
  {
    printf("You must specify a filename\n");
    return 1;
  }

  source.path = argv[1];
  ices_metadata_set (NULL, NULL);
  ices_metadata_set_file (source.path);
  if (stream_open_source (&source) < 0)
  {
    printf("stream_open_source() returned an error\n");
    /* source.path aliases argv[1], which this program does not own, so it
     * must not be handed to ices_util_free here. */
    status = 1;
  }

  printf("\ndone.\n");
  return status;
}

Test.c is the main program. It includes stdio.h (for the printf function) and the necessary ices headers.

The INPUT_BUFSIZ constant and stream_open_source function are copied verbatim from stream.c. This function is necessary for opening the MP3s, but the rest of stream.c is unneeded, and this was easier than trying to compile in the stream.c file.

The final piece, the main function, is pretty simple. It takes a filename as a command-line argument and passes it to the stream_open_source function. This specific code was also lifted from ices.

definitions.h diff

Index: definitions.h
===================================================================
--- definitions.h	(revision 16750)
+++ definitions.h	(working copy)
@@ -21,6 +21,8 @@
 #ifndef _ICES_DEFINITIONS_H
 #define _ICES_DEFINITIONS_H

+#include "test.h"
+
 #ifdef _WIN32
 # include
 #else

The ices definitions.h file requires my test.h definitions first.

log.h diff

Index: log.h
===================================================================
--- log.h	(revision 16750)
+++ log.h	(working copy)
@@ -19,6 +19,7 @@
  */

 /* Public function declarations */
+/*
 void ices_log (const char *fmt, ...);
 void ices_log_error (const char *fmt, ...);
 void ices_log_debug (const char *fmt, ...);
@@ -27,6 +28,7 @@
 void ices_log_initialize (void);
 void ices_log_shutdown (void);
 void ices_log_daemonize (void);
+*/

I commented out the entire log.h file, since I redefined the functions in test.h.

metadata.c diff

Index: metadata.c
===================================================================
--- metadata.c	(revision 16750)
+++ metadata.c	(working copy)
@@ -105,7 +105,7 @@
   if (delay)
     usleep (delay);

-  if (! (playlist_metadata = ices_playlist_get_metadata ())) {
+  if (! (playlist_metadata = NULL)) {
     if (Title) {
       if (Artist)
 	snprintf (song, sizeof (song), "%s - %s", Artist, Title);
@@ -131,6 +131,7 @@
     return;
   }

+  /*
   for (stream = ices_config.streams; stream; stream = stream->next) {
     rc = shout_set_metadata (stream->conn, metadata);

@@ -139,6 +140,7 @@
     else
       ices_log_debug ("Updated metadata on %s to: %s", stream->mount, value);
   }
+  */ rc = 0; stream = 0;

   ices_util_free (playlist_metadata);
   shout_metadata_free (metadata);

Finally, I had to make some adjustments to metadata.c so that the program doesn’t reference the non-existent stream or playlist.

Build

Once you’ve made these changes and installed all the third-party developer libraries, you can compile the program like this:

gcc -I /usr/include -o test test.c mp3.c id3.c metadata.c util.c -lshout

My development libraries are in /usr/include — you may have to adjust that path for your machine. The compile will generate a lot of warnings, but they can be ignored.

Testing

Now that we have a program that prints out the ID3 info for an MP3 file, let’s test it out.

I have two files, which I have named bad.mp3 and good.mp3. Let’s make sure that the good MP3 checks out:

> ./test good.mp3
Filename cleaned up from [good.mp3] to [good]
ID3v1: Title: One Of Those Christmas Days
ID3v1: Artist: Third Eye Blind
MPEG-1 layer III, 128 kbps, 44100 Hz, j-stereo
Ext: 0	Mode_Ext: 2	Copyright: 0	Original: 1
Error Protection: 0	Emphasis: 0	Padding: 0
done.

The ID3 info is prefixed with “ID3v1:” here. As expected, everything looks good. Now let’s try the bad MP3:

> ./test bad.mp3
Filename cleaned up from [bad.mp3] to [bad]
ID3v1: Title: Fairytale of New Your
ID3v1: Artist: Pilate
ID3v2: version 3.0. Tag size is 48832 bytes.
ID3v2: Frame type [TFLT] found, 7 bytes
ID3v2: Frame type [TIT2] found, 47 bytes
ID3v2: Title found: ��F
ID3v2: Frame type [TPE1] found, 17 bytes
ID3v2: Artist found: ��P
VBR tag found
MPEG-1 layer III, VBR, 44100 Hz, j-stereo
Ext: 0	Mode_Ext: 2	Copyright: 0	Original: 1
Error Protection: 0	Emphasis: 0	Padding: 0
done.

Hmmm… the v1 metadata looks fine, but the v2 stuff is wrong. It looks like the first letter is being included (at the end), but where is the rest of it? The length is long enough (47 and 17 bytes). And what’s the extra garbage at the beginning?

If you’re not familiar with the history of the MP3 format, version 2 of ID3 extended the original specification (version 1). The two sets of data (v1 and v2) can actually exist side-by-side in an MP3 file, as shown in this example. If both exist, MP3 players will use the newer version.

So, the problem lies in the interpretation of the ID3v2 data. In the ices library, this is defined in the id3v2_read_frame function, which reads the individual metadata fields.

A Closer Look

Let’s add a little output to see exactly what these bytes are.

id3.c diff

Index: id3.c
===================================================================
--- id3.c       (revision 16750)
+++ id3.c       (working copy)
@@ -254,6 +254,16 @@
      len2 -= rlen;
    }

+printf("\n");
+int i = 0;
+for (i = 0; i < len; i++)
+{
+  if (buf[i] >= 32 && buf[i] < 255) /* readable ASCII */
+    printf("%c ", buf[i]);
+  else
+   printf("0x%X ", buf[i]);
+}
+
    /* skip encoding */
    if (!strcmp (hdr, ID3V2_TITLE_TAG(tag))) {
      buf[len] = '\0';

Now if we run the program on the bad MP3:

> ./test bad.mp3
Filename cleaned up from [bad.mp3] to [bad]
ID3v1: Title: Fairytale of New Your
ID3v1: Artist: Pilate
ID3v2: version 3.0. Tag size is 48832 bytes.
ID3v2: Frame type [TFLT] found, 7 bytes
ID3v2: Frame type [TIT2] found, 47 bytes
0x1 0xFFFFFFFF 0xFFFFFFFE F 0x0 a 0x0 i 0x0 r 0x0 y 0x0 t 0x0 a 0x0 l 0x0 e 0x0    0x0 o 0x0 f 0x0    0x0 N 0x0 e 0x0 w 0x0    0x0 Y 0x0 o 0x0 u 0x0 r 0x0 0x0 0x0
ID3v2: Title found: ��F
ID3v2: Frame type [TPE1] found, 17 bytes
0x1 0xFFFFFFFF 0xFFFFFFFE P 0x0 i 0x0 l 0x0 a 0x0 t 0x0 e 0x0 0x0 0x0
ID3v2: Artist found: ��P
VBR tag found
MPEG-1 layer III, VBR, 44100 Hz, j-stereo
Ext: 0 Mode_Ext: 2 Copyright: 0 Original: 1
Error Protection: 0 Emphasis: 0 Padding: 0
done.

Just looking at the hex of the metadata, it’s obviously not simple ASCII text. So what is it?

Specs

The ID3 specification says the following:

If nothing else is said a string is represented as ISO-8859-1
[ISO-8859-1] characters in the range $20 – $FF. Such strings are
represented as <text string>, or <full text string> if newlines are
allowed, in the frame descriptions. All Unicode strings [UNICODE] use
16-bit unicode 2.0 (ISO/IEC 10646-1:1993, UCS-2). Unicode strings
must begin with the Unicode BOM ($FF FE or $FE FF) to identify the
byte order.

(from section 3.3: ID3v2 frame overview)

Ah, character encoding — my old nemesis.

With 16-bit Unicode (using UCS-2 encoding), every other byte of an ASCII-range string is $00 (or NUL), which C interprets as the premature end of the string. The garbage at the beginning is the BOM.

It looks like the Icecast guys either neglected or intentionally declined to add Unicode support. This may not have mattered with old programs or when ID3v1 was the most popular, but Unicode tags are much more common nowadays.

Solution

So, what to do?

Support Unicode

It would be fun (and handy) to add Unicode support. Although the newest version of Icecast (v2.3.2 at the time of this writing) supports UTF-8, it doesn’t support UTF-16, and the rest of ices0 certainly doesn’t.

Ignore ID3v2

The easiest thing is just to disable ID3v2 support and rely on ID3v1 — almost all of my MP3s have ID3v1. (If they don’t, I manually copy the ID3v2 tag over using my MP3 player.)

Disabling ID3v2 support is pretty easy in ices0 — just comment out 2 lines:

Index: mp3.c
===================================================================
--- mp3.c       (revision 16750)
+++ mp3.c       (working copy)
@@ -111,8 +111,8 @@
    return 1;

  /* first check for ID3v2 */
-  if (! strncmp ("ID3", mp3_data->buf, 3))
-    ices_id3v2_parse (source);
+  /*if (! strncmp ("ID3", mp3_data->buf, 3))
+    ices_id3v2_parse (source); */

  /* ensure we have at least 4 bytes in the read buffer */
  if (!mp3_data->buf || mp3_data->len - mp3_data->pos < 4)

Convert to ASCII

We can also convert it from UCS-2 to ISO-8859-1 (Latin-1, an 8-bit superset of ASCII). There are existing libraries for character encoding. (In fact, Icecast itself uses libxml2, which contains encoding conversion tools.) But UCS-2 is fairly simple (famous last words) and most of my MP3s use ASCII characters, so let’s do a simple reduction down to this character set.

Index: id3.c
===================================================================
--- id3.c       (revision 16750)
+++ id3.c       (working copy)
@@ -254,7 +254,45 @@
      len2 -= rlen;
    }

-    /* skip encoding */
+    /* Convert Unicode strings to ISO-8859-1 using simple reduction. */
+    /* Breakdown of buf by bytes, according to ID3v2 specs:
+        0   - Unicode/ISO-8859-1 flag
+        1-2 - BOM (if Unicode) or string data (if ISO-8859-1)
+        3+  - string data
+     */
+    if (buf[0] == 0x1) /* 1 = Unicode, 0 = ISO-8859-1 */
+    {
+      /* Determine the byte order by looking at the BOM */
+      int byte_order;
+      if ((unsigned char) buf[1] == 0xFF && (unsigned char) buf[2] == 0xFE)
+      {
+        byte_order = 1; /* $FF FE: little-endian, low byte comes first */
+      }
+      else
+      {
+        byte_order = 0; /* $FE FF: big-endian, high byte comes first */
+      }
+
+      /* Modify the buffer in place. */
+      int read_pos  = 3; /* skip BOM */
+      int write_pos = 1; /* overwrite the BOM */
+      while (read_pos + 1 < len) /* only consume complete 2-byte units */
+      {
+        if (buf[read_pos+byte_order] == 0) /* high byte is zero: fits in one byte */
+        {
+          buf[write_pos] = (byte_order ? buf[read_pos] : buf[read_pos+1]);
+        }
+        else
+        {
+          buf[write_pos] = '?'; /* use a question mark for extended characters */
+        }
+
+        read_pos  += 2; /* 2 bytes for UTF-16 */
+        write_pos += 1; /* 1 byte for ISO-8859-1 */
+      }
+      buf[write_pos] = '\0';
+    }
+
    if (!strcmp (hdr, ID3V2_TITLE_TAG(tag))) {
      buf[len] = '\0';
      ices_log_debug ("ID3v2: Title found: %s", buf + 1);

And here’s the new output:

> ./test bad.mp3
Filename cleaned up from [bad.mp3] to [bad]
ID3v1: Title: Fairytale of New Your
ID3v1: Artist: Pilate
ID3v2: version 3.0. Tag size is 48832 bytes.
ID3v2: Frame type [TFLT] found, 7 bytes
ID3v2: Frame type [TIT2] found, 47 bytes
ID3v2: Title found: Fairytale of New Your
ID3v2: Frame type [TPE1] found, 17 bytes
ID3v2: Artist found: Pilate
VBR tag found
MPEG-1 layer III, VBR, 44100 Hz, j-stereo
Ext: 0 Mode_Ext: 2 Copyright: 0 Original: 1
Error Protection: 0 Emphasis: 0 Padding: 0
done.

It works!

So, in conclusion, just add the marked lines from the above diff (starting on line 257) into src/id3.c and recompile (make && sudo make install). Now your fancy Unicode titles and names will be halfway readable.

Leave a Reply

  1.  

    |