|
@@ -0,0 +1,302 @@
|
|
|
|
+--- file-5.05/src/Makefile.am.vinejtext 2010-07-22 00:56:10.000000000 +0900
|
|
|
|
++++ file-5.05/src/Makefile.am 2011-02-11 16:53:06.000000000 +0900
|
|
|
|
+@@ -4,11 +4,11 @@
|
|
|
|
+
|
|
|
|
+ bin_PROGRAMS = file
|
|
|
|
+
|
|
|
|
+-AM_CPPFLAGS = -DMAGIC='"$(MAGIC)"'
|
|
|
|
++AM_CPPFLAGS = -DMAGIC='"$(MAGIC)"' -DDETECT_JAPANESE
|
|
|
|
+ AM_CFLAGS = @WARNINGS@
|
|
|
|
+
|
|
|
|
+ libmagic_la_SOURCES = magic.c apprentice.c softmagic.c ascmagic.c \
|
|
|
|
+- encoding.c compress.c is_tar.c readelf.c print.c fsmagic.c \
|
|
|
|
++ encoding.c compress.c is_tar.c readelf.c print.c jcode.c fsmagic.c \
|
|
|
|
+ funcs.c file.h names.h patchlevel.h readelf.h tar.h apptype.c \
|
|
|
|
+ file_opts.h elfclass.h mygetopt.h cdf.c cdf_time.c readcdf.c cdf.h
|
|
|
|
+ libmagic_la_LDFLAGS = -no-undefined -version-info 1:0:0
|
|
|
|
+--- file-5.05/src/encoding.c.vinejtext 2010-07-22 01:47:17.000000000 +0900
|
|
|
|
++++ file-5.05/src/encoding.c 2011-02-11 17:26:00.000000000 +0900
|
|
|
|
+@@ -42,7 +42,7 @@ FILE_RCSID("@(#)$File: encoding.c,v 1.5
|
|
|
|
+ #include <string.h>
|
|
|
|
+ #include <memory.h>
|
|
|
|
+ #include <stdlib.h>
|
|
|
|
+-
|
|
|
|
++#include "jcode.h"
|
|
|
|
+
|
|
|
|
+ private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *);
|
|
|
|
+ private int looks_utf8_with_BOM(const unsigned char *, size_t, unichar *,
|
|
|
|
+@@ -68,7 +68,7 @@ protected int
|
|
|
|
+ file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, unichar **ubuf, size_t *ulen, const char **code, const char **code_mime, const char **type)
|
|
|
|
+ {
|
|
|
|
+ size_t mlen;
|
|
|
|
+- int rv = 1, ucs_type;
|
|
|
|
++ int rv = 1, ucs_type, jcode;
|
|
|
|
+ unsigned char *nbuf = NULL;
|
|
|
|
+
|
|
|
|
+ mlen = (nbytes + 1) * sizeof(nbuf[0]);
|
|
|
|
+@@ -83,10 +83,27 @@ file_encoding(struct magic_set *ms, cons
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ *type = "text";
|
|
|
|
+- if (looks_ascii(buf, nbytes, *ubuf, ulen)) {
|
|
|
|
++ jcode = detect_kcode(buf, nbytes, *ubuf, ulen); /* ASCII/JIS/SJIS/EUC classification; any other result falls through to the checks further down the chain */
|
|
|
|
++ if (jcode == ASCII) {
|
|
|
|
+ DPRINTF(("ascii %" SIZE_T_FORMAT "u\n", *ulen));
|
|
|
|
+ *code = "ASCII";
|
|
|
|
+ *code_mime = "us-ascii";
|
|
|
|
++ } else if (jcode == JIS) {
|
|
|
|
++ DPRINTF(("jis %" SIZE_T_FORMAT "u\n", *ulen));
|
|
|
|
++ *code = "7-bit JIS [ESC$B, ESC(B]"; /* write through the out-parameters, exactly as the ASCII branch does */
|
|
|
|
++ *code_mime = "jis"; /* NOTE(review): "jis" and "sjis" are not IANA charset names (ISO-2022-JP, Shift_JIS) -- confirm before changing output */
|
|
|
|
++ } else if (jcode == SJIS){
|
|
|
|
++ DPRINTF(("sjis %" SIZE_T_FORMAT "u\n", *ulen));
|
|
|
|
++ *code = "SJIS";
|
|
|
|
++ *code_mime = "sjis";
|
|
|
|
++ } else if (jcode == EUC){
|
|
|
|
++ DPRINTF(("euc %" SIZE_T_FORMAT "u\n", *ulen));
|
|
|
|
++ *code = "EUC";
|
|
|
|
++ *code_mime = "euc-jp";
|
|
|
|
++ } else if (jcode == EUCORSJIS){
|
|
|
|
++ DPRINTF(("euc or sjis %" SIZE_T_FORMAT "u\n", *ulen));
|
|
|
|
++ *code = "EUC or SJIS";
|
|
|
|
++ *code_mime = "unknown";
|
|
|
|
+ } else if (looks_utf8_with_BOM(buf, nbytes, *ubuf, ulen) > 0) {
|
|
|
|
+ DPRINTF(("utf8/bom %" SIZE_T_FORMAT "u\n", *ulen));
|
|
|
|
+ *code = "UTF-8 Unicode (with BOM)";
|
|
|
|
+--- /dev/null 2011-02-06 21:11:58.373999997 +0900
|
|
|
|
++++ file-5.05/src/jcode.c 2011-02-11 17:14:29.000000000 +0900
|
|
|
|
+@@ -0,0 +1,205 @@
|
|
|
|
++/*
|
|
|
|
++jcode.c: Kanji-code detection routine by Jun Nishii <jun@vinelinux.org>
|
|
|
|
++ modified by Ryoichi INAGAKI <inagaki@vinelinux.org>
|
|
|
|
++ */
|
|
|
|
++#include <stdio.h>
|
|
|
|
++#include <unistd.h>
|
|
|
|
++#include <signal.h>
|
|
|
|
++#include <sys/types.h>
|
|
|
|
++#include <sys/wait.h>
|
|
|
|
++
|
|
|
|
++typedef unsigned long unichar;	/* declared before jcode.h because the prototypes in that header use unichar */
|
|
|
|
++#include "jcode.h"
|
|
|
|
++
|
|
|
|
++#define F 0 /* character never appears in text */
|
|
|
|
++#define T 1 /* character appears in plain ASCII text */
|
|
|
|
++#define I 2 /* character appears in ISO-8859 text */
|
|
|
|
++#define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
|
|
|
|
++#define J 4 /* character appears in JIS or plain ASCII */
|
|
|
|
++#define S 5 /* character appears in SJIS */
|
|
|
|
++#define E 6 /* character appears in EUC */
|
|
|
|
++#define O 7 /* character appears in EUC or SJIS */
|
|
|
|
++
|
|
|
|
++#define ESC 27
|
|
|
|
++
|
|
|
|
++static char jp_chars1[256] = { /* class (F/T/I/X/J/S/E/O) of a byte read at the top of the scan loops; looked up as jp_chars1[buf[i]] by check_asc_jis, check_sjis and check_euc */
|
|
|
|
++ F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */
|
|
|
|
++ F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */
|
|
|
|
++ T, J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, /* 0x2X */
|
|
|
|
++ J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, /* 0x3X */
|
|
|
|
++ J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, /* 0x4X */
|
|
|
|
++ J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, /* 0x5X */
|
|
|
|
++ J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, /* 0x6X */
|
|
|
|
++ J, J, J, J, J, J, J, J, J, J, J, J, J, J, J, F, /* 0x7X */
|
|
|
|
++ /* NEL */
|
|
|
|
++ X, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, /* 0x8X */
|
|
|
|
++ S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, /* 0x9X */
|
|
|
|
++ I, E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, /* 0xaX */
|
|
|
|
++ E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, /* 0xbX */
|
|
|
|
++ E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, /* 0xcX */
|
|
|
|
++ E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, /* 0xdX */
|
|
|
|
++ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* 0xeX */
|
|
|
|
++ E, E, E, E, E, E, E, E, E, E, E, E, E, E, E, I /* 0xfX */
|
|
|
|
++};
|
|
|
|
++
|
|
|
|
++static char jp_chars2[256] = { /* class of the byte that follows an S, E or O lead byte; looked up by check_sjis and check_euc after they advance past the lead byte */
|
|
|
|
++ /* BEL BS HT LF FF CR */
|
|
|
|
++ F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */
|
|
|
|
++ /* ESC */
|
|
|
|
++ F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */
|
|
|
|
++ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */
|
|
|
|
++ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */
|
|
|
|
++ S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, /* 0x4X */
|
|
|
|
++ S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, /* 0x5X */
|
|
|
|
++ S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, /* 0x6X */
|
|
|
|
++ S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, F, /* 0x7X */
|
|
|
|
++ /* NEL */
|
|
|
|
++ S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, /* 0x8X */
|
|
|
|
++ S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, S, /* 0x9X */
|
|
|
|
++ S, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* 0xaX */
|
|
|
|
++ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* 0xbX */
|
|
|
|
++ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* 0xcX */
|
|
|
|
++ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* 0xdX */
|
|
|
|
++ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, /* 0xeX */
|
|
|
|
++ O, O, O, O, O, O, O, O, O, O, O, O, O, E, E, I /* 0xfX */
|
|
|
|
++};
|
|
|
|
++
|
|
|
|
++/* Returns JIS if every byte is class T or J and ESC $ B or ESC $ @ occurs, ASCII if none occurs, 0 on any other byte; copies bytes to ubuf and counts them in *ulen. */
|
|
|
|
++int
|
|
|
|
++check_asc_jis(
|
|
|
|
++	const unsigned char *buf,
|
|
|
|
++	size_t nbytes,
|
|
|
|
++	unichar *ubuf,
|
|
|
|
++	size_t *ulen)
|
|
|
|
++{
|
|
|
|
++	size_t pos = 0;
|
|
|
|
++	int saw_kanji_escape = 0;
|
|
|
|
++
|
|
|
|
++	*ulen = 0;
|
|
|
|
++
|
|
|
|
++	while (pos < nbytes) {
|
|
|
|
++		int cls = jp_chars1[buf[pos]];
|
|
|
|
++
|
|
|
|
++		if (!(cls == T || cls == J))
|
|
|
|
++			return 0;
|
|
|
|
++
|
|
|
|
++		/* The escape only counts when both bytes after ESC are inside the buffer. */
|
|
|
|
++		if (buf[pos] == ESC && pos + 2 < nbytes && buf[pos + 1] == '$') {
|
|
|
|
++			if (buf[pos + 2] == 'B' || buf[pos + 2] == '@')
|
|
|
|
++				saw_kanji_escape = 1;
|
|
|
|
++		}
|
|
|
|
++
|
|
|
|
++		ubuf[(*ulen)++] = buf[pos++];
|
|
|
|
++	}
|
|
|
|
++
|
|
|
|
++	return saw_kanji_escape ? JIS : ASCII;
|
|
|
|
++}
|
|
|
|
++/* Shift_JIS check: returns SJIS, EUCORSJIS (only ambiguous pairs seen) or 0 (no match); copies bytes to ubuf and counts them in *ulen. */
|
|
|
|
++int
|
|
|
|
++check_sjis(
|
|
|
|
++	const unsigned char *buf,
|
|
|
|
++	size_t nbytes,
|
|
|
|
++	unichar *ubuf,
|
|
|
|
++	size_t *ulen)
|
|
|
|
++{
|
|
|
|
++	size_t i;
|
|
|
|
++	int jflag = ASCII;	/* ASCII here means "no two-byte character seen yet" */
|
|
|
|
++
|
|
|
|
++	*ulen = 0;
|
|
|
|
++
|
|
|
|
++	for (i = 0; i < nbytes; i++) {
|
|
|
|
++		int t = jp_chars1[buf[i]];
|
|
|
|
++
|
|
|
|
++		if (t != T && t != J && t != S && t != O)
|
|
|
|
++			return 0;
|
|
|
|
++
|
|
|
|
++		/* A lead byte in the last position has no partner; it is copied as a single byte. */
|
|
|
|
++		if (t == S && i + 1 < nbytes) {
|
|
|
|
++			ubuf[(*ulen)++] = buf[i];
|
|
|
|
++			++i;
|
|
|
|
++			t = jp_chars2[buf[i]];
|
|
|
|
++			if (t != S && t != O)
|
|
|
|
++				return 0;
|
|
|
|
++			jflag = SJIS;
|
|
|
|
++		} else if (t == O && i + 1 < nbytes) {
|
|
|
|
++			ubuf[(*ulen)++] = buf[i];
|
|
|
|
++			++i;
|
|
|
|
++			t = jp_chars2[buf[i]];
|
|
|
|
++			if (t == S)
|
|
|
|
++				jflag = SJIS;
|
|
|
|
++			else if (t != O)
|
|
|
|
++				return 0;
|
|
|
|
++			else if (jflag == ASCII)
|
|
|
|
++				jflag = EUCORSJIS;
|
|
|
|
++		}
|
|
|
|
++
|
|
|
|
++		ubuf[(*ulen)++] = buf[i];
|
|
|
|
++	}
|
|
|
|
++	/* Without a completed two-byte character nothing was proven: report "no match" instead of falling off the end. */
|
|
|
|
++	return (jflag == SJIS || jflag == EUCORSJIS) ? jflag : 0;
|
|
|
|
++}
|
|
|
|
++/* EUC check: returns EUC, EUCORSJIS (only ambiguous pairs seen) or 0 (no match); copies bytes to ubuf and counts them in *ulen. */
|
|
|
|
++int
|
|
|
|
++check_euc(
|
|
|
|
++	const unsigned char *buf,
|
|
|
|
++	size_t nbytes,
|
|
|
|
++	unichar *ubuf,
|
|
|
|
++	size_t *ulen)
|
|
|
|
++{
|
|
|
|
++	size_t i;
|
|
|
|
++	int jflag = ASCII;	/* ASCII here means "no two-byte character seen yet" */
|
|
|
|
++
|
|
|
|
++	*ulen = 0;
|
|
|
|
++
|
|
|
|
++	for (i = 0; i < nbytes; i++) {
|
|
|
|
++		int t = jp_chars1[buf[i]];
|
|
|
|
++
|
|
|
|
++		if (t != T && t != J && t != E && t != O)
|
|
|
|
++			return 0;
|
|
|
|
++
|
|
|
|
++		/* A lead byte in the last position has no partner; it is copied as a single byte. */
|
|
|
|
++		if (t == E && i + 1 < nbytes) {
|
|
|
|
++			ubuf[(*ulen)++] = buf[i];
|
|
|
|
++			++i;
|
|
|
|
++			t = jp_chars2[buf[i]];
|
|
|
|
++			if (t != E && t != O)
|
|
|
|
++				return 0;
|
|
|
|
++			jflag = EUC;
|
|
|
|
++		} else if (t == O && i + 1 < nbytes) {
|
|
|
|
++			ubuf[(*ulen)++] = buf[i];
|
|
|
|
++			++i;
|
|
|
|
++			t = jp_chars2[buf[i]];
|
|
|
|
++			if (t == E)
|
|
|
|
++				jflag = EUC;
|
|
|
|
++			else if (t != O)
|
|
|
|
++				return 0;
|
|
|
|
++			else if (jflag == ASCII)
|
|
|
|
++				jflag = EUCORSJIS;
|
|
|
|
++		}
|
|
|
|
++
|
|
|
|
++		ubuf[(*ulen)++] = buf[i];
|
|
|
|
++	}
|
|
|
|
++
|
|
|
|
++	/* Without a completed two-byte character nothing was proven: report "no match" instead of falling off the end. */
|
|
|
|
++	return (jflag == EUC || jflag == EUCORSJIS) ? jflag : 0;
|
|
|
|
++}
|
|
|
|
++/* Classifies buf as ASCII, JIS, SJIS, EUC or EUCORSJIS by trying the three checks in order; returns 0 when none of them matches. */
|
|
|
|
++int
|
|
|
|
++detect_kcode(
|
|
|
|
++	const unsigned char *buf,
|
|
|
|
++	size_t nbytes,
|
|
|
|
++	unichar *ubuf,
|
|
|
|
++	size_t *ulen)
|
|
|
|
++{
|
|
|
|
++	int ret;
|
|
|
|
++
|
|
|
|
++	/* Each check restarts from the beginning of buf and rewrites ubuf / *ulen. */
|
|
|
|
++	ret = check_asc_jis(buf, nbytes, ubuf, ulen);
|
|
|
|
++	if (ret == ASCII || ret == JIS)
|
|
|
|
++		return ret;
|
|
|
|
++	ret = check_sjis(buf, nbytes, ubuf, ulen);
|
|
|
|
++	if (ret == SJIS || ret == EUCORSJIS)
|
|
|
|
++		return ret;
|
|
|
|
++	ret = check_euc(buf, nbytes, ubuf, ulen);
|
|
|
|
++	return (ret == EUC || ret == EUCORSJIS) ? ret : 0;
|
|
|
|
++}
|
|
|
|
+--- /dev/null 2011-02-06 21:11:58.373999997 +0900
|
|
|
|
++++ file-5.05/src/jcode.h 2011-02-11 17:12:11.000000000 +0900
|
|
|
|
+@@ -0,0 +1,15 @@
|
|
|
|
++/*
|
|
|
|
++ jcode.h - for jcode.c by Jun Nishii <jun@vinelinux.org>
|
|
|
|
++ modified by Ryoichi INAGAKI <inagaki@vinelinux.org>
|
|
|
|
++ */
|
|
|
|
++
|
|
|
|
++#define ASCII 1 /* check_asc_jis: every byte is class T or J and no ESC $ B / ESC $ @ sequence was found */
|
|
|
|
++#define JIS 2 /* check_asc_jis: every byte is class T or J and an ESC $ B or ESC $ @ sequence was found */
|
|
|
|
++#define EUC 3 /* check_euc: at least one byte pair that is only valid as EUC was found */
|
|
|
|
++#define SJIS 4 /* check_sjis: at least one byte pair that is only valid as SJIS was found */
|
|
|
|
++#define EUCORSJIS 5 /* check_sjis / check_euc: only byte pairs valid in both encodings were found */
|
|
|
|
++
|
|
|
|
++extern int detect_kcode (const unsigned char *, size_t, unichar *, size_t *); /* returns one of the codes above; unichar must be declared before this header is included */
|
|
|
|
++extern int looks_jis (const unsigned char *, size_t, unichar *, size_t *); /* NOTE(review): jcode.c contains no definition of this function -- confirm it is still needed */
|
|
|
|
++extern int looks_sjis (const unsigned char *, size_t, unichar *, size_t *); /* NOTE(review): jcode.c contains no definition of this function -- confirm it is still needed */
|
|
|
|
++extern int looks_euc (const unsigned char *, size_t, unichar *, size_t *); /* NOTE(review): jcode.c contains no definition of this function -- confirm it is still needed */
|
|
|
|
+--- file-5.05/src/names.h.vinejtext 2010-10-09 06:58:44.000000000 +0900
|
|
|
|
++++ file-5.05/src/names.h 2011-02-11 17:28:18.000000000 +0900
|
|
|
|
+@@ -135,8 +135,6 @@
|
|
|
|
+ {"/*", L_C, 2 }, /* must precede "The", "the", etc. */
|
|
|
|
+ {"#include", L_C, 2 },
|
|
|
|
+ {"char", L_C, 2 },
|
|
|
|
+- {"The", L_ENG, 2 },
|
|
|
|
+- {"the", L_ENG, 2 },
|
|
|
|
+ {"double", L_C, 1 },
|
|
|
|
+ {"extern", L_C, 2 },
|
|
|
|
+ {"float", L_C, 1 },
|