Blame SOURCES/dos2unix.1

f6f581
.\" Automatically generated by Pod::Man 2.27 (Pod::Simple 3.28)
f6f581
.\"
f6f581
.\" Standard preamble:
f6f581
.\" ========================================================================
f6f581
.de Sp \" Vertical space (when we can't use .PP)
f6f581
.if t .sp .5v
f6f581
.if n .sp
f6f581
..
f6f581
.de Vb \" Begin verbatim text
f6f581
.ft CW
f6f581
.nf
f6f581
.ne \\$1
f6f581
..
f6f581
.de Ve \" End verbatim text
f6f581
.ft R
f6f581
.fi
f6f581
..
f6f581
.\" Set up some character translations and predefined strings.  \*(-- will
f6f581
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
f6f581
.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
f6f581
.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
f6f581
.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
f6f581
.\" nothing in troff, for use with C<>.
f6f581
.tr \(*W-
f6f581
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
f6f581
.ie n \{\
f6f581
.    ds -- \(*W-
f6f581
.    ds PI pi
f6f581
.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
f6f581
.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
f6f581
.    ds L" ""
f6f581
.    ds R" ""
f6f581
.    ds C` ""
f6f581
.    ds C' ""
f6f581
'br\}
f6f581
.el\{\
f6f581
.    ds -- \|\(em\|
f6f581
.    ds PI \(*p
f6f581
.    ds L" ``
f6f581
.    ds R" ''
f6f581
.    ds C`
f6f581
.    ds C'
f6f581
'br\}
f6f581
.\"
f6f581
.\" Escape single quotes in literal strings from groff's Unicode transform.
f6f581
.ie \n(.g .ds Aq \(aq
f6f581
.el       .ds Aq '
f6f581
.\"
f6f581
.\" If the F register is turned on, we'll generate index entries on stderr for
f6f581
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
f6f581
.\" entries marked with X<> in POD.  Of course, you'll have to process the
f6f581
.\" output yourself in some meaningful fashion.
f6f581
.\"
f6f581
.\" Avoid warning from groff about undefined register 'F'.
f6f581
.de IX
f6f581
..
f6f581
.nr rF 0
f6f581
.if \n(.g .if rF .nr rF 1
f6f581
.if (\n(rF:(\n(.g==0)) \{
f6f581
.    if \nF \{
f6f581
.        de IX
f6f581
.        tm Index:\\$1\t\\n%\t"\\$2"
f6f581
..
f6f581
.        if !\nF==2 \{
f6f581
.            nr % 0
f6f581
.            nr F 2
f6f581
.        \}
f6f581
.    \}
f6f581
.\}
f6f581
.rr rF
f6f581
.\"
f6f581
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
f6f581
.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
f6f581
.    \" fudge factors for nroff and troff
f6f581
.if n \{\
f6f581
.    ds #H 0
f6f581
.    ds #V .8m
f6f581
.    ds #F .3m
f6f581
.    ds #[ \f1
f6f581
.    ds #] \fP
f6f581
.\}
f6f581
.if t \{\
f6f581
.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
f6f581
.    ds #V .6m
f6f581
.    ds #F 0
f6f581
.    ds #[ \&
f6f581
.    ds #] \&
f6f581
.\}
f6f581
.    \" simple accents for nroff and troff
f6f581
.if n \{\
f6f581
.    ds ' \&
f6f581
.    ds ` \&
f6f581
.    ds ^ \&
f6f581
.    ds , \&
f6f581
.    ds ~ ~
f6f581
.    ds /
f6f581
.\}
f6f581
.if t \{\
f6f581
.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
f6f581
.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
f6f581
.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
f6f581
.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
f6f581
.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
f6f581
.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
f6f581
.\}
f6f581
.    \" troff and (daisy-wheel) nroff accents
f6f581
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
f6f581
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
f6f581
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
f6f581
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
f6f581
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
f6f581
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
f6f581
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
f6f581
.ds ae a\h'-(\w'a'u*4/10)'e
f6f581
.ds Ae A\h'-(\w'A'u*4/10)'E
f6f581
.    \" corrections for vroff
f6f581
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
f6f581
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
f6f581
.    \" for low resolution devices (crt and lpr)
f6f581
.if \n(.H>23 .if \n(.V>19 \
f6f581
\{\
f6f581
.    ds : e
f6f581
.    ds 8 ss
f6f581
.    ds o a
f6f581
.    ds d- d\h'-1'\(ga
f6f581
.    ds D- D\h'-1'\(hy
f6f581
.    ds th \o'bp'
f6f581
.    ds Th \o'LP'
f6f581
.    ds ae ae
f6f581
.    ds Ae AE
f6f581
.\}
f6f581
.rm #[ #] #H #V #F C
f6f581
.\" ========================================================================
f6f581
.\"
f6f581
.IX Title "dos2unix 1"
f6f581
.TH dos2unix 1 "2012-09-15" "dos2unix" "2017-03-10"
f6f581
.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
f6f581
.\" way too many mistakes in technical documents.
f6f581
.if n .ad l
f6f581
.nh
f6f581
.SH "NAME"
f6f581
dos2unix \- DOS/Mac to Unix and vice versa text file format converter
f6f581
.SH "SYNOPSIS"
f6f581
.IX Header "SYNOPSIS"
f6f581
.Vb 2
f6f581
\&    dos2unix [options] [FILE ...] [\-n INFILE OUTFILE ...]
f6f581
\&    unix2dos [options] [FILE ...] [\-n INFILE OUTFILE ...]
f6f581
.Ve
f6f581
.SH "DESCRIPTION"
f6f581
.IX Header "DESCRIPTION"
f6f581
The Dos2unix package includes utilities \f(CW\*(C`dos2unix\*(C'\fR and \f(CW\*(C`unix2dos\*(C'\fR to convert
f6f581
plain text files in \s-1DOS\s0 or Mac format to Unix format and vice versa.
f6f581
.PP
f6f581
In DOS/Windows text files a line break, also known as newline, is a combination
f6f581
of two characters: a Carriage Return (\s-1CR\s0) followed by a Line Feed (\s-1LF\s0). In Unix
f6f581
text files a line break is a single character: the Line Feed (\s-1LF\s0). In Mac text
f6f581
files, prior to Mac \s-1OS X,\s0 a line break was a single Carriage Return (\s-1CR\s0)
f6f581
character. Nowadays Mac \s-1OS\s0 uses Unix style (\s-1LF\s0) line breaks.
f6f581
.PP
f6f581
Binary files are automatically skipped, unless conversion is forced.
f6f581
.PP
f6f581
Non-regular files, such as directories and FIFOs, are automatically skipped.
f6f581
.PP
f6f581
Symbolic links and their targets are by default kept untouched.
f6f581
Symbolic links can optionally be replaced, or the output can be written
f6f581
to the symbolic link target.
f6f581
Symbolic links on Windows are not supported. Windows symbolic links
f6f581
are always replaced, keeping the targets unchanged.
f6f581
.PP
f6f581
Dos2unix was modelled after dos2unix under SunOS/Solaris and has similar
f6f581
conversion modes.
f6f581
.SH "OPTIONS"
f6f581
.IX Header "OPTIONS"
f6f581
.IP "\fB\-\-\fR" 4
f6f581
.IX Item "--"
f6f581
Treat all following options as file names. Use this option if you want to
f6f581
convert files whose names start with a dash. For instance to convert
f6f581
a file named \*(L"\-foo\*(R", you can use this command:
f6f581
.Sp
f6f581
.Vb 1
f6f581
\&    dos2unix \-\- \-foo
f6f581
.Ve
f6f581
.Sp
f6f581
Or in new file mode:
f6f581
.Sp
f6f581
.Vb 1
f6f581
\&    dos2unix \-n \-\- \-foo out.txt
f6f581
.Ve
f6f581
.IP "\fB\-ascii\fR" 4
f6f581
.IX Item "-ascii"
f6f581
Convert only line breaks. This is the default conversion mode.
f6f581
.IP "\fB\-iso\fR" 4
f6f581
.IX Item "-iso"
f6f581
Conversion between \s-1DOS\s0 and \s-1ISO\-8859\-1\s0 character set. See also section
f6f581
\&\s-1CONVERSION MODES.\s0
f6f581
.IP "\fB\-1252\fR" 4
f6f581
.IX Item "-1252"
f6f581
Use Windows code page 1252 (Western European).
f6f581
.IP "\fB\-437\fR" 4
f6f581
.IX Item "-437"
f6f581
Use \s-1DOS\s0 code page 437 (\s-1US\s0). This is the default code page used for \s-1ISO\s0 conversion.
f6f581
.IP "\fB\-850\fR" 4
f6f581
.IX Item "-850"
f6f581
Use \s-1DOS\s0 code page 850 (Western European).
f6f581
.IP "\fB\-860\fR" 4
f6f581
.IX Item "-860"
f6f581
Use \s-1DOS\s0 code page 860 (Portuguese).
f6f581
.IP "\fB\-863\fR" 4
f6f581
.IX Item "-863"
f6f581
Use \s-1DOS\s0 code page 863 (French Canadian).
f6f581
.IP "\fB\-865\fR" 4
f6f581
.IX Item "-865"
f6f581
Use \s-1DOS\s0 code page 865 (Nordic).
f6f581
.IP "\fB\-7\fR" 4
f6f581
.IX Item "-7"
f6f581
Convert 8 bit characters to 7 bit space.
f6f581
.IP "\fB\-c, \-\-convmode \s-1CONVMODE\s0\fR" 4
f6f581
.IX Item "-c, --convmode CONVMODE"
f6f581
Set conversion mode. Where \s-1CONVMODE\s0 is one of:
f6f581
\&\fIascii\fR, \fI7bit\fR, \fIiso\fR, \fImac\fR
f6f581
with ascii being the default.
f6f581
.IP "\fB\-f, \-\-force\fR" 4
f6f581
.IX Item "-f, --force"
f6f581
Force conversion of binary files.
f6f581
.IP "\fB\-h, \-\-help\fR" 4
f6f581
.IX Item "-h, --help"
f6f581
Display help and exit.
f6f581
.IP "\fB\-k, \-\-keepdate\fR" 4
f6f581
.IX Item "-k, --keepdate"
f6f581
Keep the date stamp of output file same as input file.
f6f581
.IP "\fB\-L, \-\-license\fR" 4
f6f581
.IX Item "-L, --license"
f6f581
Display program's license.
f6f581
.IP "\fB\-l, \-\-newline\fR" 4
f6f581
.IX Item "-l, --newline"
f6f581
Add additional newline.
f6f581
.Sp
f6f581
\&\fBdos2unix\fR: Only \s-1DOS\s0 line breaks are changed to two Unix line breaks.
f6f581
In Mac mode only Mac line breaks are changed to two Unix
f6f581
line breaks.
f6f581
.Sp
f6f581
\&\fBunix2dos\fR: Only Unix line breaks are changed to two \s-1DOS\s0 line breaks.
f6f581
In Mac mode Unix line breaks are changed to two Mac line breaks.
f6f581
.IP "\fB\-m, \-\-add\-bom\fR" 4
f6f581
.IX Item "-m, --add-bom"
f6f581
Write a \s-1UTF\-8\s0 Byte Order Mark in the output file. Never use this option when
f6f581
the output encoding is other than \s-1UTF\-8.\s0 See also section \s-1UNICODE.\s0
f6f581
.IP "\fB\-n, \-\-newfile \s-1INFILE OUTFILE ...\s0\fR" 4
f6f581
.IX Item "-n, --newfile INFILE OUTFILE ..."
f6f581
New file mode. Convert file \s-1INFILE\s0 and write output to file \s-1OUTFILE.\s0
f6f581
File names must be given in pairs and wildcard names should \fInot\fR be
f6f581
used or you \fIwill\fR lose your files.
f6f581
.Sp
f6f581
The person who starts the conversion in new file (paired) mode will be the owner
f6f581
of the converted file. The read/write permissions of the new file will be the
f6f581
permissions of the original file minus the \fIumask\fR\|(1) of the person who runs the
f6f581
conversion.
f6f581
.IP "\fB\-o, \-\-oldfile \s-1FILE ...\s0\fR" 4
f6f581
.IX Item "-o, --oldfile FILE ..."
f6f581
Old file mode. Convert file \s-1FILE\s0 and overwrite output to it. The program
f6f581
defaults to run in this mode. Wildcard names may be used.
f6f581
.Sp
f6f581
In old file (in-place) mode the converted file gets the same owner, group, and
f6f581
read/write permissions as the original file. This also holds when the file is converted by
f6f581
another user who has write permissions on the file (e.g. user root).  The
f6f581
conversion will be aborted when it is not possible to preserve the original
f6f581
values.  Change of owner could mean that the original owner is not able to read
f6f581
the file any more. Change of group could be a security risk; the file could be
f6f581
made readable for persons for whom it is not intended.  Preservation of owner,
f6f581
group, and read/write permissions is only supported on Unix.
f6f581
.IP "\fB\-q, \-\-quiet\fR" 4
f6f581
.IX Item "-q, --quiet"
f6f581
Quiet mode. Suppress all warnings and messages. The return value is zero.
f6f581
The only exception is when wrong command-line options are used.
f6f581
.IP "\fB\-s, \-\-safe\fR" 4
f6f581
.IX Item "-s, --safe"
f6f581
Skip binary files (default).
f6f581
.IP "\fB\-F, \-\-follow\-symlink\fR" 4
f6f581
.IX Item "-F, --follow-symlink"
f6f581
Follow symbolic links and convert the targets.
f6f581
.IP "\fB\-R, \-\-replace\-symlink\fR" 4
f6f581
.IX Item "-R, --replace-symlink"
f6f581
Replace symbolic links with converted files
f6f581
(original target files remain unchanged).
f6f581
.IP "\fB\-S, \-\-skip\-symlink\fR" 4
f6f581
.IX Item "-S, --skip-symlink"
f6f581
Keep symbolic links and targets unchanged (default).
f6f581
.IP "\fB\-V, \-\-version\fR" 4
f6f581
.IX Item "-V, --version"
f6f581
Display version information and exit.
f6f581
.SH "MAC MODE"
f6f581
.IX Header "MAC MODE"
f6f581
In normal mode line breaks are converted from \s-1DOS\s0 to Unix and vice versa.
f6f581
Mac line breaks are not converted.
f6f581
.PP
f6f581
In Mac mode line breaks are converted from Mac to Unix and vice versa. \s-1DOS\s0
f6f581
line breaks are not changed.
f6f581
.PP
f6f581
To run in Mac mode use the command-line option \f(CW\*(C`\-c mac\*(C'\fR or use the
f6f581
commands \f(CW\*(C`mac2unix\*(C'\fR or \f(CW\*(C`unix2mac\*(C'\fR.
f6f581
.SH "CONVERSION MODES"
f6f581
.IX Header "CONVERSION MODES"
f6f581
Conversion modes \fIascii\fR, \fI7bit\fR, and \fIiso\fR
f6f581
are similar to those of dos2unix/unix2dos under SunOS/Solaris.
f6f581
.IP "\fBascii\fR" 4
f6f581
.IX Item "ascii"
f6f581
In mode \f(CW\*(C`ascii\*(C'\fR only line breaks are converted. This is the default
f6f581
conversion mode.
f6f581
.Sp
f6f581
Although the name of this mode is \s-1ASCII,\s0 which is a 7 bit standard, the
f6f581
actual mode is 8 bit. Always use this mode when converting Unicode \s-1UTF\-8\s0
f6f581
files.
f6f581
.IP "\fB7bit\fR" 4
f6f581
.IX Item "7bit"
f6f581
In this mode all 8 bit non-ASCII characters (with values from 128 to 255)
f6f581
are converted to a 7 bit space.
f6f581
.IP "\fBiso\fR" 4
f6f581
.IX Item "iso"
f6f581
Characters are converted between a \s-1DOS\s0 character set (code page) and \s-1ISO\s0
f6f581
character set \s-1ISO\-8859\-1 \s0(Latin\-1) on Unix. \s-1DOS\s0 characters without \s-1ISO\-8859\-1\s0
f6f581
equivalent, for which conversion is not possible, are converted to a dot. The
f6f581
same applies to \s-1ISO\-8859\-1\s0 characters without \s-1DOS\s0 counterpart.
f6f581
.Sp
f6f581
When only option \f(CW\*(C`\-iso\*(C'\fR is used dos2unix will try to determine the active code
f6f581
page. When this is not possible dos2unix will use default code page \s-1CP437,\s0
f6f581
which is mainly used in the \s-1USA. \s0 To force a specific code page use options
f6f581
\&\f(CW\*(C`\-437\*(C'\fR (\s-1US\s0), \f(CW\*(C`\-850\*(C'\fR (Western European), \f(CW\*(C`\-860\*(C'\fR (Portuguese), \f(CW\*(C`\-863\*(C'\fR (French
f6f581
Canadian), or \f(CW\*(C`\-865\*(C'\fR (Nordic).  Windows code page \s-1CP1252 \s0(Western European) is
f6f581
also supported with option \f(CW\*(C`\-1252\*(C'\fR. For other code pages use dos2unix in
f6f581
combination with \fIiconv\fR\|(1).  Iconv can convert between a long list of character
f6f581
encodings.
f6f581
.Sp
f6f581
Never use \s-1ISO\s0 conversion on Unicode text files. It will corrupt \s-1UTF\-8\s0 encoded files.
f6f581
.Sp
f6f581
Some examples:
f6f581
.Sp
f6f581
Convert from \s-1DOS\s0 default code page to Unix Latin\-1
f6f581
.Sp
f6f581
.Vb 1
f6f581
\&    dos2unix \-iso \-n in.txt out.txt
f6f581
.Ve
f6f581
.Sp
f6f581
Convert from \s-1DOS CP850\s0 to Unix Latin\-1
f6f581
.Sp
f6f581
.Vb 1
f6f581
\&    dos2unix \-850 \-n in.txt out.txt
f6f581
.Ve
f6f581
.Sp
f6f581
Convert from Windows \s-1CP1252\s0 to Unix Latin\-1
f6f581
.Sp
f6f581
.Vb 1
f6f581
\&    dos2unix \-1252 \-n in.txt out.txt
f6f581
.Ve
f6f581
.Sp
f6f581
Convert from Windows \s-1CP1252\s0 to Unix \s-1UTF\-8 \s0(Unicode)
f6f581
.Sp
f6f581
.Vb 1
f6f581
\&    iconv \-f CP1252 \-t UTF\-8 in.txt | dos2unix > out.txt
f6f581
.Ve
f6f581
.Sp
f6f581
Convert from Unix Latin\-1 to \s-1DOS\s0 default code page.
f6f581
.Sp
f6f581
.Vb 1
f6f581
\&    unix2dos \-iso \-n in.txt out.txt
f6f581
.Ve
f6f581
.Sp
f6f581
Convert from Unix Latin\-1 to \s-1DOS CP850\s0
f6f581
.Sp
f6f581
.Vb 1
f6f581
\&    unix2dos \-850 \-n in.txt out.txt
f6f581
.Ve
f6f581
.Sp
f6f581
Convert from Unix Latin\-1 to Windows \s-1CP1252\s0
f6f581
.Sp
f6f581
.Vb 1
f6f581
\&    unix2dos \-1252 \-n in.txt out.txt
f6f581
.Ve
f6f581
.Sp
f6f581
Convert from Unix \s-1UTF\-8 \s0(Unicode) to Windows \s-1CP1252\s0
f6f581
.Sp
f6f581
.Vb 1
f6f581
\&    unix2dos < in.txt | iconv \-f UTF\-8 \-t CP1252 > out.txt
f6f581
.Ve
f6f581
.Sp
f6f581
See also <http://czyborra.com/charsets/codepages.html>
f6f581
and <http://czyborra.com/charsets/iso8859.html>.
f6f581
.SH "UNICODE"
f6f581
.IX Header "UNICODE"
f6f581
.SS "Encodings"
f6f581
.IX Subsection "Encodings"
f6f581
There exist different Unicode encodings. On Unix and Linux Unicode files are
f6f581
typically encoded in \s-1UTF\-8\s0 encoding. On Windows Unicode text files can be
f6f581
encoded in \s-1UTF\-8, UTF\-16,\s0 or \s-1UTF\-16\s0 big endian, but are mostly encoded in
f6f581
\&\s-1UTF\-16\s0 format.
f6f581
.SS "Conversion"
f6f581
.IX Subsection "Conversion"
f6f581
Unicode text files can have \s-1DOS,\s0 Unix or Mac line breaks, like regular text
f6f581
files.
f6f581
.PP
f6f581
All versions of dos2unix and unix2dos can convert \s-1UTF\-8\s0 encoded files, because
f6f581
\&\s-1UTF\-8\s0 was designed for backward compatibility with \s-1ASCII.\s0
f6f581
.PP
f6f581
Dos2unix and unix2dos with Unicode \s-1UTF\-16\s0 support can read little and big
f6f581
endian \s-1UTF\-16\s0 encoded text files. To see if dos2unix was built with \s-1UTF\-16\s0
f6f581
support type \f(CW\*(C`dos2unix \-V\*(C'\fR.
f6f581
.PP
f6f581
The Windows versions of dos2unix and unix2dos convert \s-1UTF\-16\s0 encoded files
f6f581
always to \s-1UTF\-8\s0 encoded files. Unix versions of dos2unix/unix2dos convert
f6f581
\&\s-1UTF\-16\s0 encoded files to the locale character encoding when it is set to \s-1UTF\-8.\s0
f6f581
Use the \fIlocale\fR\|(1) command to find out what the locale character encoding is.
f6f581
.PP
f6f581
Because \s-1UTF\-8\s0 formatted text files are well supported on both Windows and Unix,
f6f581
dos2unix and unix2dos have no option to write \s-1UTF\-16\s0 files. All \s-1UTF\-16\s0
f6f581
characters can be encoded in \s-1UTF\-8.\s0 Conversion from \s-1UTF\-16\s0 to \s-1UTF\-8\s0 is without
f6f581
loss. \s-1UTF\-16\s0 files will be skipped on Unix when the locale character encoding
f6f581
is not \s-1UTF\-8,\s0 to prevent accidental loss of text. When a \s-1UTF\-16\s0 to \s-1UTF\-8\s0
f6f581
conversion error occurs, for instance when the \s-1UTF\-16\s0 input file contains
f6f581
an error, the file will be skipped.
f6f581
.PP
f6f581
\&\s-1ISO\s0 and 7\-bit mode conversion do not work on \s-1UTF\-16\s0 files.
f6f581
.SS "Byte Order Mark"
f6f581
.IX Subsection "Byte Order Mark"
f6f581
On Windows Unicode text files typically have a Byte Order Mark (\s-1BOM\s0), because
f6f581
many Windows programs (including Notepad) add BOMs by default. See also
f6f581
<http://en.wikipedia.org/wiki/Byte_order_mark>.
f6f581
.PP
f6f581
On Unix Unicode files typically don't have a \s-1BOM.\s0 It is assumed that text files
f6f581
are encoded in the locale character encoding.
f6f581
.PP
f6f581
Dos2unix can only detect if a file is in \s-1UTF\-16\s0 format if the file has a \s-1BOM.\s0
f6f581
When a \s-1UTF\-16\s0 file doesn't have a \s-1BOM,\s0 dos2unix will see the file as a binary
f6f581
file.
f6f581
.PP
f6f581
Use dos2unix in combination with \fIiconv\fR\|(1) to convert a \s-1UTF\-16\s0 file without
f6f581
\&\s-1BOM.\s0
f6f581
.PP
f6f581
Dos2unix never writes a \s-1BOM\s0 in the output file, unless you use option \f(CW\*(C`\-m\*(C'\fR.
f6f581
.PP
f6f581
Unix2dos writes a \s-1BOM\s0 in the output file when the input file has a \s-1BOM,\s0 or
f6f581
when option \f(CW\*(C`\-m\*(C'\fR is used.
f6f581
.SS "Unicode examples"
f6f581
.IX Subsection "Unicode examples"
f6f581
Convert from Windows \s-1UTF\-16 \s0(with \s-1BOM\s0) to Unix \s-1UTF\-8\s0
f6f581
.PP
f6f581
.Vb 1
f6f581
\&    dos2unix \-n in.txt out.txt
f6f581
.Ve
f6f581
.PP
f6f581
Convert from Windows \s-1UTF\-16 \s0(without \s-1BOM\s0) to Unix \s-1UTF\-8\s0
f6f581
.PP
f6f581
.Vb 1
f6f581
\&    iconv \-f UTF\-16 \-t UTF\-8 in.txt | dos2unix > out.txt
f6f581
.Ve
f6f581
.PP
f6f581
Convert from Unix \s-1UTF\-8\s0 to Windows \s-1UTF\-8\s0 with \s-1BOM\s0
f6f581
.PP
f6f581
.Vb 1
f6f581
\&    unix2dos \-m \-n in.txt out.txt
f6f581
.Ve
f6f581
.PP
f6f581
Convert from Unix \s-1UTF\-8\s0 to Windows \s-1UTF\-16\s0
f6f581
.PP
f6f581
.Vb 1
f6f581
\&    unix2dos < in.txt | iconv \-f UTF\-8 \-t UTF\-16 > out.txt
f6f581
.Ve
f6f581
.SH "EXAMPLES"
f6f581
.IX Header "EXAMPLES"
f6f581
Read input from 'stdin' and write output to 'stdout'.
f6f581
.PP
f6f581
.Vb 2
f6f581
\&    dos2unix
f6f581
\&    dos2unix \-l \-c mac
f6f581
.Ve
f6f581
.PP
f6f581
Convert and replace a.txt. Convert and replace b.txt.
f6f581
.PP
f6f581
.Vb 2
f6f581
\&    dos2unix a.txt b.txt
f6f581
\&    dos2unix \-o a.txt b.txt
f6f581
.Ve
f6f581
.PP
f6f581
Convert and replace a.txt in ascii conversion mode.
f6f581
.PP
f6f581
.Vb 1
f6f581
\&    dos2unix a.txt
f6f581
.Ve
f6f581
.PP
f6f581
Convert and replace a.txt in ascii conversion mode.
f6f581
Convert and replace b.txt in 7bit conversion mode.
f6f581
.PP
f6f581
.Vb 3
f6f581
\&    dos2unix a.txt \-c 7bit b.txt
f6f581
\&    dos2unix \-c ascii a.txt \-c 7bit b.txt
f6f581
\&    dos2unix \-ascii a.txt \-7 b.txt
f6f581
.Ve
f6f581
.PP
f6f581
Convert a.txt from Mac to Unix format.
f6f581
.PP
f6f581
.Vb 2
f6f581
\&    dos2unix \-c mac a.txt
f6f581
\&    mac2unix a.txt
f6f581
.Ve
f6f581
.PP
f6f581
Convert a.txt from Unix to Mac format.
f6f581
.PP
f6f581
.Vb 2
f6f581
\&    unix2dos \-c mac a.txt
f6f581
\&    unix2mac a.txt
f6f581
.Ve
f6f581
.PP
f6f581
Convert and replace a.txt while keeping original date stamp.
f6f581
.PP
f6f581
.Vb 2
f6f581
\&    dos2unix \-k a.txt
f6f581
\&    dos2unix \-k \-o a.txt
f6f581
.Ve
f6f581
.PP
f6f581
Convert a.txt and write to e.txt.
f6f581
.PP
f6f581
.Vb 1
f6f581
\&    dos2unix \-n a.txt e.txt
f6f581
.Ve
f6f581
.PP
f6f581
Convert a.txt and write to e.txt, keep date stamp of e.txt same as a.txt.
f6f581
.PP
f6f581
.Vb 1
f6f581
\&    dos2unix \-k \-n a.txt e.txt
f6f581
.Ve
f6f581
.PP
f6f581
Convert and replace a.txt. Convert b.txt and write to e.txt.
f6f581
.PP
f6f581
.Vb 2
f6f581
\&    dos2unix a.txt \-n b.txt e.txt
f6f581
\&    dos2unix \-o a.txt \-n b.txt e.txt
f6f581
.Ve
f6f581
.PP
f6f581
Convert c.txt and write to e.txt. Convert and replace a.txt.
f6f581
Convert and replace b.txt. Convert d.txt and write to f.txt.
f6f581
.PP
f6f581
.Vb 1
f6f581
\&    dos2unix \-n c.txt e.txt \-o a.txt b.txt \-n d.txt f.txt
f6f581
.Ve
f6f581
.SH "RECURSIVE CONVERSION"
f6f581
.IX Header "RECURSIVE CONVERSION"
f6f581
Use dos2unix in combination with the \fIfind\fR\|(1) and \fIxargs\fR\|(1) commands to
f6f581
recursively convert text files in a directory tree structure. For instance to
f6f581
convert all .txt files in the directory tree under the current directory type:
f6f581
.PP
f6f581
.Vb 1
f6f581
\&    find . \-name \*(Aq*.txt\*(Aq |xargs dos2unix
f6f581
.Ve
f6f581
.SH "LOCALIZATION"
f6f581
.IX Header "LOCALIZATION"
f6f581
.IP "\fB\s-1LANG\s0\fR" 4
f6f581
.IX Item "LANG"
f6f581
The primary language is selected with the environment variable \s-1LANG.\s0 The \s-1LANG\s0
f6f581
variable consists of several parts. The first part is in small letters the
f6f581
language code. The second is optional and is the country code in capital
f6f581
letters, preceded with an underscore. There is also an optional third part:
f6f581
character encoding, preceded with a dot. A few examples for \s-1POSIX\s0 standard type
f6f581
shells:
f6f581
.Sp
f6f581
.Vb 7
f6f581
\&    export LANG=nl               Dutch
f6f581
\&    export LANG=nl_NL            Dutch, The Netherlands
f6f581
\&    export LANG=nl_BE            Dutch, Belgium
f6f581
\&    export LANG=es_ES            Spanish, Spain
f6f581
\&    export LANG=es_MX            Spanish, Mexico
f6f581
\&    export LANG=en_US.iso88591   English, USA, Latin\-1 encoding
f6f581
\&    export LANG=en_GB.UTF\-8      English, UK, UTF\-8 encoding
f6f581
.Ve
f6f581
.Sp
f6f581
For a complete list of language and country codes see the gettext manual:
f6f581
<http://www.gnu.org/software/gettext/manual/gettext.html#Language\-Codes>
f6f581
.Sp
f6f581
On Unix systems you can use the command \fIlocale\fR\|(1) to get locale specific
f6f581
information.
f6f581
.IP "\fB\s-1LANGUAGE\s0\fR" 4
f6f581
.IX Item "LANGUAGE"
f6f581
With the \s-1LANGUAGE\s0 environment variable you can specify a priority list of
f6f581
languages, separated by colons. Dos2unix gives preference to \s-1LANGUAGE\s0 over \s-1LANG.\s0
f6f581
For instance, first Dutch and then German: \f(CW\*(C`LANGUAGE=nl:de\*(C'\fR. You have to first
f6f581
enable localization, by setting \s-1LANG \s0(or \s-1LC_ALL\s0) to a value other than
f6f581
\&\*(L"C\*(R", before you can use a language priority list through the \s-1LANGUAGE\s0
f6f581
variable. See also the gettext manual:
f6f581
<http://www.gnu.org/software/gettext/manual/gettext.html#The\-LANGUAGE\-variable>
f6f581
.Sp
f6f581
If you select a language which is not available you will get the
f6f581
standard English messages.
f6f581
.IP "\fB\s-1DOS2UNIX_LOCALEDIR\s0\fR" 4
f6f581
.IX Item "DOS2UNIX_LOCALEDIR"
f6f581
With the environment variable \s-1DOS2UNIX_LOCALEDIR\s0 the \s-1LOCALEDIR\s0 set
f6f581
during compilation can be overruled. \s-1LOCALEDIR\s0 is used to find the
f6f581
language files. The \s-1GNU\s0 default value is \f(CW\*(C`/usr/local/share/locale\*(C'\fR.
f6f581
Option \fB\-\-version\fR will display the \s-1LOCALEDIR\s0 that is used.
f6f581
.Sp
f6f581
Example (\s-1POSIX\s0 shell):
f6f581
.Sp
f6f581
.Vb 1
f6f581
\&    export DOS2UNIX_LOCALEDIR=$HOME/share/locale
f6f581
.Ve
f6f581
.SH "RETURN VALUE"
f6f581
.IX Header "RETURN VALUE"
f6f581
On success, zero is returned.  When a system error occurs the last system error will be
f6f581
returned. For other errors 1 is returned.
f6f581
.PP
f6f581
The return value is always zero in quiet mode, except when wrong command-line options
f6f581
are used.
f6f581
.SH "STANDARDS"
f6f581
.IX Header "STANDARDS"
f6f581
<http://en.wikipedia.org/wiki/Text_file>
f6f581
.PP
f6f581
<http://en.wikipedia.org/wiki/Carriage_return>
f6f581
.PP
f6f581
<http://en.wikipedia.org/wiki/Newline>
f6f581
.PP
f6f581
<http://en.wikipedia.org/wiki/Unicode>
f6f581
.SH "AUTHORS"
f6f581
.IX Header "AUTHORS"
f6f581
Benjamin Lin \- <blin@socs.uts.edu.au>,
f6f581
Bernd Johannes Wuebben (mac2unix mode) \- <wuebben@kde.org>,
f6f581
Christian Wurll (add extra newline) \- <wurll@ira.uka.de>,
f6f581
Erwin Waterlander \- <waterlan@xs4all.nl> (Maintainer)
f6f581
.PP
f6f581
Project page: <http://waterlan.home.xs4all.nl/dos2unix.html>
f6f581
.PP
f6f581
SourceForge page: <http://sourceforge.net/projects/dos2unix/>
f6f581
.PP
f6f581
Freecode: <http://freecode.com/projects/dos2unix>
f6f581
.SH "SEE ALSO"
f6f581
.IX Header "SEE ALSO"
f6f581
\&\fIfile\fR\|(1)
f6f581
\&\fIfind\fR\|(1)
f6f581
\&\fIiconv\fR\|(1)
f6f581
\&\fIlocale\fR\|(1)
f6f581
\&\fIxargs\fR\|(1)