Blame SOURCES/dos2unix.1

9e1524
.\" Automatically generated by Pod::Man 2.27 (Pod::Simple 3.28)
9e1524
.\"
9e1524
.\" Standard preamble:
9e1524
.\" ========================================================================
9e1524
.de Sp \" Vertical space (when we can't use .PP)
9e1524
.if t .sp .5v
9e1524
.if n .sp
9e1524
..
9e1524
.de Vb \" Begin verbatim text
9e1524
.ft CW
9e1524
.nf
9e1524
.ne \\$1
9e1524
..
9e1524
.de Ve \" End verbatim text
9e1524
.ft R
9e1524
.fi
9e1524
..
9e1524
.\" Set up some character translations and predefined strings.  \*(-- will
9e1524
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
9e1524
.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
9e1524
.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
9e1524
.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
9e1524
.\" nothing in troff, for use with C<>.
9e1524
.tr \(*W-
9e1524
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
9e1524
.ie n \{\
9e1524
.    ds -- \(*W-
9e1524
.    ds PI pi
9e1524
.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
9e1524
.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
9e1524
.    ds L" ""
9e1524
.    ds R" ""
9e1524
.    ds C` ""
9e1524
.    ds C' ""
9e1524
'br\}
9e1524
.el\{\
9e1524
.    ds -- \|\(em\|
9e1524
.    ds PI \(*p
9e1524
.    ds L" ``
9e1524
.    ds R" ''
9e1524
.    ds C`
9e1524
.    ds C'
9e1524
'br\}
9e1524
.\"
9e1524
.\" Escape single quotes in literal strings from groff's Unicode transform.
9e1524
.ie \n(.g .ds Aq \(aq
9e1524
.el       .ds Aq '
9e1524
.\"
9e1524
.\" If the F register is turned on, we'll generate index entries on stderr for
9e1524
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
9e1524
.\" entries marked with X<> in POD.  Of course, you'll have to process the
9e1524
.\" output yourself in some meaningful fashion.
9e1524
.\"
9e1524
.\" Avoid warning from groff about undefined register 'F'.
9e1524
.de IX
9e1524
..
9e1524
.nr rF 0
9e1524
.if \n(.g .if rF .nr rF 1
9e1524
.if (\n(rF:(\n(.g==0)) \{
9e1524
.    if \nF \{
9e1524
.        de IX
9e1524
.        tm Index:\\$1\t\\n%\t"\\$2"
9e1524
..
9e1524
.        if !\nF==2 \{
9e1524
.            nr % 0
9e1524
.            nr F 2
9e1524
.        \}
9e1524
.    \}
9e1524
.\}
9e1524
.rr rF
9e1524
.\"
9e1524
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
9e1524
.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
9e1524
.    \" fudge factors for nroff and troff
9e1524
.if n \{\
9e1524
.    ds #H 0
9e1524
.    ds #V .8m
9e1524
.    ds #F .3m
9e1524
.    ds #[ \f1
9e1524
.    ds #] \fP
9e1524
.\}
9e1524
.if t \{\
9e1524
.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
9e1524
.    ds #V .6m
9e1524
.    ds #F 0
9e1524
.    ds #[ \&
9e1524
.    ds #] \&
9e1524
.\}
9e1524
.    \" simple accents for nroff and troff
9e1524
.if n \{\
9e1524
.    ds ' \&
9e1524
.    ds ` \&
9e1524
.    ds ^ \&
9e1524
.    ds , \&
9e1524
.    ds ~ ~
9e1524
.    ds /
9e1524
.\}
9e1524
.if t \{\
9e1524
.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
9e1524
.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
9e1524
.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
9e1524
.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
9e1524
.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
9e1524
.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
9e1524
.\}
9e1524
.    \" troff and (daisy-wheel) nroff accents
9e1524
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
9e1524
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
9e1524
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
9e1524
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
9e1524
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
9e1524
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
9e1524
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
9e1524
.ds ae a\h'-(\w'a'u*4/10)'e
9e1524
.ds Ae A\h'-(\w'A'u*4/10)'E
9e1524
.    \" corrections for vroff
9e1524
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
9e1524
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
9e1524
.    \" for low resolution devices (crt and lpr)
9e1524
.if \n(.H>23 .if \n(.V>19 \
9e1524
\{\
9e1524
.    ds : e
9e1524
.    ds 8 ss
9e1524
.    ds o a
9e1524
.    ds d- d\h'-1'\(ga
9e1524
.    ds D- D\h'-1'\(hy
9e1524
.    ds th \o'bp'
9e1524
.    ds Th \o'LP'
9e1524
.    ds ae ae
9e1524
.    ds Ae AE
9e1524
.\}
9e1524
.rm #[ #] #H #V #F C
9e1524
.\" ========================================================================
9e1524
.\"
9e1524
.IX Title "dos2unix 1"
9e1524
.TH dos2unix 1 "2012-09-15" "dos2unix" "2017-03-10"
9e1524
.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
9e1524
.\" way too many mistakes in technical documents.
9e1524
.if n .ad l
9e1524
.nh
9e1524
.SH "NAME"
9e1524
dos2unix \- DOS/Mac to Unix and vice versa text file format converter
9e1524
.SH "SYNOPSIS"
9e1524
.IX Header "SYNOPSIS"
9e1524
.Vb 2
9e1524
\&    dos2unix [options] [FILE ...] [\-n INFILE OUTFILE ...]
9e1524
\&    unix2dos [options] [FILE ...] [\-n INFILE OUTFILE ...]
9e1524
.Ve
9e1524
.SH "DESCRIPTION"
9e1524
.IX Header "DESCRIPTION"
9e1524
The Dos2unix package includes utilities \f(CW\*(C`dos2unix\*(C'\fR and \f(CW\*(C`unix2dos\*(C'\fR to convert
9e1524
plain text files in \s-1DOS\s0 or Mac format to Unix format and vice versa.
9e1524
.PP
9e1524
In DOS/Windows text files a line break, also known as newline, is a combination
9e1524
of two characters: a Carriage Return (\s-1CR\s0) followed by a Line Feed (\s-1LF\s0). In Unix
9e1524
text files a line break is a single character: the Line Feed (\s-1LF\s0). In Mac text
9e1524
files, prior to Mac \s-1OS X,\s0 a line break was a single Carriage Return (\s-1CR\s0)
9e1524
character. Nowadays Mac \s-1OS\s0 uses Unix style (\s-1LF\s0) line breaks.
9e1524
.PP
9e1524
Binary files are automatically skipped, unless conversion is forced.
9e1524
.PP
9e1524
Non-regular files, such as directories and FIFOs, are automatically skipped.
9e1524
.PP
9e1524
Symbolic links and their targets are by default kept untouched.
9e1524
Symbolic links can optionally be replaced, or the output can be written
9e1524
to the symbolic link target.
9e1524
Symbolic links on Windows are not supported. Windows symbolic links
9e1524
are always replaced, keeping the targets unchanged.
9e1524
.PP
9e1524
Dos2unix was modelled after dos2unix under SunOS/Solaris and has similar
9e1524
conversion modes.
9e1524
.SH "OPTIONS"
9e1524
.IX Header "OPTIONS"
9e1524
.IP "\fB\-\-\fR" 4
9e1524
.IX Item "--"
9e1524
Treat all following options as file names. Use this option if you want to
9e1524
convert files whose names start with a dash. For instance to convert
9e1524
a file named \*(L"\-foo\*(R", you can use this command:
9e1524
.Sp
9e1524
.Vb 1
9e1524
\&    dos2unix \-\- \-foo
9e1524
.Ve
9e1524
.Sp
9e1524
Or in new file mode:
9e1524
.Sp
9e1524
.Vb 1
9e1524
\&    dos2unix \-n \-\- \-foo out.txt
9e1524
.Ve
9e1524
.IP "\fB\-ascii\fR" 4
9e1524
.IX Item "-ascii"
9e1524
Convert only line breaks. This is the default conversion mode.
9e1524
.IP "\fB\-iso\fR" 4
9e1524
.IX Item "-iso"
9e1524
Conversion between \s-1DOS\s0 and \s-1ISO\-8859\-1\s0 character set. See also section
9e1524
\&\s-1CONVERSION MODES.\s0
9e1524
.IP "\fB\-1252\fR" 4
9e1524
.IX Item "-1252"
9e1524
Use Windows code page 1252 (Western European).
9e1524
.IP "\fB\-437\fR" 4
9e1524
.IX Item "-437"
9e1524
Use \s-1DOS\s0 code page 437 (\s-1US\s0). This is the default code page used for \s-1ISO\s0 conversion.
9e1524
.IP "\fB\-850\fR" 4
9e1524
.IX Item "-850"
9e1524
Use \s-1DOS\s0 code page 850 (Western European).
9e1524
.IP "\fB\-860\fR" 4
9e1524
.IX Item "-860"
9e1524
Use \s-1DOS\s0 code page 860 (Portuguese).
9e1524
.IP "\fB\-863\fR" 4
9e1524
.IX Item "-863"
9e1524
Use \s-1DOS\s0 code page 863 (French Canadian).
9e1524
.IP "\fB\-865\fR" 4
9e1524
.IX Item "-865"
9e1524
Use \s-1DOS\s0 code page 865 (Nordic).
9e1524
.IP "\fB\-7\fR" 4
9e1524
.IX Item "-7"
9e1524
Convert 8 bit characters to 7 bit space.
9e1524
.IP "\fB\-c, \-\-convmode \s-1CONVMODE\s0\fR" 4
9e1524
.IX Item "-c, --convmode CONVMODE"
9e1524
Set conversion mode. Where \s-1CONVMODE\s0 is one of:
9e1524
\&\fIascii\fR, \fI7bit\fR, \fIiso\fR, \fImac\fR
9e1524
with ascii being the default.
9e1524
.IP "\fB\-f, \-\-force\fR" 4
9e1524
.IX Item "-f, --force"
9e1524
Force conversion of binary files.
9e1524
.IP "\fB\-h, \-\-help\fR" 4
9e1524
.IX Item "-h, --help"
9e1524
Display help and exit.
9e1524
.IP "\fB\-k, \-\-keepdate\fR" 4
9e1524
.IX Item "-k, --keepdate"
9e1524
Keep the date stamp of output file same as input file.
9e1524
.IP "\fB\-L, \-\-license\fR" 4
9e1524
.IX Item "-L, --license"
9e1524
Display program's license.
9e1524
.IP "\fB\-l, \-\-newline\fR" 4
9e1524
.IX Item "-l, --newline"
9e1524
Add additional newline.
9e1524
.Sp
9e1524
\&\fBdos2unix\fR: Only \s-1DOS\s0 line breaks are changed to two Unix line breaks.
9e1524
In Mac mode only Mac line breaks are changed to two Unix
9e1524
line breaks.
9e1524
.Sp
9e1524
\&\fBunix2dos\fR: Only Unix line breaks are changed to two \s-1DOS\s0 line breaks.
9e1524
In Mac mode Unix line breaks are changed to two Mac line breaks.
9e1524
.IP "\fB\-m, \-\-add\-bom\fR" 4
9e1524
.IX Item "-m, --add-bom"
9e1524
Write a \s-1UTF\-8\s0 Byte Order Mark in the output file. Never use this option when
9e1524
the output encoding is other than \s-1UTF\-8.\s0 See also section \s-1UNICODE.\s0
9e1524
.IP "\fB\-n, \-\-newfile \s-1INFILE OUTFILE ...\s0\fR" 4
9e1524
.IX Item "-n, --newfile INFILE OUTFILE ..."
9e1524
New file mode. Convert file \s-1INFILE\s0 and write output to file \s-1OUTFILE.\s0
9e1524
File names must be given in pairs and wildcard names should \fInot\fR be
9e1524
used or you \fIwill\fR lose your files.
9e1524
.Sp
9e1524
The person who starts the conversion in new file (paired) mode will be the owner
9e1524
of the converted file. The read/write permissions of the new file will be the
9e1524
permissions of the original file minus the \fIumask\fR\|(1) of the person who runs the
9e1524
conversion.
9e1524
.IP "\fB\-o, \-\-oldfile \s-1FILE ...\s0\fR" 4
9e1524
.IX Item "-o, --oldfile FILE ..."
9e1524
Old file mode. Convert file \s-1FILE\s0 and overwrite output to it. The program
9e1524
defaults to run in this mode. Wildcard names may be used.
9e1524
.Sp
9e1524
In old file (in-place) mode the converted file gets the same owner, group, and
9e1524
read/write permissions as the original file, even when the file is converted by
9e1524
another user who has write permissions on the file (e.g. user root).  The
9e1524
conversion will be aborted when it is not possible to preserve the original
9e1524
values.  Change of owner could mean that the original owner is not able to read
9e1524
the file any more. Change of group could be a security risk; the file could be
9e1524
made readable for persons for whom it is not intended.  Preservation of owner,
9e1524
group, and read/write permissions is only supported on Unix.
9e1524
.IP "\fB\-q, \-\-quiet\fR" 4
9e1524
.IX Item "-q, --quiet"
9e1524
Quiet mode. Suppress all warnings and messages. The return value is zero,
9e1524
except when wrong command-line options are used.
9e1524
.IP "\fB\-s, \-\-safe\fR" 4
9e1524
.IX Item "-s, --safe"
9e1524
Skip binary files (default).
9e1524
.IP "\fB\-F, \-\-follow\-symlink\fR" 4
9e1524
.IX Item "-F, --follow-symlink"
9e1524
Follow symbolic links and convert the targets.
9e1524
.IP "\fB\-R, \-\-replace\-symlink\fR" 4
9e1524
.IX Item "-R, --replace-symlink"
9e1524
Replace symbolic links with converted files
9e1524
(original target files remain unchanged).
9e1524
.IP "\fB\-S, \-\-skip\-symlink\fR" 4
9e1524
.IX Item "-S, --skip-symlink"
9e1524
Keep symbolic links and targets unchanged (default).
9e1524
.IP "\fB\-V, \-\-version\fR" 4
9e1524
.IX Item "-V, --version"
9e1524
Display version information and exit.
9e1524
.SH "MAC MODE"
9e1524
.IX Header "MAC MODE"
9e1524
In normal mode line breaks are converted from \s-1DOS\s0 to Unix and vice versa.
9e1524
Mac line breaks are not converted.
9e1524
.PP
9e1524
In Mac mode line breaks are converted from Mac to Unix and vice versa. \s-1DOS\s0
9e1524
line breaks are not changed.
9e1524
.PP
9e1524
To run in Mac mode use the command-line option \f(CW\*(C`\-c mac\*(C'\fR or use the
9e1524
commands \f(CW\*(C`mac2unix\*(C'\fR or \f(CW\*(C`unix2mac\*(C'\fR.
9e1524
.SH "CONVERSION MODES"
9e1524
.IX Header "CONVERSION MODES"
9e1524
Conversion modes \fIascii\fR, \fI7bit\fR, and \fIiso\fR
9e1524
are similar to those of dos2unix/unix2dos under SunOS/Solaris.
9e1524
.IP "\fBascii\fR" 4
9e1524
.IX Item "ascii"
9e1524
In mode \f(CW\*(C`ascii\*(C'\fR only line breaks are converted. This is the default
9e1524
conversion mode.
9e1524
.Sp
9e1524
Although the name of this mode is \s-1ASCII,\s0 which is a 7 bit standard, the
9e1524
actual mode is 8 bit. Always use this mode when converting Unicode \s-1UTF\-8\s0
9e1524
files.
9e1524
.IP "\fB7bit\fR" 4
9e1524
.IX Item "7bit"
9e1524
In this mode all 8 bit non-ASCII characters (with values from 128 to 255)
9e1524
are converted to a 7 bit space.
9e1524
.IP "\fBiso\fR" 4
9e1524
.IX Item "iso"
9e1524
Characters are converted between a \s-1DOS\s0 character set (code page) and \s-1ISO\s0
9e1524
character set \s-1ISO\-8859\-1 \s0(Latin\-1) on Unix. \s-1DOS\s0 characters without \s-1ISO\-8859\-1\s0
9e1524
equivalent, for which conversion is not possible, are converted to a dot. The
9e1524
same applies to \s-1ISO\-8859\-1\s0 characters without a \s-1DOS\s0 counterpart.
9e1524
.Sp
9e1524
When only option \f(CW\*(C`\-iso\*(C'\fR is used dos2unix will try to determine the active code
9e1524
page. When this is not possible dos2unix will use default code page \s-1CP437,\s0
9e1524
which is mainly used in the \s-1USA. \s0 To force a specific code page use options
9e1524
\&\f(CW\*(C`\-437\*(C'\fR (\s-1US\s0), \f(CW\*(C`\-850\*(C'\fR (Western European), \f(CW\*(C`\-860\*(C'\fR (Portuguese), \f(CW\*(C`\-863\*(C'\fR (French
9e1524
Canadian), or \f(CW\*(C`\-865\*(C'\fR (Nordic).  Windows code page \s-1CP1252 \s0(Western European) is
9e1524
also supported with option \f(CW\*(C`\-1252\*(C'\fR. For other code pages use dos2unix in
9e1524
combination with \fIiconv\fR\|(1).  Iconv can convert between a long list of character
9e1524
encodings.
9e1524
.Sp
9e1524
Never use \s-1ISO\s0 conversion on Unicode text files. It will corrupt \s-1UTF\-8\s0 encoded files.
9e1524
.Sp
9e1524
Some examples:
9e1524
.Sp
9e1524
Convert from \s-1DOS\s0 default code page to Unix Latin\-1
9e1524
.Sp
9e1524
.Vb 1
9e1524
\&    dos2unix \-iso \-n in.txt out.txt
9e1524
.Ve
9e1524
.Sp
9e1524
Convert from \s-1DOS CP850\s0 to Unix Latin\-1
9e1524
.Sp
9e1524
.Vb 1
9e1524
\&    dos2unix \-850 \-n in.txt out.txt
9e1524
.Ve
9e1524
.Sp
9e1524
Convert from Windows \s-1CP1252\s0 to Unix Latin\-1
9e1524
.Sp
9e1524
.Vb 1
9e1524
\&    dos2unix \-1252 \-n in.txt out.txt
9e1524
.Ve
9e1524
.Sp
9e1524
Convert from Windows \s-1CP1252\s0 to Unix \s-1UTF\-8 \s0(Unicode)
9e1524
.Sp
9e1524
.Vb 1
9e1524
\&    iconv \-f CP1252 \-t UTF\-8 in.txt | dos2unix > out.txt
9e1524
.Ve
9e1524
.Sp
9e1524
Convert from Unix Latin\-1 to \s-1DOS\s0 default code page.
9e1524
.Sp
9e1524
.Vb 1
9e1524
\&    unix2dos \-iso \-n in.txt out.txt
9e1524
.Ve
9e1524
.Sp
9e1524
Convert from Unix Latin\-1 to \s-1DOS CP850\s0
9e1524
.Sp
9e1524
.Vb 1
9e1524
\&    unix2dos \-850 \-n in.txt out.txt
9e1524
.Ve
9e1524
.Sp
9e1524
Convert from Unix Latin\-1 to Windows \s-1CP1252\s0
9e1524
.Sp
9e1524
.Vb 1
9e1524
\&    unix2dos \-1252 \-n in.txt out.txt
9e1524
.Ve
9e1524
.Sp
9e1524
Convert from Unix \s-1UTF\-8 \s0(Unicode) to Windows \s-1CP1252\s0
9e1524
.Sp
9e1524
.Vb 1
9e1524
\&    unix2dos < in.txt | iconv \-f UTF\-8 \-t CP1252 > out.txt
9e1524
.Ve
9e1524
.Sp
9e1524
See also <http://czyborra.com/charsets/codepages.html>
9e1524
and <http://czyborra.com/charsets/iso8859.html>.
9e1524
.SH "UNICODE"
9e1524
.IX Header "UNICODE"
9e1524
.SS "Encodings"
9e1524
.IX Subsection "Encodings"
9e1524
There exist different Unicode encodings. On Unix and Linux Unicode files are
9e1524
typically encoded in \s-1UTF\-8\s0 encoding. On Windows Unicode text files can be
9e1524
encoded in \s-1UTF\-8, UTF\-16,\s0 or \s-1UTF\-16\s0 big endian, but are mostly encoded in
9e1524
\&\s-1UTF\-16\s0 format.
9e1524
.SS "Conversion"
9e1524
.IX Subsection "Conversion"
9e1524
Unicode text files can have \s-1DOS,\s0 Unix or Mac line breaks, like regular text
9e1524
files.
9e1524
.PP
9e1524
All versions of dos2unix and unix2dos can convert \s-1UTF\-8\s0 encoded files, because
9e1524
\&\s-1UTF\-8\s0 was designed for backward compatibility with \s-1ASCII.\s0
9e1524
.PP
9e1524
Dos2unix and unix2dos with Unicode \s-1UTF\-16\s0 support, can read little and big
9e1524
endian \s-1UTF\-16\s0 encoded text files. To see if dos2unix was built with \s-1UTF\-16\s0
9e1524
support type \f(CW\*(C`dos2unix \-V\*(C'\fR.
9e1524
.PP
9e1524
The Windows versions of dos2unix and unix2dos convert \s-1UTF\-16\s0 encoded files
9e1524
always to \s-1UTF\-8\s0 encoded files. Unix versions of dos2unix/unix2dos convert
9e1524
\&\s-1UTF\-16\s0 encoded files to the locale character encoding when it is set to \s-1UTF\-8.\s0
9e1524
Use the \fIlocale\fR\|(1) command to find out what the locale character encoding is.
9e1524
.PP
9e1524
Because \s-1UTF\-8\s0 formatted text files are well supported on both Windows and Unix,
9e1524
dos2unix and unix2dos have no option to write \s-1UTF\-16\s0 files. All \s-1UTF\-16\s0
9e1524
characters can be encoded in \s-1UTF\-8.\s0 Conversion from \s-1UTF\-16\s0 to \s-1UTF\-8\s0 is without
9e1524
loss. \s-1UTF\-16\s0 files will be skipped on Unix when the locale character encoding
9e1524
is not \s-1UTF\-8,\s0 to prevent accidental loss of text. When a \s-1UTF\-16\s0 to \s-1UTF\-8\s0
9e1524
conversion error occurs, for instance when the \s-1UTF\-16\s0 input file contains
9e1524
an error, the file will be skipped.
9e1524
.PP
9e1524
\&\s-1ISO\s0 and 7\-bit mode conversion do not work on \s-1UTF\-16\s0 files.
9e1524
.SS "Byte Order Mark"
9e1524
.IX Subsection "Byte Order Mark"
9e1524
On Windows Unicode text files typically have a Byte Order Mark (\s-1BOM\s0), because
9e1524
many Windows programs (including Notepad) add BOMs by default. See also
9e1524
<http://en.wikipedia.org/wiki/Byte_order_mark>.
9e1524
.PP
9e1524
On Unix Unicode files typically don't have a \s-1BOM.\s0 It is assumed that text files
9e1524
are encoded in the locale character encoding.
9e1524
.PP
9e1524
Dos2unix can only detect if a file is in \s-1UTF\-16\s0 format if the file has a \s-1BOM.\s0
9e1524
When a \s-1UTF\-16\s0 file doesn't have a \s-1BOM,\s0 dos2unix will see the file as a binary
9e1524
file.
9e1524
.PP
9e1524
Use dos2unix in combination with \fIiconv\fR\|(1) to convert a \s-1UTF\-16\s0 file without
9e1524
\&\s-1BOM.\s0
9e1524
.PP
9e1524
Dos2unix never writes a \s-1BOM\s0 in the output file, unless you use option \f(CW\*(C`\-m\*(C'\fR.
9e1524
.PP
9e1524
Unix2dos writes a \s-1BOM\s0 in the output file when the input file has a \s-1BOM,\s0 or
9e1524
when option \f(CW\*(C`\-m\*(C'\fR is used.
9e1524
.SS "Unicode examples"
9e1524
.IX Subsection "Unicode examples"
9e1524
Convert from Windows \s-1UTF\-16 \s0(with \s-1BOM\s0) to Unix \s-1UTF\-8\s0
9e1524
.PP
9e1524
.Vb 1
9e1524
\&    dos2unix \-n in.txt out.txt
9e1524
.Ve
9e1524
.PP
9e1524
Convert from Windows \s-1UTF\-16 \s0(without \s-1BOM\s0) to Unix \s-1UTF\-8\s0
9e1524
.PP
9e1524
.Vb 1
9e1524
\&    iconv \-f UTF\-16 \-t UTF\-8 in.txt | dos2unix > out.txt
9e1524
.Ve
9e1524
.PP
9e1524
Convert from Unix \s-1UTF\-8\s0 to Windows \s-1UTF\-8\s0 with \s-1BOM\s0
9e1524
.PP
9e1524
.Vb 1
9e1524
\&    unix2dos \-m \-n in.txt out.txt
9e1524
.Ve
9e1524
.PP
9e1524
Convert from Unix \s-1UTF\-8\s0 to Windows \s-1UTF\-16\s0
9e1524
.PP
9e1524
.Vb 1
9e1524
\&    unix2dos < in.txt | iconv \-f UTF\-8 \-t UTF\-16 > out.txt
9e1524
.Ve
9e1524
.SH "EXAMPLES"
9e1524
.IX Header "EXAMPLES"
9e1524
Read input from 'stdin' and write output to 'stdout'.
9e1524
.PP
9e1524
.Vb 2
9e1524
\&    dos2unix
9e1524
\&    dos2unix \-l \-c mac
9e1524
.Ve
9e1524
.PP
9e1524
Convert and replace a.txt. Convert and replace b.txt.
9e1524
.PP
9e1524
.Vb 2
9e1524
\&    dos2unix a.txt b.txt
9e1524
\&    dos2unix \-o a.txt b.txt
9e1524
.Ve
9e1524
.PP
9e1524
Convert and replace a.txt in ascii conversion mode.
9e1524
.PP
9e1524
.Vb 1
9e1524
\&    dos2unix a.txt
9e1524
.Ve
9e1524
.PP
9e1524
Convert and replace a.txt in ascii conversion mode.
9e1524
Convert and replace b.txt in 7bit conversion mode.
9e1524
.PP
9e1524
.Vb 3
9e1524
\&    dos2unix a.txt \-c 7bit b.txt
9e1524
\&    dos2unix \-c ascii a.txt \-c 7bit b.txt
9e1524
\&    dos2unix \-ascii a.txt \-7 b.txt
9e1524
.Ve
9e1524
.PP
9e1524
Convert a.txt from Mac to Unix format.
9e1524
.PP
9e1524
.Vb 2
9e1524
\&    dos2unix \-c mac a.txt
9e1524
\&    mac2unix a.txt
9e1524
.Ve
9e1524
.PP
9e1524
Convert a.txt from Unix to Mac format.
9e1524
.PP
9e1524
.Vb 2
9e1524
\&    unix2dos \-c mac a.txt
9e1524
\&    unix2mac a.txt
9e1524
.Ve
9e1524
.PP
9e1524
Convert and replace a.txt while keeping original date stamp.
9e1524
.PP
9e1524
.Vb 2
9e1524
\&    dos2unix \-k a.txt
9e1524
\&    dos2unix \-k \-o a.txt
9e1524
.Ve
9e1524
.PP
9e1524
Convert a.txt and write to e.txt.
9e1524
.PP
9e1524
.Vb 1
9e1524
\&    dos2unix \-n a.txt e.txt
9e1524
.Ve
9e1524
.PP
9e1524
Convert a.txt and write to e.txt, keep date stamp of e.txt same as a.txt.
9e1524
.PP
9e1524
.Vb 1
9e1524
\&    dos2unix \-k \-n a.txt e.txt
9e1524
.Ve
9e1524
.PP
9e1524
Convert and replace a.txt. Convert b.txt and write to e.txt.
9e1524
.PP
9e1524
.Vb 2
9e1524
\&    dos2unix a.txt \-n b.txt e.txt
9e1524
\&    dos2unix \-o a.txt \-n b.txt e.txt
9e1524
.Ve
9e1524
.PP
9e1524
Convert c.txt and write to e.txt. Convert and replace a.txt.
9e1524
Convert and replace b.txt. Convert d.txt and write to f.txt.
9e1524
.PP
9e1524
.Vb 1
9e1524
\&    dos2unix \-n c.txt e.txt \-o a.txt b.txt \-n d.txt f.txt
9e1524
.Ve
9e1524
.SH "RECURSIVE CONVERSION"
9e1524
.IX Header "RECURSIVE CONVERSION"
9e1524
Use dos2unix in combination with the \fIfind\fR\|(1) and \fIxargs\fR\|(1) commands to
9e1524
recursively convert text files in a directory tree structure. For instance to
9e1524
convert all .txt files in the directory tree under the current directory type:
9e1524
.PP
9e1524
.Vb 1
9e1524
\&    find . \-name "*.txt" |xargs dos2unix
9e1524
.Ve
9e1524
.SH "LOCALIZATION"
9e1524
.IX Header "LOCALIZATION"
9e1524
.IP "\fB\s-1LANG\s0\fR" 4
9e1524
.IX Item "LANG"
9e1524
The primary language is selected with the environment variable \s-1LANG.\s0 The \s-1LANG\s0
9e1524
variable consists of several parts. The first part is in small letters the
9e1524
language code. The second is optional and is the country code in capital
9e1524
letters, preceded with an underscore. There is also an optional third part:
9e1524
character encoding, preceded with a dot. A few examples for \s-1POSIX\s0 standard type
9e1524
shells:
9e1524
.Sp
9e1524
.Vb 7
9e1524
\&    export LANG=nl               Dutch
9e1524
\&    export LANG=nl_NL            Dutch, The Netherlands
9e1524
\&    export LANG=nl_BE            Dutch, Belgium
9e1524
\&    export LANG=es_ES            Spanish, Spain
9e1524
\&    export LANG=es_MX            Spanish, Mexico
9e1524
\&    export LANG=en_US.iso88591   English, USA, Latin\-1 encoding
9e1524
\&    export LANG=en_GB.UTF\-8      English, UK, UTF\-8 encoding
9e1524
.Ve
9e1524
.Sp
9e1524
For a complete list of language and country codes see the gettext manual:
9e1524
<http://www.gnu.org/software/gettext/manual/gettext.html#Language\-Codes>
9e1524
.Sp
9e1524
On Unix systems you can use the command \fIlocale\fR\|(1) to get locale specific
9e1524
information.
9e1524
.IP "\fB\s-1LANGUAGE\s0\fR" 4
9e1524
.IX Item "LANGUAGE"
9e1524
With the \s-1LANGUAGE\s0 environment variable you can specify a priority list of
9e1524
languages, separated by colons. Dos2unix gives preference to \s-1LANGUAGE\s0 over \s-1LANG.\s0
9e1524
For instance, first Dutch and then German: \f(CW\*(C`LANGUAGE=nl:de\*(C'\fR. You have to first
9e1524
enable localization, by setting \s-1LANG \s0(or \s-1LC_ALL\s0) to a value other than
9e1524
\&\*(L"C\*(R", before you can use a language priority list through the \s-1LANGUAGE\s0
9e1524
variable. See also the gettext manual:
9e1524
<http://www.gnu.org/software/gettext/manual/gettext.html#The\-LANGUAGE\-variable>
9e1524
.Sp
9e1524
If you select a language which is not available you will get the
9e1524
standard English messages.
9e1524
.IP "\fB\s-1DOS2UNIX_LOCALEDIR\s0\fR" 4
9e1524
.IX Item "DOS2UNIX_LOCALEDIR"
9e1524
With the environment variable \s-1DOS2UNIX_LOCALEDIR\s0 the \s-1LOCALEDIR\s0 set
9e1524
during compilation can be overruled. \s-1LOCALEDIR\s0 is used to find the
9e1524
language files. The \s-1GNU\s0 default value is \f(CW\*(C`/usr/local/share/locale\*(C'\fR.
9e1524
Option \fB\-\-version\fR will display the \s-1LOCALEDIR\s0 that is used.
9e1524
.Sp
9e1524
Example (\s-1POSIX\s0 shell):
9e1524
.Sp
9e1524
.Vb 1
9e1524
\&    export DOS2UNIX_LOCALEDIR=$HOME/share/locale
9e1524
.Ve
9e1524
.SH "RETURN VALUE"
9e1524
.IX Header "RETURN VALUE"
9e1524
On success, zero is returned.  When a system error occurs the last system error will be
9e1524
returned. For other errors 1 is returned.
9e1524
.PP
9e1524
The return value is always zero in quiet mode, except when wrong command-line options
9e1524
are used.
9e1524
.SH "STANDARDS"
9e1524
.IX Header "STANDARDS"
9e1524
<http://en.wikipedia.org/wiki/Text_file>
9e1524
.PP
9e1524
<http://en.wikipedia.org/wiki/Carriage_return>
9e1524
.PP
9e1524
<http://en.wikipedia.org/wiki/Newline>
9e1524
.PP
9e1524
<http://en.wikipedia.org/wiki/Unicode>
9e1524
.SH "AUTHORS"
9e1524
.IX Header "AUTHORS"
9e1524
Benjamin Lin \- <blin@socs.uts.edu.au>,
9e1524
Bernd Johannes Wuebben (mac2unix mode) \- <wuebben@kde.org>,
9e1524
Christian Wurll (add extra newline) \- <wurll@ira.uka.de>,
9e1524
Erwin Waterlander \- <waterlan@xs4all.nl> (Maintainer)
9e1524
.PP
9e1524
Project page: <http://waterlan.home.xs4all.nl/dos2unix.html>
9e1524
.PP
9e1524
SourceForge page: <http://sourceforge.net/projects/dos2unix/>
9e1524
.PP
9e1524
Freecode: <http://freecode.com/projects/dos2unix>
9e1524
.SH "SEE ALSO"
9e1524
.IX Header "SEE ALSO"
9e1524
\&\fIfile\fR\|(1)
9e1524
\&\fIfind\fR\|(1)
9e1524
\&\fIiconv\fR\|(1)
9e1524
\&\fIlocale\fR\|(1)
9e1524
\&\fIxargs\fR\|(1)