/android/import-contacts

To get this branch, use:
bzr branch http://bzr.ed.am/android/import-contacts

« back to all changes in this revision

Viewing changes to src/org/waxworlds/edam/importcontacts/VCFImporter.java

  • Committer: edam
  • Date: 2010-12-11 23:57:07 UTC
  • Revision ID: edam@waxworlds.org-20101211235707-czyw48tt3hcopuwf
- read vCard files in as raw bytes now, and convert to string only tentatively to check for version numbers, property names and params
- ASCII is now (correctly) the default charset
- added conversion from 8-bit ASCII to UTF-8 (not used on 7-bit US-ASCII) which works on raw bytes, not chars
- unencode quoted-printable now works on raw bytes, not chars

Show diffs side-by-side

added added

removed removed

25
25
 
26
26
import java.io.BufferedReader;
27
27
import java.io.File;
 
28
import java.io.FileInputStream;
28
29
import java.io.FileNotFoundException;
29
30
import java.io.FileReader;
30
31
import java.io.FilenameFilter;
31
32
import java.io.IOException;
32
33
import java.io.UnsupportedEncodingException;
 
34
import java.nio.ByteBuffer;
33
35
import java.util.Arrays;
34
36
import java.util.HashSet;
35
37
import java.util.List;
148
150
 
149
151
        private void importVCardFile( File file ) throws AbortImportException
150
152
        {
 
153
                // check file is good
 
154
                if( !file.exists() )
 
155
                        showError( getText( R.string.error_filenotfound ) +
 
156
                                file.getName() );
 
157
                if( file.length() == 0 )
 
158
                        showError( getText( R.string.error_fileisempty ) +
 
159
                                file.getName() );
 
160
 
151
161
                try
152
162
                {
153
 
                        // open file
154
 
                        BufferedReader reader = new BufferedReader(
155
 
                                        new FileReader( file ) );
156
 
 
157
 
                        // read
158
 
                        StringBuffer content = new StringBuffer();
159
 
                        String line;
160
 
                        while( ( line = reader.readLine() ) != null )
161
 
                                content.append( line ).append( "\n" );
162
 
 
163
 
                        importVCardFileContent( content.toString(), file.getName() );
 
163
                        // open/read file
 
164
                        FileInputStream istream = new FileInputStream( file );
 
165
                        byte[] content = new byte[ (int)file.length() ];
 
166
                        istream.read( content );
 
167
 
 
168
                        // import
 
169
                        importVCardFileContent( content, file.getName() );
164
170
                }
165
171
                catch( FileNotFoundException e ) {
166
172
                        showError( getText( R.string.error_filenotfound ) +
171
177
                }
172
178
        }
173
179
 
174
 
        private void importVCardFileContent( String content, String fileName )
 
180
        private void importVCardFileContent( byte[] content, String fileName )
175
181
                        throws AbortImportException
176
182
        {
177
 
                // get lines and parse them
178
 
                String[] lines = content.split( "\n" );
 
183
                ByteBuffer buffers[] = getLinesFromContent( content );
 
184
 
 
185
                // go through lines
179
186
                VCard vCard = null;
180
 
                for( int i = 0; i < lines.length; i++ )
 
187
                for( int i = 0; i < buffers.length; i++ )
181
188
                {
182
 
                        String line = lines[ i ];
 
189
                        // get a US-ASCII version of the line for processing
 
190
                        String line;
 
191
                        try {
 
192
                                line = new String( buffers[ i ].array(), buffers[ i ].position(),
 
193
                                        buffers[ i ].limit() - buffers[ i ].position(), "US-ASCII" );
 
194
                        }
 
195
                        catch( UnsupportedEncodingException e ) {
 
196
                                // we know US-ASCII is supported, so appease the compiler...
 
197
                                line = "";
 
198
                        }
183
199
 
184
200
                        if( vCard == null ) {
185
201
                                // look for vcard beginning
214
230
                                {
215
231
                                        // try giving the line to the vcard
216
232
                                        try {
217
 
                                                vCard.parseLine( line );
 
233
                                                vCard.parseLine( buffers[ i ] );
218
234
                                        }
219
235
                                        catch( VCard.ParseException e ) {
220
236
                                                skipContact();
239
255
                }
240
256
        }
241
257
 
 
258
        private ByteBuffer[] getLinesFromContent( byte[] content )
 
259
        {
 
260
                // count lines in data
 
261
                int num_lines = 1;
 
262
                for( int a = 0; a < content.length; a++ )
 
263
                        if( content[ a ] == '\n' )
 
264
                                num_lines++;
 
265
 
 
266
                // get lines, removing \r's and \n's as we go
 
267
                ByteBuffer lines[] = new ByteBuffer[ num_lines ];
 
268
                int last = 0;
 
269
                for( int a = 0, b = 0; a < content.length; a++ )
 
270
                        if( content[ a ] == '\n' ) {
 
271
                                int to = ( a > 0 && content[ a - 1 ] == '\r' &&
 
272
                                        a - 1 >= last )? a - 1 : a;
 
273
                                lines[ b++ ] = ByteBuffer.wrap( content, last, to - last );
 
274
                                last = a + 1;
 
275
                        }
 
276
                lines[ lines.length - 1 ] = ByteBuffer.wrap( content, last,
 
277
                        content.length - last );
 
278
 
 
279
                return lines;
 
280
        }
 
281
 
242
282
        private class VCard extends ContactData
243
283
        {
244
284
                private final static int NAMELEVEL_NONE = 0;
247
287
                private final static int NAMELEVEL_N = 3;
248
288
 
249
289
                private String _version = null;
250
 
                private Vector< String > _lines = null;
 
290
                private Vector< ByteBuffer > _buffers = null;
251
291
                private int _name_level = NAMELEVEL_NONE;
252
292
                private boolean _parser_in_multiline = false;
253
293
                private String _parser_current_name_and_params = null;
256
296
                protected class UnencodeResult
257
297
                {
258
298
                        private boolean _another_line_required;
259
 
                        private byte[] _bytes;
260
 
                        private int _num_bytes;
 
299
                        private ByteBuffer _buffer;
261
300
 
262
 
                        public UnencodeResult( boolean another_line_required, byte[] bytes,
263
 
                                int num_bytes )
 
301
                        public UnencodeResult( boolean another_line_required,
 
302
                                ByteBuffer buffer )
264
303
                        {
265
304
                                _another_line_required = another_line_required;
266
 
                                _bytes = bytes;
267
 
                                _num_bytes = num_bytes;
 
305
                                _buffer = buffer;
268
306
                        }
269
307
 
270
308
                        public boolean isAnotherLineRequired()
272
310
                                return _another_line_required;
273
311
                        }
274
312
 
275
 
                        public byte[] getBytes()
276
 
                        {
277
 
                                return _bytes;
278
 
                        }
279
 
 
280
 
                        public int getNumBytes()
281
 
                        {
282
 
                                return _num_bytes;
 
313
                        public ByteBuffer getBuffer()
 
314
                        {
 
315
                                return _buffer;
283
316
                        }
284
317
                }
285
318
 
301
334
                @SuppressWarnings("serial")
302
335
                protected class SkipContactException extends Exception { }
303
336
 
304
 
                public void parseLine( String line )
 
337
                public void parseLine( ByteBuffer buffer )
305
338
                                throws ParseException, SkipContactException,
306
339
                                AbortImportException
307
340
                {
 
341
                        // get a US-ASCII version of the line for processing
 
342
                        String line;
 
343
                        try {
 
344
                                line = new String( buffer.array(), buffer.position(),
 
345
                                        buffer.limit() - buffer.position(), "US-ASCII" );
 
346
                        }
 
347
                        catch( UnsupportedEncodingException e ) {
 
348
                                // we know US-ASCII is supported, so appease the compiler...
 
349
                                line = "";
 
350
                        }
 
351
 
308
352
                        // ignore empty lines
309
353
                        if( line.trim() == "" ) return;
310
354
 
311
355
                        // split line into name and value parts (this may turn out to be
312
356
                        // unwanted if the line is a subsequent line in a multi-line
313
357
                        // value, but we have to do this now to check for and handle VCF
314
 
                        // versions first)
315
 
                        String[] props = line.split(  ":", 2 );
316
 
                        for( int i = 0; i < props.length; i++ )
317
 
                                props[ i ] = props[ i ].trim();
 
358
                        // versions first). Also, the value part is only created tentatively
 
359
                        // because it may have an encoding/charset. Since we're treating it
 
360
                        // as UTF-8 (which is compatible with 7-bit US-ASCII) this is ok
 
361
                        // though so long as we later use the raw bytes. Also we check for
 
362
                        // malformed property:name pairs.
 
363
                        String name_and_params, string_value;
 
364
                        {
 
365
                                String[] bits = line.split(  ":", 2 );
 
366
                                if( bits.length == 2 ) {
 
367
                                        name_and_params = bits[ 0 ].trim();
 
368
                                        string_value = bits[ 1 ].trim();
 
369
                                        if( name_and_params.length() == 0 )
 
370
                                                throw new ParseException( R.string.error_vcf_malformed );
 
371
                                }
 
372
                                else
 
373
                                {
 
374
                                        if( !_parser_in_multiline )
 
375
                                                throw new ParseException( R.string.error_vcf_malformed );
 
376
                                        name_and_params = null;
 
377
                                        string_value = null;
 
378
                                }
 
379
                        }
318
380
 
319
381
                        // if we haven't yet got a version, we won't be parsing anything!
320
382
                        if( _version == null )
321
383
                        {
322
384
                                // is this a version?
323
 
                                if( props.length == 2 && props[ 0 ].equals( "VERSION" ) )
 
385
                                if( name_and_params.equals( "VERSION" ) )
324
386
                                {
325
387
                                        // yes, check/store it
326
 
                                        if( !props[ 1 ].equals( "2.1" ) &&
327
 
                                                        !props[ 1 ].equals( "3.0" ) )
 
388
                                        if( !string_value.equals( "2.1" ) &&
 
389
                                                        !string_value.equals( "3.0" ) )
328
390
                                                throw new ParseException( R.string.error_vcf_version );
329
 
                                        _version = props[ 1 ];
 
391
                                        _version = string_value;
330
392
 
331
 
                                        // parse any other lines we've accumulated so far
332
 
                                        if( _lines != null )
333
 
                                                for( int i = 0; i < _lines.size(); i++ )
334
 
                                                        parseLine( _lines.get( i ) );
335
 
                                        _lines = null;
 
393
                                        // parse any other buffers we've accumulated so far
 
394
                                        if( _buffers != null )
 
395
                                                for( int i = 0; i < _buffers.size(); i++ )
 
396
                                                        parseLine( _buffers.get( i ) );
 
397
                                        _buffers = null;
336
398
                                }
337
399
                                else
338
400
                                {
339
 
                                        // no, so stash this line till we have a version
340
 
                                        if( _lines == null )
341
 
                                                _lines = new Vector< String >();
342
 
                                        _lines.add( line );
 
401
                                        // no, so stash this buffer till we have a version
 
402
                                        if( _buffers == null )
 
403
                                                _buffers = new Vector< ByteBuffer >();
 
404
                                        _buffers.add( buffer );
343
405
                                }
344
406
                        }
345
407
                        else
346
408
                        {
 
409
                                // value bytes, for processing
 
410
                                ByteBuffer value;
 
411
 
347
412
                                if( _parser_in_multiline )
348
413
                                {
349
414
                                        // if we're currently in a multi-line value, use the stored
350
415
                                        // property name and parameters
351
 
                                        props = new String[ 2 ];
352
 
                                        props[ 0 ] = _parser_current_name_and_params;
353
 
                                        props[ 1 ] = line.trim();
 
416
                                        name_and_params = _parser_current_name_and_params;
 
417
 
 
418
                                        // find start of string (skip spaces/tabs)
 
419
                                        int pos = buffer.position();
 
420
                                        byte[] buffer_array = buffer.array();
 
421
                                        while( pos < buffer.limit() && (
 
422
                                                buffer_array[ pos ] == ' ' ||
 
423
                                                buffer_array[ pos ] == '\t' ) )
 
424
                                        {
 
425
                                                pos++;
 
426
                                        }
 
427
 
 
428
                                        // get value from buffer
 
429
                                        value = ByteBuffer.wrap( buffer.array(), pos,
 
430
                                                buffer.limit() - pos );
354
431
                                }
355
432
                                else
356
433
                                {
357
 
                                        // for normal lines, check the property name/value bits
358
 
                                        if( props.length < 2 || props[ 0 ].length() == 0 )
359
 
                                                throw new ParseException(
360
 
                                                        R.string.error_vcf_malformed );
361
 
 
362
 
                                        // ignore empty properties
363
 
                                        if( props[ 1 ].length() < 1 )
364
 
                                                return;
 
434
                                        // ignore empty values
 
435
                                        if( string_value.length() < 1 ) return;
 
436
 
 
437
                                        // calculate how many chars to skip from beginning of line
 
438
                                        // so we skip the property "name:" part
 
439
                                        int pos = buffer.position() + name_and_params.length() + 1;
 
440
 
 
441
                                        // get value from buffer
 
442
                                        value = ByteBuffer.wrap( buffer.array(), pos,
 
443
                                                buffer.limit() - pos );
365
444
 
366
445
                                        // reset the saved multi-line state
367
 
                                        _parser_current_name_and_params = props[ 0 ];
 
446
                                        _parser_current_name_and_params = name_and_params;
368
447
                                        _parser_buffered_value_so_far = "";
369
448
                                }
370
449
 
371
450
                                // get parameter parts
372
 
                                String[] params = props[ 0 ].split( ";" );
373
 
                                for( int i = 0; i < params.length; i++ )
374
 
                                        params[ i ] = params[ i ].trim();
 
451
                                String[] name_and_param_bits = name_and_params.split( ";" );
 
452
                                for( int i = 0; i < name_and_param_bits.length; i++ )
 
453
                                        name_and_param_bits[ i ] = name_and_param_bits[ i ].trim();
375
454
 
376
 
                                // parse charset and encoding parameters
377
 
                                String charset, encoding;
378
 
                                if( ( charset = checkParam( params, "CHARSET" ) ) != null &&
379
 
                                        !charset.equals( "UTF-8" ) && !charset.equals( "UTF-16" ) )
380
 
                                {
381
 
                                        throw new ParseException( R.string.error_vcf_charset );
382
 
                                }
383
 
                                if( ( encoding = checkParam( params, "ENCODING" ) ) != null &&
384
 
                                        !encoding.equals( "QUOTED-PRINTABLE" ) &&
385
 
                                        !encoding.equals( "8BIT" ) )
 
455
                                // parse encoding parameter
 
456
                                String encoding = checkParam( name_and_param_bits, "ENCODING" );
 
457
                                if( encoding != null ) encoding = encoding.toUpperCase();
 
458
                                if( encoding != null && !encoding.equals( "8BIT" ) &&
 
459
                                        !encoding.equals( "QUOTED-PRINTABLE" ) )
386
460
                                        //&& !encoding.equals( "BASE64" ) )
387
461
                                {
388
462
                                        throw new ParseException( R.string.error_vcf_encoding );
389
463
                                }
390
464
 
 
465
                                // parse charset parameter
 
466
                                String charset = checkParam( name_and_param_bits, "CHARSET" );
 
467
                                if( charset != null ) charset = charset.toUpperCase();
 
468
                                if( charset != null && !charset.equals( "US-ASCII" ) &&
 
469
                                        !charset.equals( "ASCII" ) && !charset.equals( "UTF-8" ) )
 
470
                                {
 
471
                                        throw new ParseException( R.string.error_vcf_charset );
 
472
                                }
 
473
 
391
474
                                // do unencoding (or default to a fake unencoding result with
392
475
                                // the raw string)
393
 
                                UnencodeResult result;
 
476
                                UnencodeResult unencoding_result = null;
394
477
                                if( encoding != null && encoding.equals( "QUOTED-PRINTABLE" ) )
395
 
                                        result = unencodeQuotedPrintable( props[ 1 ], charset );
 
478
                                        unencoding_result = unencodeQuotedPrintable( value );
396
479
//                              else if( encoding != null && encoding.equals( "BASE64" ) )
397
480
//                                      result = unencodeBase64( props[ 1 ], charset );
398
 
                                else
399
 
                                        result = new UnencodeResult( false, props[ 1 ].getBytes(),
400
 
                                                props[ 1 ].getBytes().length );
 
481
                                if( unencoding_result != null ) {
 
482
                                        value = unencoding_result.getBuffer();
 
483
                                        _parser_in_multiline =
 
484
                                                unencoding_result.isAnotherLineRequired();
 
485
                                }
 
486
 
 
487
                                // convert 8-bit ASCII charset to UTF-8
 
488
                                if( charset == null || charset == "ASCII" ) {
 
489
                                        value = transcodeAsciiToUtf8( value );
 
490
                                        charset = "UTF-8";
 
491
                                }
401
492
 
402
493
                                // process charset
403
494
                                try {
404
 
                                        props[ 1 ] = new String( result.getBytes(), 0,
405
 
                                                result.getNumBytes(),
406
 
                                                charset == null? "UTF-8" : charset );
 
495
                                        string_value =
 
496
                                                new String( value.array(), value.position(),
 
497
                                                        value.limit() - value.position(), charset );
407
498
                                } catch( UnsupportedEncodingException e ) {
408
499
                                        throw new ParseException( R.string.error_vcf_charset );
409
500
                                }
410
501
 
411
502
                                // handle multi-line requests
412
 
                                _parser_in_multiline = result.isAnotherLineRequired();
413
503
                                if( _parser_in_multiline ) {
414
 
                                        _parser_buffered_value_so_far += props[ 1 ];
 
504
                                        _parser_buffered_value_so_far += string_value;
415
505
                                        return;
416
506
                                }
417
507
 
418
508
                                // add on buffered multi-line content
419
 
                                String value = _parser_buffered_value_so_far + props[ 1 ];
 
509
                                String complete_value =
 
510
                                        _parser_buffered_value_so_far + string_value;
420
511
 
421
512
                                // parse some properties
422
 
                                if( params[ 0 ].equals( "N" ) )
423
 
                                        parseN( params, value );
424
 
                                else if( params[ 0 ].equals( "FN" ) )
425
 
                                        parseFN( params, value );
426
 
                                else if( params[ 0 ].equals( "ORG" ) )
427
 
                                        parseORG( params, value );
428
 
                                else if( params[ 0 ].equals( "TEL" ) )
429
 
                                        parseTEL( params, value );
430
 
                                else if( params[ 0 ].equals( "EMAIL" ) )
431
 
                                        parseEMAIL( params, value );
 
513
                                if( name_and_param_bits[ 0 ].equals( "N" ) )
 
514
                                        parseN( name_and_param_bits, complete_value );
 
515
                                else if( name_and_param_bits[ 0 ].equals( "FN" ) )
 
516
                                        parseFN( name_and_param_bits, complete_value );
 
517
                                else if( name_and_param_bits[ 0 ].equals( "ORG" ) )
 
518
                                        parseORG( name_and_param_bits, complete_value );
 
519
                                else if( name_and_param_bits[ 0 ].equals( "TEL" ) )
 
520
                                        parseTEL( name_and_param_bits, complete_value );
 
521
                                else if( name_and_param_bits[ 0 ].equals( "EMAIL" ) )
 
522
                                        parseEMAIL( name_and_param_bits, complete_value );
432
523
                        }
433
524
                }
434
525
 
542
633
                                AbortImportException
543
634
                {
544
635
                        // missing version (and data is present)
545
 
                        if( _version == null && _lines != null )
 
636
                        if( _version == null && _buffers != null )
546
637
                                throw new ParseException( R.string.error_vcf_malformed );
547
638
 
548
639
                        //  missing name properties?
591
682
                        return types;
592
683
                }
593
684
 
594
 
                private UnencodeResult unencodeQuotedPrintable( String str, String charset )
 
685
                private UnencodeResult unencodeQuotedPrintable( ByteBuffer in )
595
686
                {
596
687
                        boolean another = false;
597
688
 
598
 
                        // default encoding scheme
599
 
                        if( charset == null ) charset = "UTF-8";
600
 
 
601
689
                        // unencode quoted-printable encoding, as per RFC1521 section 5.1
602
 
                        byte[] bytes = new byte[ str.length() ];
 
690
                        byte[] out = new byte[ in.limit() - in.position() ];
603
691
                        int j = 0;
604
 
                        for( int i = 0; i < str.length(); i++ )
 
692
                        for( int i = in.position(); i < in.limit(); i++ )
605
693
                        {
606
694
                                // get next char and process...
607
 
                                char ch = str.charAt( i );
608
 
                                if( ch == '=' && i < str.length() - 2 )
 
695
                                byte ch = in.array()[ i ];
 
696
                                if( ch == '=' && i < in.limit() - 2 )
609
697
                                {
610
698
                                        // we found a =XX format byte, add it
611
 
                                        bytes[ j ] = (byte)(
612
 
                                                        Character.digit( str.charAt( i + 1 ), 16 ) * 16 +
613
 
                                                        Character.digit( str.charAt( i + 2 ), 16 ) );
 
699
                                        out[ j ] = (byte)(
 
700
                                                Character.digit( in.array()[ i + 1 ], 16 ) * 16 +
 
701
                                                Character.digit( in.array()[ i + 2 ], 16 ) );
614
702
                                        i += 2;
615
703
                                }
616
 
                                else if( ch == '=' && i == str.length() - 1 )
 
704
                                else if( ch == '=' && i == in.limit() - 1 )
617
705
                                {
618
706
                                        // we found a '=' at the end of a line signifying a multi-
619
707
                                        // line string, so we don't add it.
622
710
                                }
623
711
                                else
624
712
                                        // just a normal char...
625
 
                                        bytes[ j ] = (byte)ch;
 
713
                                        out[ j ] = (byte)ch;
626
714
                                j++;
627
715
                        }
628
716
 
629
 
                        return new UnencodeResult( another, bytes, j );
 
717
                        return new UnencodeResult( another, ByteBuffer.wrap( out, 0, j ) );
 
718
                }
 
719
 
 
720
                private ByteBuffer transcodeAsciiToUtf8( ByteBuffer in )
 
721
                {
 
722
                        // transcode
 
723
                        byte[] out = new byte[ ( in.limit() - in.position() ) * 2 ];
 
724
                        int j = 0;
 
725
                        for( int a = in.position(); a < in.limit(); a++ )
 
726
                        {
 
727
                                // if byte is 7-bit (0-127), keep it as-is
 
728
                                if( in.array()[ a ] >= 0 )
 
729
                                        out[ j++ ] = in.array()[ a ];
 
730
 
 
731
                                // else, convert it to UTF-8
 
732
                                else {
 
733
                                        int b = 0xff & (int)in.array()[ a ];
 
734
                                        out[ j++ ] = (byte)( 0xc0 | ( b >> 6 ) );
 
735
                                        out[ j++ ] = (byte)( 0x80 | ( b & 0x3f ) );
 
736
                                }
 
737
                        }
 
738
 
 
739
                        return ByteBuffer.wrap( out, 0, j );
630
740
                }
631
741
        }
632
742
}