/android/import-contacts

To get this branch, use:
bzr branch http://bzr.ed.am/android/import-contacts

« back to all changes in this revision

Viewing changes to src/org/waxworlds/edam/importcontacts/VCFImporter.java

  • Committer: edam
  • Date: 2011-03-23 07:50:13 UTC
  • Revision ID: edam@waxworlds.org-20110323075013-qnza5odtlsjtcz0p
- updated TODO and NEWS
- handle different multiline schemes better
- import addresses
- check for escaped semi-colons and newlines in N, ADR and ORG lines
- minor optimisations

Show diffs side-by-side

added added

removed removed

32
32
import java.io.IOException;
33
33
import java.io.UnsupportedEncodingException;
34
34
import java.nio.ByteBuffer;
 
35
import java.util.ArrayList;
35
36
import java.util.Arrays;
36
37
import java.util.HashSet;
37
38
import java.util.Iterator;
38
39
import java.util.List;
 
40
import java.util.NoSuchElementException;
39
41
import java.util.Set;
40
42
import java.util.Vector;
41
43
import java.util.regex.Matcher;
42
44
import java.util.regex.Pattern;
43
 
import java.util.NoSuchElementException;
44
 
import java.lang.UnsupportedOperationException;
45
45
 
46
46
import android.content.SharedPreferences;
47
47
import android.provider.Contacts;
334
334
                private final static int NAMELEVEL_FN = 2;
335
335
                private final static int NAMELEVEL_N = 3;
336
336
 
 
337
                private final static int MULTILINE_NONE = 0;
 
338
                private final static int MULTILINE_ENCODED = 1; // v2.1 quoted-printable
 
339
                private final static int MULTILINE_ESCAPED = 2; // v2.1 \\CRLF
 
340
                private final static int MULTILINE_FOLDED = 3;  // v3.0 folding
 
341
 
337
342
                private String _version = null;
338
343
                private Vector< ByteBuffer > _buffers = null;
339
344
                private int _name_level = NAMELEVEL_NONE;
340
 
                private boolean _parser_in_encoded_multiline = false;
341
 
                private boolean _parser_in_folded_multiline = false;
 
345
                private int _parser_multiline_state = MULTILINE_NONE;
342
346
                private String _parser_current_name_and_params = null;
343
347
                private String _parser_buffered_value_so_far = "";
344
348
 
469
473
                                String name_and_params;
470
474
                                int pos;
471
475
 
472
 
                                if( _parser_in_encoded_multiline ||
473
 
                                        _parser_in_folded_multiline )
 
476
                                if( _parser_multiline_state != MULTILINE_NONE )
474
477
                                {
475
478
                                        // if we're currently in a multi-line value, use the stored
476
479
                                        // property name and parameters
477
480
                                        name_and_params = _parser_current_name_and_params;
478
481
 
 
482
                                        // skip some initial line characters, depending on the type
 
483
                                        // of multi-line we're handling
479
484
                                        pos = buffer.position();
480
 
 
481
 
                                        // for folded multi-lines, skip the single space at the
482
 
                                        // start of the next line
483
 
                                        if( _parser_in_folded_multiline )
 
485
                                        switch( _parser_multiline_state )
 
486
                                        {
 
487
                                        case MULTILINE_FOLDED:
484
488
                                                pos++;
485
 
 
486
 
                                        // else, this must be an encoded multi-line, so skip any
487
 
                                        // whitespace we find at the start of the next line
488
 
                                        else
 
489
                                                break;
 
490
                                        case MULTILINE_ENCODED:
489
491
                                                while( pos < buffer.limit() && (
490
492
                                                        buffer.get( pos ) == ' ' ||
491
493
                                                        buffer.get( pos ) == '\t' ) )
492
494
                                                {
493
495
                                                        pos++;
494
496
                                                }
 
497
                                                break;
 
498
                                        default:
 
499
                                                // do nothing
 
500
                                        }
 
501
 
 
502
                                        // take us out of multi-line so that we can re-detect whether
 
503
                                        // this line is a multi-line or not
 
504
                                        _parser_multiline_state = MULTILINE_NONE;
495
505
                                }
496
506
                                else
497
507
                                {
552
562
//                                      unencoding_result = unencodeBase64( props[ 1 ], charset );
553
563
                                if( unencoding_result != null ) {
554
564
                                        value = unencoding_result.getBuffer();
555
 
                                        _parser_in_encoded_multiline =
556
 
                                                unencoding_result.isAnotherLineRequired();
 
565
                                        if( unencoding_result.isAnotherLineRequired() )
 
566
                                                _parser_multiline_state = MULTILINE_ENCODED;
557
567
                                }
558
568
 
559
569
                                // convert 8-bit ASCII charset to US-ASCII
571
581
                                        throw new ParseException( R.string.error_vcf_charset );
572
582
                                }
573
583
 
 
584
                                // for some entries that have semicolon-separated value parts,
 
585
                                // check to see if the value ends in an escape character, which
 
586
                                // indicates that we have a multi-line value
 
587
                                if( ( name_param_parts[ 0 ].equals( "N" ) ||
 
588
                                        name_param_parts[ 0 ].equals( "ORG" ) ||
 
589
                                        name_param_parts[ 0 ].equals( "ADR" ) ) &&
 
590
                                        doesStringEndInAnEscapeChar( string_value ) )
 
591
                                {
 
592
                                        _parser_multiline_state = MULTILINE_ESCAPED;
 
593
                                        string_value = string_value.substring( 0,
 
594
                                                string_value.length() - 1 );
 
595
                                }
 
596
 
574
597
                                // now that we know whether we're in an encoded multi-line,
575
598
                                // determine if we're in a v3 folded multi-line or not
576
 
                                _parser_in_folded_multiline = !_parser_in_encoded_multiline &&
577
 
                                        _version.equals( "3.0" ) && next_line_looks_folded;
 
599
                                if( _parser_multiline_state == MULTILINE_NONE &&
 
600
                                        _version.equals( "3.0" ) && next_line_looks_folded )
 
601
                                {
 
602
                                        _parser_multiline_state = MULTILINE_FOLDED;
 
603
                                }
578
604
 
579
 
                                // handle multi-line requests
580
 
                                if( _parser_in_encoded_multiline ||
581
 
                                        _parser_in_folded_multiline )
582
 
                                {
 
605
                                // handle multi-lines by buffering them and parsing them when we
 
606
                                // are processing the last line in a multi-line sequence
 
607
                                if( _parser_multiline_state != MULTILINE_NONE ) {
583
608
                                        _parser_buffered_value_so_far += string_value;
584
609
                                        return;
585
610
                                }
586
 
 
587
 
                                // add on buffered multi-line content
588
611
                                String complete_value =
589
 
                                        _parser_buffered_value_so_far + string_value;
 
612
                                        ( _parser_buffered_value_so_far + string_value ).trim();
590
613
 
591
614
                                // ignore empty values
592
615
                                if( complete_value.length() < 1 ) return;
602
625
                                        parseTEL( name_param_parts, complete_value );
603
626
                                else if( name_param_parts[ 0 ].equals( "EMAIL" ) )
604
627
                                        parseEMAIL( name_param_parts, complete_value );
605
 
                        }
 
628
                                else if( name_param_parts[ 0 ].equals( "ADR" ) )
 
629
                                        parseADR( name_param_parts, complete_value );
 
630
                        }
 
631
                }
 
632
 
 
633
                private boolean doesStringEndInAnEscapeChar( String string )
 
634
                {
 
635
                        // count the number of backslashes at the end of the string
 
636
                        int count = 0;
 
637
                        for( int a = string.length() - 1; a >= 0; a-- )
 
638
                                if( string.charAt( a ) == '\\' )
 
639
                                        count++;
 
640
                                else
 
641
                                        break;
 
642
 
 
643
                        // if there are an even number of backslashes then the final one
 
644
                        // doesn't count
 
645
                        return ( count & 1 ) == 1;
 
646
                }
 
647
 
 
648
                private String[] splitValueBySemicolon( String value )
 
649
                {
 
650
                        // split string into parts by semicolon
 
651
                        ArrayList< String > parts = new ArrayList< String >(
 
652
                                Arrays.asList( value.split(  ";" ) ) );
 
653
 
 
654
                        // go through parts
 
655
                        for( int a = 0; a < parts.size(); a++ )
 
656
                        {
 
657
                                String str = parts.get( a );
 
658
 
 
659
                                // look for parts that end in an escape character, but ignore
 
660
                                // the final part. We've already detected escape chars at the
 
661
                                // end of the final part in parseLine() and handled multi-lines
 
662
                                // accordingly.
 
663
                                if( a < parts.size() - 1 &&
 
664
                                        doesStringEndInAnEscapeChar( str ) )
 
665
                                {
 
666
                                        // join the next part to this part and remove the next part
 
667
                                        parts.set( a, str.substring( 0, str.length() - 1 ) +
 
668
                                                ';' + parts.get( a + 1 ) );
 
669
                                        parts.remove( a + 1 );
 
670
 
 
671
                                        // re-visit this part
 
672
                                        a--;
 
673
                                        continue;
 
674
                                }
 
675
 
 
676
                                // trim and replace string
 
677
                                str = str.trim();
 
678
                                parts.set( a, str );
 
679
                        }
 
680
 
 
681
                        String[] ret = new String[ parts.size() ];
 
682
                        return parts.toArray( ret );
606
683
                }
607
684
 
608
685
                private void parseN( String[] params, String value )
613
690
                        if( _name_level >= NAMELEVEL_N ) return;
614
691
 
615
692
                        // get name parts
616
 
                        String[] name_parts = value.split( ";" );
617
 
                        for( int i = 0; i < name_parts.length; i++ )
618
 
                                name_parts[ i ] = name_parts[ i ].trim();
 
693
                        String[] name_parts = splitValueBySemicolon( value );
619
694
 
620
695
                        // build name
621
696
                        value = "";
652
727
                        if( _name_level >= NAMELEVEL_ORG ) return;
653
728
 
654
729
                        // get org parts
655
 
                        String[] org_parts = value.split( ";" );
656
 
                        for( int i = 0; i < org_parts.length; i++ )
657
 
                                org_parts[ i ] = org_parts[ i ].trim();
 
730
                        String[] org_parts = splitValueBySemicolon( value );
658
731
 
659
732
                        // build name
660
733
                        if( org_parts.length > 1 && org_parts[ 0 ].length() == 0 )
661
734
                                value = org_parts[ 1 ];
 
735
                        else if( org_parts.length > 1 && org_parts[ 1 ].length() > 0 )
 
736
                                value = org_parts[ 0 ] + ", " + org_parts[ 1 ];
662
737
                        else
663
738
                                value = org_parts[ 0 ];
664
739
 
706
781
                        Set< String > types = extractTypes( params, Arrays.asList(
707
782
                                "PREF", "WORK", "HOME", "INTERNET" ) );
708
783
 
709
 
                        // here's the logic...
 
784
                        // add email address
710
785
                        boolean preferred = types.contains( "PREF" );
711
786
                        if( types.contains( "WORK" ) )
712
787
                                addEmail( value, Contacts.ContactMethods.TYPE_WORK, preferred );
714
789
                                addEmail( value, Contacts.ContactMethods.TYPE_HOME, preferred );
715
790
                }
716
791
 
 
792
                private void parseADR( String[] params, String value )
 
793
                        throws ParseException, SkipContactException
 
794
                {
 
795
                        // get address parts
 
796
                        String[] adr_parts = splitValueBySemicolon( value );
 
797
 
 
798
                        // build address
 
799
                        value = "";
 
800
                        for( int a = 0; a < adr_parts.length; a++ ) {
 
801
                                if( value.length() > 0 ) value += "\n";
 
802
                                value += adr_parts[ a ].trim();
 
803
                        }
 
804
 
 
805
                        Set< String > types = extractTypes( params, Arrays.asList(
 
806
                                "PREF", "WORK", "HOME", "INTERNET" ) );
 
807
 
 
808
                        // add address
 
809
                        if( types.contains( "WORK" ) )
 
810
                                addAddress( value, Contacts.ContactMethods.TYPE_WORK );
 
811
                        else
 
812
                                addAddress( value, Contacts.ContactMethods.TYPE_HOME);
 
813
                }
 
814
 
717
815
                public void finaliseParsing()
718
816
                        throws ParseException, SkipContactException,
719
817
                        AbortImportException